In [1]:
import datetime as dt

import numpy as np
import pandas as pd
from scipy import stats
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer


In [2]:
#make pandas display large datasets without '...'s
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# General part
## Read raw data

In [3]:
data = pd.read_csv('data/raw_data.csv', index_col=0)

In [4]:
data.head()  #what does the data look like?

Unnamed: 0_level_0,Name,DateTime,OutcomeType,OutcomeSubtype,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color
AnimalID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A671945,Hambone,2014-02-12 18:22:00,Return_to_owner,,Dog,Neutered Male,1 year,Shetland Sheepdog Mix,Brown/White
A656520,Emily,2013-10-13 12:44:00,Euthanasia,Suffering,Cat,Spayed Female,1 year,Domestic Shorthair Mix,Cream Tabby
A686464,Pearce,2015-01-31 12:28:00,Adoption,Foster,Dog,Neutered Male,2 years,Pit Bull Mix,Blue/White
A683430,,2014-07-11 19:09:00,Transfer,Partner,Cat,Intact Male,3 weeks,Domestic Shorthair Mix,Blue Cream
A667013,,2013-11-15 12:52:00,Transfer,Partner,Dog,Neutered Male,2 years,Lhasa Apso/Miniature Poodle,Tan


## Standardize AgeuponOutcome to years

In [5]:
data.AgeuponOutcome.value_counts(dropna=False)  #what are the old values? for comparison with the result

1 year       3969
2 years      3742
2 months     3397
3 years      1823
1 month      1281
3 months     1277
4 years      1071
5 years       992
4 months      888
6 years       670
3 weeks       659
5 months      652
6 months      588
8 years       536
7 years       531
2 weeks       529
10 months     457
10 years      446
8 months      402
4 weeks       334
9 years       288
7 months      288
12 years      234
9 months      224
1 weeks       171
11 months     166
1 week        146
13 years      143
11 years      126
3 days        109
2 days         99
14 years       97
15 years       85
1 day          66
6 days         50
4 days         50
16 years       36
5 days         24
0 years        22
NaN            18
17 years       17
5 weeks        11
18 years       10
19 years        3
20 years        2
Name: AgeuponOutcome, dtype: int64

In [6]:
def transform_to_years(age_string):
    '''takes as input the AgeuponOutcome feature of the raw data and outputs the corresponding number of years'''

    if age_string is np.nan:
        return np.nan

    split_string = age_string.split()

    if split_string[1].strip('s') == 'year':
        return float(split_string[0])

    elif split_string[1].strip('s') == 'month':
        return float(split_string[0]) / 12

    elif split_string[1].strip('s') == 'week':
        return float(split_string[0]) / 52

    elif split_string[1].strip('s') == 'day':
        return float(split_string[0]) / 365

In [7]:
data['AgeuponOutcome'] = data.AgeuponOutcome.apply(transform_to_years)  #apply the function

In [8]:
data.AgeuponOutcome.value_counts(dropna=False)  #looks good

1.000000     3969
2.000000     3742
0.166667     3397
3.000000     1823
0.083333     1281
0.250000     1277
4.000000     1071
5.000000      992
0.333333      888
6.000000      670
0.057692      659
0.416667      652
0.500000      588
8.000000      536
7.000000      531
0.038462      529
0.833333      457
10.000000     446
0.666667      402
0.076923      334
0.019231      317
9.000000      288
0.583333      288
12.000000     234
0.750000      224
0.916667      166
13.000000     143
11.000000     126
0.008219      109
0.005479       99
14.000000      97
15.000000      85
0.002740       66
0.010959       50
0.016438       50
16.000000      36
0.013699       24
0.000000       22
NaN            18
17.000000      17
0.096154       11
18.000000      10
19.000000       3
20.000000       2
Name: AgeuponOutcome, dtype: int64

There seem to be some NaN values and some values which are zero which turned out to be the "0 years" entries.
These entries don't provide any valuable information, so we will delete these entries (since they are not many deleting them is fine in this case).

In [9]:
data = data.dropna(subset=['AgeuponOutcome'])
data = data[data.AgeuponOutcome != 0.0]
data.AgeuponOutcome.value_counts(dropna=False)  #looks good

1.000000     3969
2.000000     3742
0.166667     3397
3.000000     1823
0.083333     1281
0.250000     1277
4.000000     1071
5.000000      992
0.333333      888
6.000000      670
0.057692      659
0.416667      652
0.500000      588
8.000000      536
7.000000      531
0.038462      529
0.833333      457
10.000000     446
0.666667      402
0.076923      334
0.019231      317
0.583333      288
9.000000      288
12.000000     234
0.750000      224
0.916667      166
13.000000     143
11.000000     126
0.008219      109
0.005479       99
14.000000      97
15.000000      85
0.002740       66
0.010959       50
0.016438       50
16.000000      36
0.013699       24
17.000000      17
0.096154       11
18.000000      10
19.000000       3
20.000000       2
Name: AgeuponOutcome, dtype: int64

## Transform date column

In [10]:
#extract the year, month and weekday since these are the features we need for data understanding

data['year'] = data.DateTime.apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%d %H:%M:%S').year)
data['month'] = data.DateTime.apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%d %H:%M:%S').month)
data['weekday'] = data.DateTime.apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%d %H:%M:%S').weekday())

# Replace by Strings
dict = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}
data["weekday"] = data["weekday"].map(dict)

dict = {1: 'January', 2: 'February', 3: 'March', 4: 'April', 5: 'May', 6: 'June', 7: 'July', 8: 'August',
        9: 'September', 10: 'October', 11: 'November', 12: 'December'}
data["month"] = data["month"].map(dict)

## Transform SexuponOutcome

In [11]:
data.SexuponOutcome.value_counts(dropna=False)  #what are the old values?

Neutered Male    9779
Spayed Female    8819
Intact Male      3516
Intact Female    3498
Unknown          1076
NaN                 1
Name: SexuponOutcome, dtype: int64

In [12]:
def transform_sex(sex_string, neutralized=False):
    '''takes as input the SexuponOutcome feature and returns just the sex or if the animal was neutralized'''

    if (sex_string is np.nan) or (sex_string == 'Unknown'):
        return np.nan

    split_string = sex_string.split()

    if neutralized:
        if split_string[0] == 'Intact':
            return False
        else:
            return True
    else:
        return split_string[1]

In [13]:
#apply the function
data['neutralized'] = data.SexuponOutcome.apply(lambda x: transform_sex(x, True))
data['SexuponOutcome'] = data.SexuponOutcome.apply(transform_sex)

We will also drop the NA values here

In [14]:
data = data.dropna(subset=['neutralized'])

In [15]:
data.SexuponOutcome.value_counts(dropna=False)  #looks good

Male      13295
Female    12317
Name: SexuponOutcome, dtype: int64

In [16]:
data.neutralized.value_counts(dropna=False)  #looks good

True     18598
False     7014
Name: neutralized, dtype: int64

## Investigate colors
Let's look at the colors next:

In [17]:
print(f"Current number of unique colors: {data.Color.nunique()}")

Current number of unique colors: 361


Since these are a bit too many to handle we will try and reduce the number of colors.
Let's look at what values we are dealing with and how often they occur.

In [18]:
data.Color.value_counts()

Black/White                    2700
Black                          2088
Brown Tabby                    1454
White                           894
Brown/White                     884
Brown Tabby/White               845
Tan/White                       771
Orange Tabby                    764
Tricolor                        749
Black/Tan                       672
Blue/White                      670
Brown                           630
Tan                             620
White/Black                     615
White/Brown                     562
Tortie                          511
Calico                          498
Brown Brindle/White             450
Black/Brown                     435
Orange Tabby/White              431
Blue                            417
Blue Tabby                      401
White/Tan                       389
Red                             337
Red/White                       331
Brown/Black                     330
Torbie                          321
Brown Brindle               

In [19]:
data["ColorMix"] = data.Color.str.contains('/')
data.Color = data.Color.str.split('/', expand=True)[0]
data.Color.value_counts()

Black                6093
White                3238
Brown Tabby          2316
Brown                1939
Tan                  1662
Orange Tabby         1197
Blue                 1134
Tricolor              797
Red                   779
Brown Brindle         697
Blue Tabby            625
Tortie                559
Calico                533
Chocolate             448
Torbie                382
Sable                 322
Buff                  267
Cream Tabby           261
Yellow                230
Gray                  218
Cream                 214
Fawn                  209
Lynx Point            175
Blue Merle            165
Seal Point            147
Black Brindle          99
Flame Point            83
Gold                   77
Brown Merle            71
Black Smoke            62
Black Tabby            53
Silver                 53
Red Merle              52
Blue Tick              43
Red Tick               40
Silver Tabby           39
Lilac Point            37
Gray Tabby             36
Tortie Point

In [20]:
data.ColorMix.value_counts()

True     13527
False    12085
Name: ColorMix, dtype: int64

In [21]:
print(f"We were able to reduce the number of unique colors to: {data.Color.nunique()}")

We were able to reduce the number of unique colors to: 56


We were able to reduce the number of colors quite a bit while still retaining the information that an animal has a pure 
or mixed color (which is actually almost half of the animals). This will probably suffice for this use case.
For the moment we will keep any outliers in the color category since we want to see if rare colors will get adopted more often.
Later in this notebook we will drop these outliers for modelling.

## Drop OutcomeSubtype

Let's look at the OutcomeSubtype column:

In [22]:
data.OutcomeSubtype.value_counts(dropna=False)  #a lot of NA values

NaN                    13594
Partner                 7159
Foster                  1800
SCRP                    1281
Suffering                908
Aggressive               319
Offsite                  165
In Kennel                 98
Behavior                  86
Rabies Risk               69
Medical                   63
In Foster                 49
Court/Investigation        6
Enroute                    6
At Vet                     4
In Surgery                 3
Barn                       2
Name: OutcomeSubtype, dtype: int64

Since we observe a lot of NA values, and we are not too interested in the specific outcomes in detail but rather the
general outcome type we decided to drop this column as it just introduces unnecessary complexity to the data which doesn't add
nearly as much valuable information in order for it to be reasonably retained.

In [23]:
data = data.drop(["OutcomeSubtype"], axis=1)

## Transform Breed

In [24]:
data["BreedMix"] = data.Breed.str.contains("Mix")  #creating new boolean column for Mix

In [26]:
data["Breed"] = data['Breed'].str.replace("/.*", "")

  """Entry point for launching an IPython kernel.


In [27]:
data[data.AnimalType == 'Dog'].Breed.nunique()  #reduced unique breeds by over 1000

187

In [28]:
data[data.AnimalType == 'Dog'].Breed.value_counts()

Chihuahua Shorthair                   2130
Pit Bull                              2107
Labrador Retriever                    1897
German Shepherd                        824
Australian Cattle Dog                  510
Dachshund                              504
Boxer                                  352
Border Collie                          330
Miniature Poodle                       310
Australian Shepherd                    228
Yorkshire Terrier                      226
Jack Russell Terrier                   222
Miniature Schnauzer                    220
Catahoula                              218
Rat Terrier                            214
Beagle                                 214
Siberian Husky                         194
Rottweiler                             180
Shih Tzu                               176
Chihuahua Longhair                     168
Cairn Terrier                          140
Pointer                                139
Great Pyrenees                         131
American Bu

In [29]:
data[data.AnimalType == 'Cat'].Breed.nunique()  #still a reduction by about 50%, but initial values were much lower anyway

33

In [30]:
data[data.AnimalType == 'Cat'].Breed.value_counts()

Domestic Shorthair      8032
Domestic Medium Hair     836
Domestic Longhair        515
Siamese                  406
Snowshoe                  74
Manx                      47
Maine Coon                47
Russian Blue              34
Himalayan                 18
Persian                   14
Ragdoll                   12
American Shorthair         8
Angora                     7
Japanese Bobtail           6
Bengal                     5
Bombay                     5
Balinese                   5
British Shorthair          4
Tonkinese                  4
Pixiebob Shorthair         3
Cymric                     2
Turkish Van                2
Abyssinian                 2
Sphynx                     2
Devon Rex                  2
Javanese                   2
Exotic Shorthair           2
Burmese                    1
Ocicat                     1
Munchkin Longhair          1
Cornish Rex                1
Norwegian Forest Cat       1
Havana Brown               1
Name: Breed, dtype: int64

In order to provide more meaningful meta categories of the different breeds we first removed the "mix" attribute from the breed column and instead created a separate boolean column which indicates whether or not a given animal has mixed attributes. We also made the compromise of removing extra specifications of the breeds, which were indicated by slashes in the raw dataset, because here again most of the additional information is we too specific to be of any particular use for our models. In fact, if we kept all this data we would actually much more prone to overfitting.

## Save data for Visualization

We will save two instances of the data for different purposes that are:
- Visualization
- Modelling

Here we conclude the data transformation for visualizing the data the following code will do some preprocessing steps
for the modelling part.

In [31]:
data.head()

Unnamed: 0_level_0,Name,DateTime,OutcomeType,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color,year,month,weekday,neutralized,ColorMix,BreedMix
AnimalID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
A671945,Hambone,2014-02-12 18:22:00,Return_to_owner,Dog,Male,1.0,Shetland Sheepdog,Brown,2014,February,Wednesday,True,True,True
A656520,Emily,2013-10-13 12:44:00,Euthanasia,Cat,Female,1.0,Domestic Shorthair,Cream Tabby,2013,October,Sunday,True,False,True
A686464,Pearce,2015-01-31 12:28:00,Adoption,Dog,Male,2.0,Pit Bull,Blue,2015,January,Saturday,True,True,True
A683430,,2014-07-11 19:09:00,Transfer,Cat,Male,0.057692,Domestic Shorthair,Blue Cream,2014,July,Friday,False,False,True
A667013,,2013-11-15 12:52:00,Transfer,Dog,Male,2.0,Lhasa Apso,Tan,2013,November,Friday,True,False,False


In [32]:
# data.to_csv('data/transformed_data.csv',index_label='AnimalID')

# Transformation for modelling

## Drop Name and DateTime column

At first, we drop the Name and the DateTime column since they are not useful for modelling.

In [33]:
data_num = data.drop(labels=['Name', 'DateTime'], axis=1)

## Handle outliers
Next we will drop some outliers we retained for the exploratory analysis of the data but will only lead to overfitting of
the model since some entries are too specific and can not be generalized.

For this we will look at the z-score of some columns and drop the rows with the highest z-scores (high std).

In [34]:
data_num.AgeuponOutcome.value_counts(dropna=False).tail(5)

0.096154     11
0.013699     11
18.000000    10
19.000000     3
20.000000     2
Name: AgeuponOutcome, dtype: int64

In [35]:
data_num = data_num[(np.abs(stats.zscore(data_num.AgeuponOutcome)) < 3)]
data_num.AgeuponOutcome.value_counts(dropna=False).tail(5)

0.010959    38
0.002740    24
0.016438    21
0.096154    11
0.013699    11
Name: AgeuponOutcome, dtype: int64

As one can see we deleted the entries with an age of 19 and 20 years since these were outliers.


We will also delete outliers for Breeds and Colors:

In [36]:
data_num.Breed.value_counts(dropna=False).tail(30)

English Setter                 2
Bearded Collie                 2
Old English Sheepdog           2
Wirehaired Pointing Griffon    2
Javanese                       2
Neapolitan Mastiff             2
Devon Rex                      2
Field Spaniel                  2
Unknown                        2
English Cocker Spaniel         1
Havana Brown                   1
Swiss Hound                    1
Belgian Tervuren               1
Norwegian Forest Cat           1
Spanish Mastiff                1
Lowchen                        1
Cornish Rex                    1
Burmese                        1
Norwegian Elkhound             1
Hovawart                       1
Munchkin Longhair              1
Sealyham Terr                  1
Ocicat                         1
Mexican Hairless               1
Afghan Hound                   1
Entlebucher                    1
Otterhound                     1
Treeing Tennesse Brindle       1
Kuvasz                         1
Spinone Italiano               1
Name: Bree

In [37]:
data_num = data_num[data_num.groupby('Breed').Breed.transform('count') > 5]
data_num.Breed.value_counts(dropna=False).tail(30)

Newfoundland                11
Havanese                    11
Ragdoll                     11
Bullmastiff                 10
Smooth Fox Terrier           9
Leonberger                   9
Dutch Shepherd               9
Bernese Mountain Dog         9
American Eskimo              9
St. Bernard Rough Coat       9
English Coonhound            8
Cane Corso                   8
Treeing Walker Coonhound     8
Airedale Terrier             8
Silky Terrier                8
Schipperke                   8
Tibetan Terrier              8
Bloodhound                   8
Toy Fox Terrier              7
American Shorthair           7
Bluetick Hound               7
Angora                       7
English Springer Spaniel     7
Patterdale Terr              6
Swedish Vallhund             6
Japanese Bobtail             6
Landseer                     6
Feist                        6
Irish Wolfhound              6
Affenpinscher                6
Name: Breed, dtype: int64

In [38]:
data_num.Color.value_counts(dropna=False).tail(10)

Liver                12
Blue Tiger            8
Blue Smoke            5
Brown Tiger           3
Agouti                2
Silver Lynx Point     2
Liver Tick            2
Black Tiger           2
Ruddy                 1
Orange Tiger          1
Name: Color, dtype: int64

In [39]:
data_num = data_num[data_num.groupby('Color').Color.transform('count') > 3]
data_num.Color.value_counts(dropna=False).tail(10)

Yellow Brindle     31
Orange             31
Blue Point         29
Calico Point       27
Apricot            25
Chocolate Point    18
Blue Cream         17
Liver              12
Blue Tiger          8
Blue Smoke          5
Name: Color, dtype: int64

In [40]:
print(f"We are left with {data_num.Color.nunique()} different colors and {data_num.Breed.nunique()} different breeds.")

We are left with 49 different colors and 142 different breeds.


## Vectorization
Now we one hot encode the two string columns that are left (Breed and Color):

In [41]:
data_num.reset_index(drop=True, inplace=True)
cv = CountVectorizer(token_pattern='(?u)[a-zA-Z][a-z ]+')
matrix_colors = cv.fit_transform(data_num.Color)
df_colors = pd.DataFrame(matrix_colors.toarray(), columns=cv.get_feature_names())
df_colors.head()
df_colors.reset_index(drop=True, inplace=True)



In [42]:
data_num = pd.concat([data_num, df_colors], axis=1)
data_num = data_num.drop('Color', axis=1)
data_num.head()

Unnamed: 0,OutcomeType,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,year,month,weekday,neutralized,ColorMix,BreedMix,apricot,black,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle
0,Return_to_owner,Dog,Male,1.0,Shetland Sheepdog,2014,February,Wednesday,True,True,True,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Euthanasia,Cat,Female,1.0,Domestic Shorthair,2013,October,Sunday,True,False,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Adoption,Dog,Male,2.0,Pit Bull,2015,January,Saturday,True,True,True,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Transfer,Cat,Male,0.057692,Domestic Shorthair,2014,July,Friday,False,False,True,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Transfer,Dog,Male,2.0,Lhasa Apso,2013,November,Friday,True,False,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [43]:
data_num.reset_index(drop=True, inplace=True)
cv = CountVectorizer(token_pattern='(?u)[a-zA-Z][a-z ]+')
matrix_breeds = cv.fit_transform(data_num.Breed)
df_breeds = pd.DataFrame(matrix_breeds.toarray(), columns=cv.get_feature_names())
df_breeds.head()
df_breeds.reset_index(drop=True, inplace=True)



In [44]:
data_num = pd.concat([data_num, df_breeds], axis=1)
data_num = data_num.drop('Breed', axis=1)
data_num.head()

Unnamed: 0,OutcomeType,AnimalType,SexuponOutcome,AgeuponOutcome,year,month,weekday,neutralized,ColorMix,BreedMix,apricot,black,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle,affenpinscher,airedale terrier,akita,alaskan husky,alaskan malamute,american bulldog,american eskimo,american pit bull terrier,american shorthair,american staffordshire terrier,anatol shepherd,angora,australian cattle dog,australian kelpie,australian shepherd,basenji,basset hound,beagle,beauceron,belgian malinois,bernard rough coat,bernard smooth coat,bernese mountain dog,bichon frise,black.1,black mouth cur,bloodhound,blue lacy,bluetick hound,border collie,border terrier,boston terrier,boxer,brittany,bruss griffon,bull terrier,bulldog,bullmastiff,cairn terrier,cane corso,cardigan welsh corgi,carolina dog,catahoula,cavalier span,chesa bay retr,chihuahua longhair,chihuahua shorthair,chinese sharpei,chow chow,cocker spaniel,collie rough,collie smooth,dachshund,dachshund longhair,dachshund wirehair,dalmatian,doberman pinsch,dogo argentino,domestic longhair,domestic medium hair,domestic shorthair,dutch shepherd,english bulldog,english coonhound,english pointer,english springer spaniel,feist,finnish spitz,flat coat retriever,french bulldog,german shepherd,german shorthair pointer,golden retriever,great dane,great pyrenees,greyhound,harrier,havanese,himalayan,irish wolfhound,italian greyhound,jack russell terrier,japanese bobtail,labrador retriever,landseer,leonberger,lhasa apso,maine coon,maltese,manchester terrier,manx,mastiff,miniature pinscher,miniature poodle,miniature schnauzer,newfoundland,norfolk terrier,norwich terrier,papillon,parson russell terrier,patterdale terr,pbgv,pekingese,pembroke welsh corgi,persian,pharaoh hound,pit bull,plott hound,pointer,pomeranian,pug,queensland heeler,ragdoll,rat terrier,redbone hound,rhod ridgeback,rottweiler,russian blue,schipperke,shetland sheepdog,shiba inu,shih tzu,siamese,siberian husky,silky terrier,smooth fox terrier,snowshoe,soft coated wheaten terrier,st,staffordshire,standard poodle,standard schnauzer,swedish vallhund,tibetan terrier,toy fox terrier,toy poodle,treeing walker coonhound,vizsla,weimaraner,west highland,whippet,wire hair fox terrier,yorkshire terrier
0,Return_to_owner,Dog,Male,1.0,2014,February,Wednesday,True,True,True,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Euthanasia,Cat,Female,1.0,2013,October,Sunday,True,False,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Adoption,Dog,Male,2.0,2015,January,Saturday,True,True,True,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Transfer,Cat,Male,0.057692,2014,July,Friday,False,False,True,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Transfer,Dog,Male,2.0,2013,November,Friday,True,False,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [45]:
data_num.reset_index(drop=True, inplace=True)
cv = CountVectorizer()
matrix_months = cv.fit_transform(data_num.month)
df_months = pd.DataFrame(matrix_months.toarray(), columns=cv.get_feature_names())
df_months.head()
df_months.reset_index(drop=True, inplace=True)



In [46]:
data_num = pd.concat([data_num, df_months], axis=1)
data_num = data_num.drop('month', axis=1)
data_num.head()

Unnamed: 0,OutcomeType,AnimalType,SexuponOutcome,AgeuponOutcome,year,weekday,neutralized,ColorMix,BreedMix,apricot,black,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle,affenpinscher,airedale terrier,akita,alaskan husky,alaskan malamute,american bulldog,american eskimo,american pit bull terrier,american shorthair,american staffordshire terrier,anatol shepherd,angora,australian cattle dog,australian kelpie,australian shepherd,basenji,basset hound,beagle,beauceron,belgian malinois,bernard rough coat,bernard smooth coat,bernese mountain dog,bichon frise,black.1,black mouth cur,bloodhound,blue lacy,bluetick hound,border collie,border terrier,boston terrier,boxer,brittany,bruss griffon,bull terrier,bulldog,bullmastiff,cairn terrier,cane corso,cardigan welsh corgi,carolina dog,catahoula,cavalier span,chesa bay retr,chihuahua longhair,chihuahua shorthair,chinese sharpei,chow chow,cocker spaniel,collie rough,collie smooth,dachshund,dachshund longhair,dachshund wirehair,dalmatian,doberman pinsch,dogo argentino,domestic longhair,domestic medium hair,domestic shorthair,dutch shepherd,english bulldog,english coonhound,english pointer,english springer spaniel,feist,finnish spitz,flat coat retriever,french bulldog,german shepherd,german shorthair pointer,golden retriever,great dane,great pyrenees,greyhound,harrier,havanese,himalayan,irish wolfhound,italian greyhound,jack russell terrier,japanese bobtail,labrador retriever,landseer,leonberger,lhasa apso,maine coon,maltese,manchester terrier,manx,mastiff,miniature pinscher,miniature poodle,miniature schnauzer,newfoundland,norfolk terrier,norwich terrier,papillon,parson russell terrier,patterdale terr,pbgv,pekingese,pembroke welsh corgi,persian,pharaoh hound,pit bull,plott hound,pointer,pomeranian,pug,queensland heeler,ragdoll,rat terrier,redbone hound,rhod ridgeback,rottweiler,russian blue,schipperke,shetland sheepdog,shiba inu,shih tzu,siamese,siberian husky,silky terrier,smooth fox terrier,snowshoe,soft coated wheaten terrier,st,staffordshire,standard poodle,standard schnauzer,swedish vallhund,tibetan terrier,toy fox terrier,toy poodle,treeing walker coonhound,vizsla,weimaraner,west highland,whippet,wire hair fox terrier,yorkshire terrier,april,august,december,february,january,july,june,march,may,november,october,september
0,Return_to_owner,Dog,Male,1.0,2014,Wednesday,True,True,True,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,Euthanasia,Cat,Female,1.0,2013,Sunday,True,False,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,Adoption,Dog,Male,2.0,2015,Saturday,True,True,True,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,Transfer,Cat,Male,0.057692,2014,Friday,False,False,True,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,Transfer,Dog,Male,2.0,2013,Friday,True,False,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [47]:
data_num.reset_index(drop=True, inplace=True)
cv = CountVectorizer()
matrix_weekdays = cv.fit_transform(data_num.weekday)
df_weekdays = pd.DataFrame(matrix_weekdays.toarray(), columns=cv.get_feature_names())
df_weekdays.head()
df_weekdays.reset_index(drop=True, inplace=True)



In [48]:
data_num = pd.concat([data_num, df_weekdays], axis=1)
data_num = data_num.drop('weekday', axis=1)
data_num.head()

Unnamed: 0,OutcomeType,AnimalType,SexuponOutcome,AgeuponOutcome,year,neutralized,ColorMix,BreedMix,apricot,black,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle,affenpinscher,airedale terrier,akita,alaskan husky,alaskan malamute,american bulldog,american eskimo,american pit bull terrier,american shorthair,american staffordshire terrier,anatol shepherd,angora,australian cattle dog,australian kelpie,australian shepherd,basenji,basset hound,beagle,beauceron,belgian malinois,bernard rough coat,bernard smooth coat,bernese mountain dog,bichon frise,black.1,black mouth cur,bloodhound,blue lacy,bluetick hound,border collie,border terrier,boston terrier,boxer,brittany,bruss griffon,bull terrier,bulldog,bullmastiff,cairn terrier,cane corso,cardigan welsh corgi,carolina dog,catahoula,cavalier span,chesa bay retr,chihuahua longhair,chihuahua shorthair,chinese sharpei,chow chow,cocker spaniel,collie rough,collie smooth,dachshund,dachshund longhair,dachshund wirehair,dalmatian,doberman pinsch,dogo argentino,domestic longhair,domestic medium hair,domestic shorthair,dutch shepherd,english bulldog,english coonhound,english pointer,english springer spaniel,feist,finnish spitz,flat coat retriever,french bulldog,german shepherd,german shorthair pointer,golden retriever,great dane,great pyrenees,greyhound,harrier,havanese,himalayan,irish wolfhound,italian greyhound,jack russell terrier,japanese bobtail,labrador retriever,landseer,leonberger,lhasa apso,maine coon,maltese,manchester terrier,manx,mastiff,miniature pinscher,miniature poodle,miniature schnauzer,newfoundland,norfolk terrier,norwich terrier,papillon,parson russell terrier,patterdale terr,pbgv,pekingese,pembroke welsh corgi,persian,pharaoh hound,pit bull,plott hound,pointer,pomeranian,pug,queensland heeler,ragdoll,rat terrier,redbone hound,rhod ridgeback,rottweiler,russian blue,schipperke,shetland sheepdog,shiba inu,shih tzu,siamese,siberian husky,silky terrier,smooth fox terrier,snowshoe,soft coated wheaten terrier,st,staffordshire,standard poodle,standard schnauzer,swedish vallhund,tibetan terrier,toy fox terrier,toy poodle,treeing walker coonhound,vizsla,weimaraner,west highland,whippet,wire hair fox terrier,yorkshire terrier,april,august,december,february,january,july,june,march,may,november,october,september,friday,monday,saturday,sunday,thursday,tuesday,wednesday
0,Return_to_owner,Dog,Male,1.0,2014,True,True,True,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,Euthanasia,Cat,Female,1.0,2013,True,False,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
2,Adoption,Dog,Male,2.0,2015,True,True,True,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,Transfer,Cat,Male,0.057692,2014,False,False,True,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
4,Transfer,Dog,Male,2.0,2013,True,False,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0


## Transform to numeric features

In [49]:
data_num.head()

Unnamed: 0,OutcomeType,AnimalType,SexuponOutcome,AgeuponOutcome,year,neutralized,ColorMix,BreedMix,apricot,black,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle,affenpinscher,airedale terrier,akita,alaskan husky,alaskan malamute,american bulldog,american eskimo,american pit bull terrier,american shorthair,american staffordshire terrier,anatol shepherd,angora,australian cattle dog,australian kelpie,australian shepherd,basenji,basset hound,beagle,beauceron,belgian malinois,bernard rough coat,bernard smooth coat,bernese mountain dog,bichon frise,black.1,black mouth cur,bloodhound,blue lacy,bluetick hound,border collie,border terrier,boston terrier,boxer,brittany,bruss griffon,bull terrier,bulldog,bullmastiff,cairn terrier,cane corso,cardigan welsh corgi,carolina dog,catahoula,cavalier span,chesa bay retr,chihuahua longhair,chihuahua shorthair,chinese sharpei,chow chow,cocker spaniel,collie rough,collie smooth,dachshund,dachshund longhair,dachshund wirehair,dalmatian,doberman pinsch,dogo argentino,domestic longhair,domestic medium hair,domestic shorthair,dutch shepherd,english bulldog,english coonhound,english pointer,english springer spaniel,feist,finnish spitz,flat coat retriever,french bulldog,german shepherd,german shorthair pointer,golden retriever,great dane,great pyrenees,greyhound,harrier,havanese,himalayan,irish wolfhound,italian greyhound,jack russell terrier,japanese bobtail,labrador retriever,landseer,leonberger,lhasa apso,maine coon,maltese,manchester terrier,manx,mastiff,miniature pinscher,miniature poodle,miniature schnauzer,newfoundland,norfolk terrier,norwich terrier,papillon,parson russell terrier,patterdale terr,pbgv,pekingese,pembroke welsh corgi,persian,pharaoh hound,pit bull,plott hound,pointer,pomeranian,pug,queensland heeler,ragdoll,rat terrier,redbone hound,rhod ridgeback,rottweiler,russian blue,schipperke,shetland sheepdog,shiba inu,shih tzu,siamese,siberian husky,silky terrier,smooth fox terrier,snowshoe,soft coated wheaten terrier,st,staffordshire,standard poodle,standard schnauzer,swedish vallhund,tibetan terrier,toy fox terrier,toy poodle,treeing walker coonhound,vizsla,weimaraner,west highland,whippet,wire hair fox terrier,yorkshire terrier,april,august,december,february,january,july,june,march,may,november,october,september,friday,monday,saturday,sunday,thursday,tuesday,wednesday
0,Return_to_owner,Dog,Male,1.0,2014,True,True,True,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,Euthanasia,Cat,Female,1.0,2013,True,False,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
2,Adoption,Dog,Male,2.0,2015,True,True,True,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,Transfer,Cat,Male,0.057692,2014,False,False,True,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
4,Transfer,Dog,Male,2.0,2013,True,False,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0


In [50]:
data_num['AnimalType'] = data_num.AnimalType.apply(lambda x: 1 if x == 'Dog' else 0)

data_num['SexuponOutcome'] = data_num.SexuponOutcome.apply(lambda x: 1 if x == 'Female' else 0)

data_num['BreedMix'] = data_num.BreedMix.astype(float)

data_num['ColorMix'] = data_num.ColorMix.astype(float)

data_num['neutralized'] = data_num.neutralized.apply(float)

In [51]:
data_num.head()

Unnamed: 0,OutcomeType,AnimalType,SexuponOutcome,AgeuponOutcome,year,neutralized,ColorMix,BreedMix,apricot,black,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle,affenpinscher,airedale terrier,akita,alaskan husky,alaskan malamute,american bulldog,american eskimo,american pit bull terrier,american shorthair,american staffordshire terrier,anatol shepherd,angora,australian cattle dog,australian kelpie,australian shepherd,basenji,basset hound,beagle,beauceron,belgian malinois,bernard rough coat,bernard smooth coat,bernese mountain dog,bichon frise,black.1,black mouth cur,bloodhound,blue lacy,bluetick hound,border collie,border terrier,boston terrier,boxer,brittany,bruss griffon,bull terrier,bulldog,bullmastiff,cairn terrier,cane corso,cardigan welsh corgi,carolina dog,catahoula,cavalier span,chesa bay retr,chihuahua longhair,chihuahua shorthair,chinese sharpei,chow chow,cocker spaniel,collie rough,collie smooth,dachshund,dachshund longhair,dachshund wirehair,dalmatian,doberman pinsch,dogo argentino,domestic longhair,domestic medium hair,domestic shorthair,dutch shepherd,english bulldog,english coonhound,english pointer,english springer spaniel,feist,finnish spitz,flat coat retriever,french bulldog,german shepherd,german shorthair pointer,golden retriever,great dane,great pyrenees,greyhound,harrier,havanese,himalayan,irish wolfhound,italian greyhound,jack russell terrier,japanese bobtail,labrador retriever,landseer,leonberger,lhasa apso,maine coon,maltese,manchester terrier,manx,mastiff,miniature pinscher,miniature poodle,miniature schnauzer,newfoundland,norfolk terrier,norwich terrier,papillon,parson russell terrier,patterdale terr,pbgv,pekingese,pembroke welsh corgi,persian,pharaoh hound,pit bull,plott hound,pointer,pomeranian,pug,queensland heeler,ragdoll,rat terrier,redbone hound,rhod ridgeback,rottweiler,russian blue,schipperke,shetland sheepdog,shiba inu,shih tzu,siamese,siberian husky,silky terrier,smooth fox terrier,snowshoe,soft coated wheaten terrier,st,staffordshire,standard poodle,standard schnauzer,swedish vallhund,tibetan terrier,toy fox terrier,toy poodle,treeing walker coonhound,vizsla,weimaraner,west highland,whippet,wire hair fox terrier,yorkshire terrier,april,august,december,february,january,july,june,march,may,november,october,september,friday,monday,saturday,sunday,thursday,tuesday,wednesday
0,Return_to_owner,1,0,1.0,2014,1.0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,Euthanasia,0,1,1.0,2013,1.0,0.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
2,Adoption,1,0,2.0,2015,1.0,1.0,1.0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,Transfer,0,0,0.057692,2014,0.0,0.0,1.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
4,Transfer,1,0,2.0,2013,1.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0


## Standardize

In [52]:
X = data_num.drop('OutcomeType', axis=1)
Y = data_num.OutcomeType

scaler = preprocessing.MinMaxScaler()

X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
#Y = Y == 'Adoption' #for binary classification of adoption
X.head()

Unnamed: 0,AnimalType,SexuponOutcome,AgeuponOutcome,year,neutralized,ColorMix,BreedMix,apricot,black,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle,affenpinscher,airedale terrier,akita,alaskan husky,alaskan malamute,american bulldog,american eskimo,american pit bull terrier,american shorthair,american staffordshire terrier,anatol shepherd,angora,australian cattle dog,australian kelpie,australian shepherd,basenji,basset hound,beagle,beauceron,belgian malinois,bernard rough coat,bernard smooth coat,bernese mountain dog,bichon frise,black.1,black mouth cur,bloodhound,blue lacy,bluetick hound,border collie,border terrier,boston terrier,boxer,brittany,bruss griffon,bull terrier,bulldog,bullmastiff,cairn terrier,cane corso,cardigan welsh corgi,carolina dog,catahoula,cavalier span,chesa bay retr,chihuahua longhair,chihuahua shorthair,chinese sharpei,chow chow,cocker spaniel,collie rough,collie smooth,dachshund,dachshund longhair,dachshund wirehair,dalmatian,doberman pinsch,dogo argentino,domestic longhair,domestic medium hair,domestic shorthair,dutch shepherd,english bulldog,english coonhound,english pointer,english springer spaniel,feist,finnish spitz,flat coat retriever,french bulldog,german shepherd,german shorthair pointer,golden retriever,great dane,great pyrenees,greyhound,harrier,havanese,himalayan,irish wolfhound,italian greyhound,jack russell terrier,japanese bobtail,labrador retriever,landseer,leonberger,lhasa apso,maine coon,maltese,manchester terrier,manx,mastiff,miniature pinscher,miniature poodle,miniature schnauzer,newfoundland,norfolk terrier,norwich terrier,papillon,parson russell terrier,patterdale terr,pbgv,pekingese,pembroke welsh corgi,persian,pharaoh hound,pit bull,plott hound,pointer,pomeranian,pug,queensland heeler,ragdoll,rat terrier,redbone hound,rhod ridgeback,rottweiler,russian blue,schipperke,shetland sheepdog,shiba inu,shih tzu,siamese,siberian husky,silky terrier,smooth fox terrier,snowshoe,soft coated wheaten terrier,st,staffordshire,standard poodle,standard schnauzer,swedish vallhund,tibetan terrier,toy fox terrier,toy poodle,treeing walker coonhound,vizsla,weimaraner,west highland,whippet,wire hair fox terrier,yorkshire terrier,april,august,december,february,january,july,june,march,may,november,october,september,friday,monday,saturday,sunday,thursday,tuesday,wednesday
0,1.0,0.0,0.090683,0.333333,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,1.0,0.090683,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1.0,0.0,0.181614,0.666667,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.004997,0.333333,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.181614,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
data_num = pd.concat([X, Y], axis=1)
data_num.head()

Unnamed: 0,AnimalType,SexuponOutcome,AgeuponOutcome,year,neutralized,ColorMix,BreedMix,apricot,black,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle,affenpinscher,airedale terrier,akita,alaskan husky,alaskan malamute,american bulldog,american eskimo,american pit bull terrier,american shorthair,american staffordshire terrier,anatol shepherd,angora,australian cattle dog,australian kelpie,australian shepherd,basenji,basset hound,beagle,beauceron,belgian malinois,bernard rough coat,bernard smooth coat,bernese mountain dog,bichon frise,black.1,black mouth cur,bloodhound,blue lacy,bluetick hound,border collie,border terrier,boston terrier,boxer,brittany,bruss griffon,bull terrier,bulldog,bullmastiff,cairn terrier,cane corso,cardigan welsh corgi,carolina dog,catahoula,cavalier span,chesa bay retr,chihuahua longhair,chihuahua shorthair,chinese sharpei,chow chow,cocker spaniel,collie rough,collie smooth,dachshund,dachshund longhair,dachshund wirehair,dalmatian,doberman pinsch,dogo argentino,domestic longhair,domestic medium hair,domestic shorthair,dutch shepherd,english bulldog,english coonhound,english pointer,english springer spaniel,feist,finnish spitz,flat coat retriever,french bulldog,german shepherd,german shorthair pointer,golden retriever,great dane,great pyrenees,greyhound,harrier,havanese,himalayan,irish wolfhound,italian greyhound,jack russell terrier,japanese bobtail,labrador retriever,landseer,leonberger,lhasa apso,maine coon,maltese,manchester terrier,manx,mastiff,miniature pinscher,miniature poodle,miniature schnauzer,newfoundland,norfolk terrier,norwich terrier,papillon,parson russell terrier,patterdale terr,pbgv,pekingese,pembroke welsh corgi,persian,pharaoh hound,pit bull,plott hound,pointer,pomeranian,pug,queensland heeler,ragdoll,rat terrier,redbone hound,rhod ridgeback,rottweiler,russian blue,schipperke,shetland sheepdog,shiba inu,shih tzu,siamese,siberian husky,silky terrier,smooth fox terrier,snowshoe,soft coated wheaten terrier,st,staffordshire,standard poodle,standard schnauzer,swedish vallhund,tibetan terrier,toy fox terrier,toy poodle,treeing walker coonhound,vizsla,weimaraner,west highland,whippet,wire hair fox terrier,yorkshire terrier,april,august,december,february,january,july,june,march,may,november,october,september,friday,monday,saturday,sunday,thursday,tuesday,wednesday,OutcomeType
0,1.0,0.0,0.090683,0.333333,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Return_to_owner
1,0.0,1.0,0.090683,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,Euthanasia
2,1.0,0.0,0.181614,0.666667,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,Adoption
3,0.0,0.0,0.004997,0.333333,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,Transfer
4,1.0,0.0,0.181614,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,Transfer


## Standardize column names

In [54]:
cols = data_num.columns.tolist()
cols = cols[-1:] + cols[:-1]
data_num = data_num[cols]

In [55]:
data_num = data_num.rename(
    columns={"OutcomeType": "outcometype", "AnimalType": "animaltype", "SexuponOutcome": "sex", "AgeuponOutcome": "age",
             "ColorMix": "colormix", "BreedMix": "breedmix"})
data_num.head()

Unnamed: 0,outcometype,animaltype,sex,age,year,neutralized,colormix,breedmix,apricot,black,black.1,black brindle,black smoke,black tabby,blue,blue cream,blue merle,blue point,blue smoke,blue tabby,blue tick,blue tiger,brown,brown brindle,brown merle,brown tabby,buff,calico,calico point,chocolate,chocolate point,cream,cream tabby,fawn,flame point,gold,gray,gray tabby,lilac point,liver,lynx point,orange,orange tabby,red,red merle,red tick,sable,seal point,silver,silver tabby,tan,torbie,tortie,tortie point,tricolor,white,yellow,yellow brindle,affenpinscher,airedale terrier,akita,alaskan husky,alaskan malamute,american bulldog,american eskimo,american pit bull terrier,american shorthair,american staffordshire terrier,anatol shepherd,angora,australian cattle dog,australian kelpie,australian shepherd,basenji,basset hound,beagle,beauceron,belgian malinois,bernard rough coat,bernard smooth coat,bernese mountain dog,bichon frise,black.2,black.3,black mouth cur,bloodhound,blue lacy,bluetick hound,border collie,border terrier,boston terrier,boxer,brittany,bruss griffon,bull terrier,bulldog,bullmastiff,cairn terrier,cane corso,cardigan welsh corgi,carolina dog,catahoula,cavalier span,chesa bay retr,chihuahua longhair,chihuahua shorthair,chinese sharpei,chow chow,cocker spaniel,collie rough,collie smooth,dachshund,dachshund longhair,dachshund wirehair,dalmatian,doberman pinsch,dogo argentino,domestic longhair,domestic medium hair,domestic shorthair,dutch shepherd,english bulldog,english coonhound,english pointer,english springer spaniel,feist,finnish spitz,flat coat retriever,french bulldog,german shepherd,german shorthair pointer,golden retriever,great dane,great pyrenees,greyhound,harrier,havanese,himalayan,irish wolfhound,italian greyhound,jack russell terrier,japanese bobtail,labrador retriever,landseer,leonberger,lhasa apso,maine coon,maltese,manchester terrier,manx,mastiff,miniature pinscher,miniature poodle,miniature schnauzer,newfoundland,norfolk terrier,norwich terrier,papillon,parson russell terrier,patterdale terr,pbgv,pekingese,pembroke welsh corgi,persian,pharaoh hound,pit bull,plott hound,pointer,pomeranian,pug,queensland heeler,ragdoll,rat terrier,redbone hound,rhod ridgeback,rottweiler,russian blue,schipperke,shetland sheepdog,shiba inu,shih tzu,siamese,siberian husky,silky terrier,smooth fox terrier,snowshoe,soft coated wheaten terrier,st,staffordshire,standard poodle,standard schnauzer,swedish vallhund,tibetan terrier,toy fox terrier,toy poodle,treeing walker coonhound,vizsla,weimaraner,west highland,whippet,wire hair fox terrier,yorkshire terrier,april,august,december,february,january,july,june,march,may,november,october,september,friday,monday,saturday,sunday,thursday,tuesday,wednesday
0,Return_to_owner,1.0,0.0,0.090683,0.333333,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,Euthanasia,0.0,1.0,0.090683,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,Adoption,1.0,0.0,0.181614,0.666667,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,Transfer,0.0,0.0,0.004997,0.333333,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Transfer,1.0,0.0,0.181614,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


## Save modeling data

In [57]:
# data_num.to_csv('data/transformed_data_num.csv', index_label='AnimalID')