# Which age group of animals has the highest adoption rate?

### 6 IV (age group), categorical

### 2 levels DV (adoptions), categorical

### Analysis Independent Chi-Square

# import packages

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import statsmodels
import scipy
from scipy import stats
from scipy.stats import boxcox
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison
from statsmodels.stats.proportion import proportions_ztest
from statsmodels.stats.proportion import proportions_chisquare

# read in data

In [8]:
shelter = pd.read_csv(r'C:\Users\hazre\OneDrive\OneNote\GitHub\Final-Project\MasterShelter.csv')

In [9]:
pd.set_option('display.max_columns', None)
shelter.head()

Unnamed: 0,speciesname,intakeMonth,intakeMonthR,id,movementMonth,DogAgeGroup,intakeYear,movementDate1R,intakereason,movementYear,movementtype,sexname,CatAgeGroup
0,Cat,11,February,15801,5,Senior,2009,May,Moving,2017,Adoption,Female,Senior
1,Dog,12,February,15932,4,Senior,2009,April,Moving,2017,Adoption,Male,Mature
2,Dog,8,February,28859,4,Mature,2012,April,Abandoned,2017,Adoption,Female,Mature
3,Dog,8,February,28859,2,Mature,2012,February,Abandoned,2020,Reclaimed,Female,Mature
4,Cat,1,January,30812,4,Senior,2013,April,Abandoned,2017,Foster,Female,Mature


# data wrangling

## create a dictionary to convert movementtype into 'adopted' and 'not adopted'

In [15]:
shelter.movementtype.value_counts()

0          12874
1          11236
Escaped        8
Name: movementtype, dtype: int64

In [23]:
dictionary = {'movementtype':
             {"Adoption" : 0,
             "Escaped" : 1,
             "Foster": 1,
             "Reclaimed": 1,
             "Released To Wild": 1,
             "Stolen": 1,
             "Transfer": 1,}}
shelter.replace(dictionary, inplace = True)

In [24]:
shelter.movementtype.value_counts()

0    12874
1    11244
Name: movementtype, dtype: int64

In [25]:
shelter.head()

Unnamed: 0,speciesname,intakeMonth,intakeMonthR,id,movementMonth,DogAgeGroup,intakeYear,movementDate1R,intakereason,movementYear,movementtype,sexname,CatAgeGroup
0,Cat,11,February,15801,5,Senior,2009,May,Moving,2017,0,Female,Senior
1,Dog,12,February,15932,4,Senior,2009,April,Moving,2017,0,Male,Mature
2,Dog,8,February,28859,4,Mature,2012,April,Abandoned,2017,0,Female,Mature
3,Dog,8,February,28859,2,Mature,2012,February,Abandoned,2020,1,Female,Mature
4,Cat,1,January,30812,4,Senior,2013,April,Abandoned,2017,1,Female,Mature


In [28]:
shelter.CatAgeGroup.value_counts()

Adult         11200
Adolescent     8234
Mature         1852
Kitten         1399
Senior         1050
Geriatric       290
Unknown          93
Name: CatAgeGroup, dtype: int64

### remove unknown

In [50]:
shelter1 = shelter[(shelter.CatAgeGroup != 'Unknown') & (shelter.DogAgeGroup != "Unknown")]
shelter1.head()

Unnamed: 0,speciesname,intakeMonth,intakeMonthR,id,movementMonth,DogAgeGroup,intakeYear,movementDate1R,intakereason,movementYear,movementtype,sexname,CatAgeGroup
0,Cat,11,February,15801,5,Senior,2009,May,Moving,2017,0,Female,Senior
1,Dog,12,February,15932,4,Senior,2009,April,Moving,2017,0,Male,Mature
2,Dog,8,February,28859,4,Mature,2012,April,Abandoned,2017,0,Female,Mature
3,Dog,8,February,28859,2,Mature,2012,February,Abandoned,2020,1,Female,Mature
4,Cat,1,January,30812,4,Senior,2013,April,Abandoned,2017,1,Female,Mature


In [44]:
shelter1.CatAgeGroup.value_counts()

Adult         11200
Adolescent     8234
Mature         1852
Kitten         1399
Senior         1050
Geriatric       290
Name: CatAgeGroup, dtype: int64

### create a contingency table cats

In [45]:
shelter_crosstab = pd.crosstab(shelter1['CatAgeGroup'], shelter1['movementtype'])

In [46]:
shelter_crosstab

movementtype,0,1
CatAgeGroup,Unnamed: 1_level_1,Unnamed: 2_level_1
Adolescent,4345,3889
Adult,6623,4577
Geriatric,65,225
Kitten,532,867
Mature,952,900
Senior,356,694


# run the independent chi square for cats

In [47]:
stats.chi2_contingency(shelter_crosstab)

(557.349779889623,
 3.3065808459449708e-118,
 5,
 array([[4411.91600416, 3822.08399584],
        [6001.14880333, 5198.85119667],
        [ 155.38688866,  134.61311134],
        [ 749.60778356,  649.39221644],
        [ 992.33281998,  859.66718002],
        [ 562.60770031,  487.39229969]]))

#### the pvalue is < .05 and there is a significant relationship between age groups and being adopted

# test assumptions of 5 cases per expected cell

#### all values are oer 5 so the assumption has been met

In [51]:
shelter1.DogAgeGroup.value_counts()

Adult         10407
Adolescent     8234
Mature         1887
Puppy          1399
Senior         1060
Geriatric      1038
Name: DogAgeGroup, dtype: int64

### create a contigency table dogs

In [52]:
shelter_crosstab = pd.crosstab(shelter1['DogAgeGroup'], shelter1['movementtype'])

In [53]:
shelter_crosstab

movementtype,0,1
DogAgeGroup,Unnamed: 1_level_1,Unnamed: 2_level_1
Adolescent,4345,3889
Adult,6184,4223
Geriatric,287,751
Mature,1026,861
Puppy,532,867
Senior,499,561


# run the Independent Chi Square for dogs

In [54]:
stats.chi2_contingency(shelter_crosstab)

(580.1479544959988,
 3.934079464568713e-123,
 5,
 array([[4411.91600416, 3822.08399584],
        [5576.24603538, 4830.75396462],
        [ 556.17789802,  481.82210198],
        [1011.08640999,  875.91359001],
        [ 749.60778356,  649.39221644],
        [ 567.96586889,  492.03413111]]))

#### pvalue is <0.05 and there is a significant relationship between animal age and being adopted

# test the assumptions

### all values are over 5 so the assumption has been met