# Import packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import statsmodels
import scipy
from scipy import stats
from scipy.stats import boxcox
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multicomp import MultiComparison
from statsmodels.stats.proportion import proportions_ztest
from statsmodels.stats.proportion import proportions_chisquare

# Read in data

In [2]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

# Load the shelter records.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this file exists at exactly this location.
shelter = pd.read_csv(
    r'C:\Users\ardit\Desktop\Final-Project\MasterShelter.csv'
)

# Preview the first rows of the loaded data.
shelter.head()

Unnamed: 0,speciesname,intakeMonth,intakeMonthR,id,movementMonth,DogAgeGroup,intakeYear,movementDate1R,intakereason,movementYear,movementtype,sexname,CatAgeGroup
0,Cat,11,February,15801,5,Senior,2009,May,Moving,2017,Adoption,Female,Senior
1,Dog,12,February,15932,4,Senior,2009,April,Moving,2017,Adoption,Male,Mature
2,Dog,8,February,28859,4,Mature,2012,April,Abandoned,2017,Adoption,Female,Mature
3,Dog,8,February,28859,2,Mature,2012,February,Abandoned,2020,Reclaimed,Female,Mature
4,Cat,1,January,30812,4,Senior,2013,April,Abandoned,2017,Foster,Female,Mature


# Recode adoptions to 0 and all other movement types to 1

In [3]:
# Binary outcome coding for the 'movementtype' column:
#   0 -> the animal was adopted
#   1 -> any other listed movement type
non_adoption_types = [
    "Escaped",
    "Foster",
    "Reclaimed",
    "Released To Wild",
    "Stolen",
    "Transfer",
]
movement_codes = {"Adoption": 0}
movement_codes.update(dict.fromkeys(non_adoption_types, 1))

# Nested mapping {column name: {old value: new value}} so that only the
# 'movementtype' column is recoded.
dictionaryMovement = {'movementtype': movement_codes}

# Recode in place; the cells below read the recoded values from `shelter`.
shelter.replace(dictionaryMovement, inplace=True)

In [4]:
# Confirm the recode: 'movementtype' should now hold 0/1 instead of text labels.
shelter.head(n=5)

Unnamed: 0,speciesname,intakeMonth,intakeMonthR,id,movementMonth,DogAgeGroup,intakeYear,movementDate1R,intakereason,movementYear,movementtype,sexname,CatAgeGroup
0,Cat,11,February,15801,5,Senior,2009,May,Moving,2017,0,Female,Senior
1,Dog,12,February,15932,4,Senior,2009,April,Moving,2017,0,Male,Mature
2,Dog,8,February,28859,4,Mature,2012,April,Abandoned,2017,0,Female,Mature
3,Dog,8,February,28859,2,Mature,2012,February,Abandoned,2020,1,Female,Mature
4,Cat,1,January,30812,4,Senior,2013,April,Abandoned,2017,1,Female,Mature


# Remove rows with Unknown values in the age group columns for better filtering

In [8]:
# Flag rows where either age-group column is labelled "Unknown" ...
has_unknown_age = shelter.CatAgeGroup.eq("Unknown") | shelter.DogAgeGroup.eq("Unknown")

# ... and keep everything else.
shelter1 = shelter.loc[~has_unknown_age]
shelter1.head()

Unnamed: 0,speciesname,intakeMonth,intakeMonthR,id,movementMonth,DogAgeGroup,intakeYear,movementDate1R,intakereason,movementYear,movementtype,sexname,CatAgeGroup
0,Cat,11,February,15801,5,Senior,2009,May,Moving,2017,0,Female,Senior
1,Dog,12,February,15932,4,Senior,2009,April,Moving,2017,0,Male,Mature
2,Dog,8,February,28859,4,Mature,2012,April,Abandoned,2017,0,Female,Mature
3,Dog,8,February,28859,2,Mature,2012,February,Abandoned,2020,1,Female,Mature
4,Cat,1,January,30812,4,Senior,2013,April,Abandoned,2017,1,Female,Mature


## Create a contingency table

In [9]:
# Contingency table of movement month (rows) by adoption outcome (columns:
# 0 = adopted, 1 = any other movement type).
# Both factors are taken from the filtered frame `shelter1`, so the rows being
# tabulated are explicitly the ones that survived the "Unknown" age-group
# filter, instead of mixing a column from `shelter1` with one from `shelter`.
shelter_crosstab = pd.crosstab(shelter1['movementDate1R'], shelter1['movementtype'])
shelter_crosstab

movementtype,0,1
movementDate1R,Unnamed: 1_level_1,Unnamed: 2_level_1
April,740,863
August,1515,1122
December,1248,603
February,691,475
January,993,534
July,1312,1495
June,1093,1293
March,750,627
May,872,1174
November,1112,773


## Running the Independent Chi-Square

In [10]:
# Chi-square test of independence on the month x outcome contingency table.
# The displayed result lists, in order: the test statistic, the p-value, the
# degrees of freedom, and the array of expected frequencies.
stats.chi2_contingency(observed=shelter_crosstab)

(522.7618177406488,
 4.4678838471241955e-105,
 11,
 array([[ 858.91442248,  744.08557752],
        [1412.94905307, 1224.05094693],
        [ 991.79700312,  859.20299688],
        [ 624.76245578,  541.23754422],
        [ 818.19234131,  708.80765869],
        [1504.03791883, 1302.96208117],
        [1278.45902185, 1107.54097815],
        [ 737.8198127 ,  639.1801873 ],
        [1096.28129032,  949.71870968],
        [1010.01477627,  874.98522373],
        [1287.56790843, 1115.43209157],
        [1252.20399584, 1084.79600416]]))

### The p-value is < 0.05, so there is a significant relationship between month and movement type (adopted or not adopted).

## Test the Assumption of 5 Cases per Expected Cell

### All the expected cell counts are over 5, so the assumption has been met.