# Import packages

In [1]:
import pandas as pd
from scipy import stats

# Evaluation Question #1: 
# What time of year are cats and dogs more likely to get adopted? 

# Read in data

In [2]:
# Lift the column display limit so every column of a frame is rendered.
pd.options.display.max_columns = None

# NOTE(review): absolute, machine-specific path — the notebook will only run
# where this file exists. The "Unnamed: 0" column in the preview suggests the
# CSV was saved with its index; confirm before relying on that column.
SHELTER_CSV = r'C:\Users\ardit\Desktop\Final-Project\shelter.csv'
shelter = pd.read_csv(SHELTER_CSV)
shelter.head()

Unnamed: 0,id,intakereason,breedname,speciesname,sexname,movementtype,intakeDate,movementDate,CatAgeGroup,DogAgeGroup,movementMonth,movementDay,movementYear,intakeMonth,intakeDay,intakeYear
0,15801,Moving,Domestic Short Hair,Cat,Female,Adoption,11/28/2009,05/13/2017,Senior,Senior,5,13,2017,11,28,2009
1,15932,Moving,German Shepherd Dog/Mix,Dog,Male,Adoption,12/08/2009,04/24/2017,Mature,Senior,4,24,2017,12,8,2009
2,28859,Abandoned,Shep Mix/Siberian Husky,Dog,Female,Adoption,08/10/2012,04/15/2017,Mature,Mature,4,15,2017,8,10,2012
3,28859,Abandoned,Shep Mix/Siberian Husky,Dog,Female,Reclaimed,08/10/2012,02/29/2020,Mature,Mature,2,29,2020,8,10,2012
4,30812,Abandoned,Domestic Short Hair,Cat,Female,Foster,01/11/2013,04/18/2017,Mature,Senior,4,18,2017,1,11,2013


# Recode adoptions to 1 and all other movement types to 0

In [3]:
# Binary outcome coding for the movement type column:
# 1 = adopted, 0 = left the shelter any other way.
dictionaryMovement = { 'movementtype':
{ "Adoption": 1,
  "Escaped": 0,
  "Foster": 0,
  "Reclaimed": 0,
  "Released To Wild": 0,
  "Stolen": 0,
  "Transfer": 0 }}

# Rebind rather than mutate in place: the cell stays safe to re-run (values
# that are already 0/1 are not keys of the mapping, so they pass through).
shelter = shelter.replace(dictionaryMovement)

# Guard: a movement type missing from the mapping would remain a string and
# silently add an extra column to any contingency table built on this column.
unrecoded = set(shelter['movementtype'].dropna().unique()) - {0, 1}
assert not unrecoded, f"movementtype values not covered by the recode: {unrecoded!r}"

In [4]:
# Preview the first five rows to check the movementtype column.
shelter.head(n=5)

Unnamed: 0,id,intakereason,breedname,speciesname,sexname,movementtype,intakeDate,movementDate,CatAgeGroup,DogAgeGroup,movementMonth,movementDay,movementYear,intakeMonth,intakeDay,intakeYear
0,15801,Moving,Domestic Short Hair,Cat,Female,1,11/28/2009,05/13/2017,Senior,Senior,5,13,2017,11,28,2009
1,15932,Moving,German Shepherd Dog/Mix,Dog,Male,1,12/08/2009,04/24/2017,Mature,Senior,4,24,2017,12,8,2009
2,28859,Abandoned,Shep Mix/Siberian Husky,Dog,Female,1,08/10/2012,04/15/2017,Mature,Mature,4,15,2017,8,10,2012
3,28859,Abandoned,Shep Mix/Siberian Husky,Dog,Female,0,08/10/2012,02/29/2020,Mature,Mature,2,29,2020,8,10,2012
4,30812,Abandoned,Domestic Short Hair,Cat,Female,0,01/11/2013,04/18/2017,Mature,Senior,4,18,2017,1,11,2013


In [5]:
### Remove Unknown variables in age groups for better filtering
# shelter1 = shelter[(shelter.CatAgeGroup != "Unknown") & (shelter.DogAgeGroup != "Unknown")]
# shelter1.head()

## Create a contingency table for both cats and dogs

In [6]:
# Observed counts for all species pooled: one row per movement month,
# one column per movementtype value.
shelter_crosstab = pd.crosstab(
    index=shelter['movementMonth'],
    columns=shelter['movementtype'],
)
shelter_crosstab

movementtype,0,1
movementMonth,Unnamed: 1_level_1,Unnamed: 2_level_1
1,369,793
2,330,530
3,428,538
4,592,538
5,822,697
6,885,897
7,977,1089
8,780,1296
9,773,1025
10,750,1091


## Running the Independent Chi-Square for cats and dogs

In [7]:
# Chi-square test of independence on the pooled month x movementtype table.
# The result carries the test statistic, p-value, degrees of freedom and the
# table of expected counts.
stats.chi2_contingency(observed=shelter_crosstab)

(387.42450111561885,
 2.853267316015109e-76,
 11,
 array([[ 492.80790523,  669.19209477],
        [ 364.72874225,  495.27125775],
        [ 409.68368025,  556.31631975],
        [ 479.23660319,  650.76339681],
        [ 644.21274358,  874.78725642],
        [ 755.7518822 , 1026.2481178 ],
        [ 876.19718778, 1189.80281222],
        [ 880.43821966, 1195.56178034],
        [ 762.53753322, 1035.46246678],
        [ 780.77397033, 1060.22602967],
        [ 628.5209256 ,  853.4790744 ],
        [ 586.11060673,  795.88939327]]))

### The p value is < 0.05, so there is a significant relationship between months and movement types (adopted or not adopted) for both cats and dogs.

## Test the Assumption of 5 Cases per Expected Cell
### All the values are over 5, so the assumption has been met.

### Results: The month of August (8) has the highest number of adoptions, with 1296 adoptions total for both cats and dogs.

## Create a contingency table for cats only

In [8]:
# Keep only the rows whose species is "Cat".
is_cat = shelter['speciesname'] == 'Cat'
Cats = shelter.loc[is_cat]
Cats

Unnamed: 0,id,intakereason,breedname,speciesname,sexname,movementtype,intakeDate,movementDate,CatAgeGroup,DogAgeGroup,movementMonth,movementDay,movementYear,intakeMonth,intakeDay,intakeYear
0,15801,Moving,Domestic Short Hair,Cat,Female,1,11/28/2009,05/13/2017,Senior,Senior,5,13,2017,11,28,2009
4,30812,Abandoned,Domestic Short Hair,Cat,Female,0,01/11/2013,04/18/2017,Mature,Senior,4,18,2017,1,11,2013
5,30812,Abandoned,Domestic Short Hair,Cat,Female,1,01/11/2013,05/29/2018,Mature,Senior,5,29,2018,1,11,2013
11,46437,Abandoned,Domestic Long Hair,Cat,Female,0,10/26/2016,03/25/2017,Senior,Geriatric,3,25,2017,10,26,2016
12,46437,Abandoned,Domestic Long Hair,Cat,Female,1,10/26/2016,04/07/2017,Senior,Geriatric,4,7,2017,10,26,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18054,72229,Stray,Domestic Short Hair,Cat,Male,1,01/13/2022,01/21/2022,Kitten,Puppy,1,21,2022,1,13,2022
18055,72230,Stray,Domestic Short Hair,Cat,Female,1,01/13/2022,01/24/2022,Adult,Adult,1,24,2022,1,13,2022
18056,72232,Incompatible with owner lifestyle,Domestic Long Hair,Cat,Male,1,01/13/2022,01/13/2022,Adult,Adult,1,13,2022,1,13,2022
18057,72243,Stray,Domestic Short Hair,Cat,Male,1,01/14/2022,01/21/2022,Adult,Adult,1,21,2022,1,14,2022


In [9]:
# Observed counts for cats only: one row per movement month,
# one column per movementtype value.
CatsCross = pd.crosstab(
    index=Cats['movementMonth'],
    columns=Cats['movementtype'],
)
CatsCross

movementtype,0,1
movementMonth,Unnamed: 1_level_1,Unnamed: 2_level_1
1,167,504
2,113,300
3,154,254
4,306,269
5,546,364
6,614,592
7,706,752
8,532,942
9,511,721
10,499,735


## Running the Independent Chi-Square for cats only

In [10]:
# Chi-square test of independence on the cats-only month x movementtype table.
# The result carries the test statistic, p-value, degrees of freedom and the
# table of expected counts.
stats.chi2_contingency(observed=CatsCross)

(509.31573382018445,
 3.3048804340577207e-102,
 11,
 array([[274.78489742, 396.21510258],
        [169.12989961, 243.87010039],
        [167.08232213, 240.91767787],
        [235.47140986, 339.52859014],
        [372.65910083, 537.34089917],
        [493.87568747, 712.12431253],
        [597.07359232, 860.92640768],
        [603.62584024, 870.37415976],
        [504.52309035, 727.47690965],
        [505.34212134, 728.65787866],
        [411.1535574 , 592.8464426 ],
        [356.27848101, 513.72151899]]))

### The p value is < 0.05, so there is a significant relationship between months and movement types (adopted or not adopted) for cats only.

## Test the Assumption of 5 Cases per Expected Cell
### All the values are over 5, so the assumption has been met.

### Results: The month of August (8) has the highest number of adoptions, with 942 adoptions total for cats only.

## Create a contingency table for dogs only

In [11]:
# Keep only the rows whose species is "Dog".
is_dog = shelter['speciesname'] == 'Dog'
Dogs = shelter.loc[is_dog]
Dogs

Unnamed: 0,id,intakereason,breedname,speciesname,sexname,movementtype,intakeDate,movementDate,CatAgeGroup,DogAgeGroup,movementMonth,movementDay,movementYear,intakeMonth,intakeDay,intakeYear
1,15932,Moving,German Shepherd Dog/Mix,Dog,Male,1,12/08/2009,04/24/2017,Mature,Senior,4,24,2017,12,8,2009
2,28859,Abandoned,Shep Mix/Siberian Husky,Dog,Female,1,08/10/2012,04/15/2017,Mature,Mature,4,15,2017,8,10,2012
3,28859,Abandoned,Shep Mix/Siberian Husky,Dog,Female,0,08/10/2012,02/29/2020,Mature,Mature,2,29,2020,8,10,2012
6,31469,Incompatible with owner lifestyle,Basenji/Mix,Dog,Female,1,03/26/2013,03/30/2013,Mature,Mature,3,30,2013,3,26,2013
7,31469,Incompatible with owner lifestyle,Basenji/Mix,Dog,Female,1,03/26/2013,05/09/2017,Mature,Mature,5,9,2017,3,26,2013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18058,72245,Stray,Australian Cattle Dog/Bully Breed Mix,Dog,Male,0,01/14/2022,01/15/2022,Adolescent,Adolescent,1,15,2022,1,14,2022
18059,72252,Stray,Beagle,Dog,Female,0,01/15/2022,01/16/2022,Geriatric,Geriatric,1,16,2022,1,15,2022
18060,72253,Owner requested Euthanasia,Chihuahua,Dog,Female,0,01/15/2022,01/20/2022,Geriatric,Geriatric,1,20,2022,1,15,2022
18061,72253,Owner requested Euthanasia,Chihuahua,Dog,Female,1,01/15/2022,01/24/2022,Geriatric,Geriatric,1,24,2022,1,15,2022


In [12]:
# Observed counts for dogs only: one row per movement month,
# one column per movementtype value.
DogsCross = pd.crosstab(
    index=Dogs['movementMonth'],
    columns=Dogs['movementtype'],
)
DogsCross

movementtype,0,1
movementMonth,Unnamed: 1_level_1,Unnamed: 2_level_1
1,202,289
2,217,230
3,274,284
4,286,269
5,276,333
6,271,305
7,271,337
8,248,354
9,262,304
10,251,356


## Running the Independent Chi-Square for dogs only

In [13]:
# Chi-square test of independence on the dogs-only month x movementtype table.
# The result carries the test statistic, p-value, degrees of freedom and the
# table of expected counts.
stats.chi2_contingency(observed=DogsCross)

(32.726284680260164,
 0.0005826564501742283,
 11,
 array([[220.64911484, 270.35088516],
        [200.87607808, 246.12392192],
        [250.75805719, 307.24194281],
        [249.4098956 , 305.5901044 ],
        [273.67680436, 335.32319564],
        [258.84702678, 317.15297322],
        [273.22741716, 334.77258284],
        [270.53109396, 331.46890604],
        [254.35315479, 311.64684521],
        [272.77802996, 334.22197004],
        [214.80708125, 263.19291875],
        [230.08624603, 281.91375397]]))

### The p value is < 0.05, so there is a significant relationship between months and movement types (adopted or not adopted) for dogs only.

## Test the Assumption of 5 Cases per Expected Cell
### All the values are over 5, so the assumption has been met.

### Results: The months of August (8) and October (10) have the highest numbers of adoptions, with 354 and 356 adoptions respectively for dogs only.

In [6]:
# shelter1.to_csv(r'C:\Users\ardit\Desktop\python_course\Final Project\ShelterFinal.csv', index = False)