In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
import seaborn as sns
import requests
import json

# linear modelling
import statsmodels.api as sm
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression

gkey = 'AIzaSyBxkE1f673VyrEGjxrVexsfLA72MGZ3H0Y'

In [2]:
# data
filePath = "../data/aac_intakes_outcomes_new.csv"
fullData = pd.read_csv(filePath)

In [3]:
fullData.head()

Unnamed: 0,animal_id,animal_type,breed,age_upon_outcome,outcome_type,sex_upon_outcome,age_upon_outcome_(days),age_upon_outcome_(years),age_upon_outcome_age_group,outcome_datetime,...,sex_upon_intake,age_upon_intake_(days),age_upon_intake_(years),age_upon_intake_age_group,intake_datetime,intake_month,intake_year,intake_monthyear,intake_weekday,time_in_shelter_days
0,A006100,Dog,Spinone Italiano Mix,10 years,Return to Owner,Neutered Male,3650,10.0,"(7.5, 10.0]",12/7/2017 14:07,...,Neutered Male,3650,10.0,"(7.5, 10.0]",12/7/2017 0:00,12,2017,2017-12,Thursday,0.588194
1,A006100,Dog,Spinone Italiano Mix,7 years,Return to Owner,Neutered Male,2555,7.0,"(5.0, 7.5]",12/20/2014 16:35,...,Neutered Male,2555,7.0,"(5.0, 7.5]",12/19/2014 10:21,12,2014,2014-12,Friday,1.259722
2,A006100,Dog,Spinone Italiano Mix,6 years,Return to Owner,Neutered Male,2190,6.0,"(5.0, 7.5]",3/8/2014 17:10,...,Neutered Male,2190,6.0,"(5.0, 7.5]",3/7/2014 14:26,3,2014,2014-03,Friday,1.113889
3,A047759,Dog,Dachshund,10 years,Transfer,Neutered Male,3650,10.0,"(7.5, 10.0]",4/7/2014 15:12,...,Neutered Male,3650,10.0,"(7.5, 10.0]",4/2/2014 15:55,4,2014,2014-04,Wednesday,4.970139
4,A134067,Dog,Shetland Sheepdog,16 years,Return to Owner,Neutered Male,5840,16.0,"(15.0, 17.5]",11/16/2013 11:54,...,Neutered Male,5840,16.0,"(15.0, 17.5]",11/16/2013 9:02,11,2013,2013-11,Saturday,0.119444


In [4]:
print(fullData.columns)

Index(['animal_id', 'animal_type', 'breed', 'age_upon_outcome', 'outcome_type',
       'sex_upon_outcome', 'age_upon_outcome_(days)',
       'age_upon_outcome_(years)', 'age_upon_outcome_age_group',
       'outcome_datetime', 'outcome_month', 'outcome_year',
       'outcome_monthyear', 'outcome_weekday', 'date_of_birth', 'dob_year',
       'dob_month', 'dob_monthyear', 'age_upon_intake', 'color',
       'found_location', 'intake_condition', 'intake_type', 'sex_upon_intake',
       'age_upon_intake_(days)', 'age_upon_intake_(years)',
       'age_upon_intake_age_group', 'intake_datetime', 'intake_month',
       'intake_year', 'intake_monthyear', 'intake_weekday',
       'time_in_shelter_days'],
      dtype='object')


In [5]:
mask = (fullData.animal_type == "Dog") | (fullData.animal_type == "Cat") 
tempData = fullData.loc[mask]

mask2 = tempData.intake_type != "Euthanasia Request"
tempData2 = tempData.loc[mask2]

mask3 = (tempData2.intake_year != 2013) & (tempData2.intake_year != 2018)
tempData3 = tempData2.loc[mask3]

cleanData = tempData3.replace({"Rto-Adopt" : "Return to Owner", "Missing": "Other",
                  "Euthanasia": "Other","Died": "Other", "Disposal":"Other"})

In [6]:
addresses = cleanData['found_location'].unique()

In [7]:
len(addresses)

30747

In [8]:
addresses1 = addresses[0:100]
len(addresses1)

100

In [9]:
# create empty lists to store data
latitudes = []
longitudes = []
scraped_locations = []

# requst information for each city
for address in addresses:
    target_url= f"https://maps.googleapis.com/maps/api/geocode/json?address={address}&key={gkey}"
    response = requests.get(target_url)
    
    if response.status_code == 200:    
        data = response.json()
        
        try:
            # extract all data points before appending (don't append twice)
            latitude = data["results"][0]["geometry"]["location"]["lat"]
            longitude = data["results"][0]["geometry"]["location"]["lng"]
            location = address
            
            # append to lists
            latitudes.append(latitude)
            longitudes.append(longitude)
            scraped_locations.append(location)
            
        except: # we failed to extract all data points
            latitudes.append(np.nan)
            longitudes.append(np.nan)
            scraped_locations.append(location)
            
    else:
        latitudes.append(np.nan)
        longitudes.append(np.nan)
        scraped_locations.append(location)


In [10]:
len(scraped_locations)

30747

In [11]:
len(latitudes)

30747

In [12]:
len(longitudes)

30747

In [15]:
longitudes

[-97.7113114,
 -97.7191588,
 -97.7191588,
 -97.7430608,
 -97.7181325,
 -97.8518826,
 -97.8612052,
 -97.7641235,
 -97.7035501,
 nan,
 -97.7369659,
 -97.64894079999999,
 -97.6899024,
 -97.725079,
 -94.7326034,
 -97.8663143,
 -97.69337120000002,
 -97.71939379999999,
 -97.7146714,
 -85.578355,
 -97.7197545,
 -97.7688783,
 -97.7803631,
 -97.6967876,
 -97.4334667,
 -97.746409,
 -97.74052809999999,
 -97.6904186,
 -97.69822719999999,
 -97.68884639999999,
 nan,
 -97.8151833,
 -97.8157227,
 -97.8285985,
 -97.73926809999999,
 -97.70706829999999,
 -97.84227519999999,
 -97.7696258,
 -97.68173089999999,
 -97.7986926,
 -97.7786647,
 -97.7006023,
 -97.725673,
 -97.56727629999999,
 -97.5668953,
 -97.69400730000001,
 -97.7657792,
 -97.80476329999999,
 -97.84839749999999,
 -97.830995,
 -97.749048,
 -97.9539494,
 -97.7462406,
 -97.754052,
 -97.6311468,
 -97.76913479999999,
 -97.6820486,
 -97.7390604,
 -97.76117780000001,
 -97.77298599999999,
 -72.700733,
 -97.7143893,
 -97.74789489999999,
 -97.7561703,
 -

In [16]:
geocode_data = pd.DataFrame()

geocode_data["location_found"] = scraped_locations
geocode_data["latitude"] = latitudes
geocode_data["longitude"] = longitudes

In [17]:
geocode_data

Unnamed: 0,location_found,latitude,longitude
0,"Colony Creek And Hunters Trace, Austin, TX",30.366449,-97.711311
1,"8700 Research Blvd, Austin, TX",30.364744,-97.719159
2,"8700 Research, Austin, TX",30.364744,-97.719159
3,"Austin, TX",30.267153,-97.743061
4,"Ih 35 And 41St St, Austin, TX",30.297646,-97.718132
...,...,...,...
30742,"10311 Georgian Dr, Austin, TX",30.369045,-97.687137
30743,"Walnut Drive East And Georgian Drive, Austin, TX",30.351062,-97.700791
30744,"7Th Street And Chicon, Austin, TX",30.262560,-97.722131
30745,"924 E 51St, Austin, TX",30.311146,-97.712745


In [19]:
cleanData2 = cleanData.merge(geocode_data, how='left', left_on="found_location", right_on="location_found")

In [20]:
cleanData2

Unnamed: 0,animal_id,animal_type,breed,age_upon_outcome,outcome_type,sex_upon_outcome,age_upon_outcome_(days),age_upon_outcome_(years),age_upon_outcome_age_group,outcome_datetime,...,age_upon_intake_age_group,intake_datetime,intake_month,intake_year,intake_monthyear,intake_weekday,time_in_shelter_days,location_found,latitude,longitude
0,A006100,Dog,Spinone Italiano Mix,10 years,Return to Owner,Neutered Male,3650,10.000000,"(7.5, 10.0]",12/7/2017 14:07,...,"(7.5, 10.0]",12/7/2017 0:00,12,2017,2017-12,Thursday,0.588194,"Colony Creek And Hunters Trace, Austin, TX",30.366449,-97.711311
1,A006100,Dog,Spinone Italiano Mix,7 years,Return to Owner,Neutered Male,2555,7.000000,"(5.0, 7.5]",12/20/2014 16:35,...,"(5.0, 7.5]",12/19/2014 10:21,12,2014,2014-12,Friday,1.259722,"8700 Research Blvd, Austin, TX",30.364744,-97.719159
2,A006100,Dog,Spinone Italiano Mix,6 years,Return to Owner,Neutered Male,2190,6.000000,"(5.0, 7.5]",3/8/2014 17:10,...,"(5.0, 7.5]",3/7/2014 14:26,3,2014,2014-03,Friday,1.113889,"8700 Research, Austin, TX",30.364744,-97.719159
3,A047759,Dog,Dachshund,10 years,Transfer,Neutered Male,3650,10.000000,"(7.5, 10.0]",4/7/2014 15:12,...,"(7.5, 10.0]",4/2/2014 15:55,4,2014,2014-04,Wednesday,4.970139,"Austin, TX",30.267153,-97.743061
4,A163459,Dog,Miniature Schnauzer Mix,15 years,Return to Owner,Intact Female,5475,15.000000,"(12.5, 15.0]",11/14/2014 19:28,...,"(12.5, 15.0]",11/14/2014 15:11,11,2014,2014-11,Friday,0.178472,"Ih 35 And 41St St, Austin, TX",30.297646,-97.718132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68812,A764534,Dog,Dachshund Mix,1 year,Adoption,Spayed Female,365,1.000000,"(-0.025, 2.5]",1/5/2018 16:55,...,"(-0.025, 2.5]",12/31/2017 16:29,12,2017,2017-12,Sunday,5.018056,"Walnut Drive East And Georgian Drive, Austin, TX",30.351062,-97.700791
68813,A764535,Dog,Lhasa Apso Mix,1 year,Transfer,Intact Male,365,1.000000,"(-0.025, 2.5]",1/4/2018 15:10,...,"(-0.025, 2.5]",12/31/2017 16:54,12,2017,2017-12,Sunday,3.927778,"124 West Anderson Lane, Austin, TX",30.344681,-97.705226
68814,A764536,Dog,Chihuahua Shorthair Mix,2 months,Adoption,Spayed Female,60,0.164384,"(-0.025, 2.5]",1/25/2018 17:34,...,"(-0.025, 2.5]",12/31/2017 17:05,12,2017,2017-12,Sunday,25.020139,"7Th Street And Chicon, Austin, TX",30.262560,-97.722131
68815,A764537,Dog,Chihuahua Shorthair Mix,3 months,Adoption,Intact Male,90,0.246575,"(-0.025, 2.5]",2/13/2018 16:13,...,"(-0.025, 2.5]",12/31/2017 7:26,12,2017,2017-12,Sunday,44.365972,"924 E 51St, Austin, TX",30.311146,-97.712745


In [21]:
cleanData2.to_json("acc_data.json", orient = "records")