In [1]:
import pandas as pd
import io
import requests
import matplotlib.pyplot as plt
from fastai.tabular.all import *

In [2]:
# Gather the data
data_url = "https://data.sonomacounty.ca.gov/api/views/924a-vesw/rows.csv?accessType=DOWNLOAD"

# Fail fast on network problems: the timeout keeps the cell from hanging
# indefinitely, and raise_for_status() stops an HTTP error page from being
# silently parsed as if it were the CSV payload.
response = requests.get(data_url, timeout=60)
response.raise_for_status()

# Decode the raw bytes as UTF-8 and parse the CSV from an in-memory buffer.
s = response.content
data = pd.read_csv(io.StringIO(s.decode('utf-8')))

# Preview the first rows of the downloaded table.
data.head()

For more information about the data, visit https://data.sonomacounty.ca.gov/Government/Animal-Shelter-Intake-and-Outcome/924a-vesw

In [3]:
# Summary statistics restricted to the object-dtype columns.
data.describe(include=[object])

In [4]:
# Column labels of the dataset, as a plain Python list
data.columns.tolist()

In [5]:
# Distinct values that appear in the "Outcome Type" column
data["Outcome Type"].unique().tolist()

In [6]:
# Number of non-null "Count" entries for every (animal type, outcome type) pair.
outcome = (
    data
    .groupby(["Type", "Outcome Type"])["Count"]
    .count()
)
# Show at most the first 100 groups.
outcome.iloc[:100]

In [7]:
# Number of non-null "Count" entries per animal type.
total = (
    data
    .groupby(["Type"])["Count"]
    .count()
)
total.head(5)

In [8]:
colors = ['#ff9999','#66b3ff','#99ff99','#ffcc99']
fig1, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 7))
fig1.suptitle('Adoption Outcomes for each Animal Type', fontsize=20)


def _plot_outcome_pie(ax, animal_type, title):
    """Draw the outcome-type breakdown for one animal type as a pie chart.

    Parameters:
        ax: matplotlib Axes to draw the pie on.
        animal_type: value of the "Type" column to select (e.g. "CAT").
        title: title placed above the pie.

    Returns:
        (subset, grouped): the rows of `data` for this animal type, and the
        per-"Outcome Type" count of non-null "Count" entries used for the
        wedges.
    """
    subset = data[data["Type"] == animal_type]
    grouped = subset.groupby(["Outcome Type"])["Count"].count()
    ax.pie(list(grouped), labels=list(grouped.index.values), autopct='%1.1f%%',
           startangle=90, colors=colors, shadow=True, pctdistance=0.85)
    ax.set_title(title)
    return subset, grouped


# One pie per animal type. The per-type frames and grouped counts stay
# available as top-level names because later cells read the *_grouped series.
cats, cats_grouped = _plot_outcome_pie(ax1, "CAT", "Cats")
dogs, dogs_grouped = _plot_outcome_pie(ax2, "DOG", "Dogs")
other, other_grouped = _plot_outcome_pie(ax3, "OTHER", "Other")

plt.show()


In [9]:
# Plot adoption rates for each animal type


def _adoption_pct(outcome_counts):
    """Return the share of "ADOPTION" in a per-outcome count Series, in percent.

    Uses .get() so that a group with no "ADOPTION" entry yields 0 instead of
    raising KeyError.
    """
    return outcome_counts.get("ADOPTION", 0) / outcome_counts.sum() * 100


# Build the Series in one step with an explicit float dtype; creating an
# empty, dtype-less Series and filling it afterwards relies on pandas'
# default dtype, which differs between pandas versions.
series = pd.Series(
    {
        "Cats": _adoption_pct(cats_grouped),
        "Dogs": _adoption_pct(dogs_grouped),
        "Other": _adoption_pct(other_grouped),
    },
    dtype=float,
)

ax = series.plot(kind="bar")
ax.set_title("Adoption Rate by Animal Type")
ax.set_xlabel("Animal type")
ax.set_ylabel("Adopted (% of recorded outcomes)")
plt.show()

In [10]:
# Note: an earlier draft built the loaders directly with
# TabularDataLoaders.from_df and "Outcome Type" as the target; the version
# below targets "Days in Shelter" instead.

# Fixed seed so the 80/20 train/validation split is the same on every run;
# an unseeded RandomSplitter draws a new split each time the cell executes.
SEED = 42
splits = RandomSplitter(valid_pct=0.2, seed=SEED)(range_of(data))

# Categorify is the only preprocessing step, applied to the listed
# categorical columns. No continuous feature columns are declared.
# NOTE(review): "Days in Shelter" is presumably numeric, in which case fastai
# treats this as a regression target — confirm against the loaded dtypes.
to = TabularPandas(data, procs=[Categorify],
                  cat_names=["Type", "Breed", "Color", "Sex", "Intake Type"],
                  y_names="Days in Shelter",
                  splits=splits)

In [11]:
# First three rows of the processed feature table.
to.xs.head(3)

In [12]:
# Build DataLoaders from the TabularPandas object, using batches of 64 rows.
dls = to.dataloaders(bs=64)
# Display one batch as a quick sanity check of the inputs and target.
dls.show_batch()

In [13]:
# The target configured for this model is "Days in Shelter". Assuming that
# column is numeric (TODO confirm), this is a regression problem, so
# `accuracy` (argmax of the output compared to a class label) is not a
# meaningful metric. Report regression errors instead.
learn = tabular_learner(dls, metrics=[rmse, mae])

# Run the learning-rate finder to help choose a learning rate for training.
learn.lr_find()

In [14]:
# Train for a single epoch with the one-cycle schedule.
learn.fit_one_cycle(n_epoch=1)

In [15]:
# Display a sample of the learner's results for a visual check.
learn.show_results()