## Will Earley Script

Thank you for this opportunity. I really enjoyed putting together this assignment. I focused on explainability here so that is why some commands are a bit more explicit or redundant than I typically would implement. Thanks again!

In [51]:
# import libraries and read in data
import pandas as pd

owners = pd.read_csv('/Users/williamearley/Broncos/owners.csv')
pets = pd.read_csv('/Users/williamearley/Broncos/pets.csv')
procedure_details = pd.read_csv('/Users/williamearley/Broncos/procedure_details.csv')
procedures = pd.read_csv('/Users/williamearley/Broncos/procedures.csv')

In [2]:
# Exploring the data
owners.head(5)

Unnamed: 0,OwnerID,Name,Surname,StreetAddress,City,State,StateFull,ZipCode
0,6049,Debbie,Metivier,315 Goff Avenue,Grand Rapids,MI,Michigan,49503
1,2863,John,Sebastian,3221 Perry Street,Davison,MI,Michigan,48423
2,3518,Connie,Pauley,1539 Cunningham Court,Bloomfield Township,MI,Michigan,48302
3,3663,Lena,Haliburton,4217 Twin Oaks Drive,Traverse City,MI,Michigan,49684
4,1070,Jessica,Velazquez,3861 Woodbridge Lane,Southfield,MI,Michigan,48034


In [4]:
pets.head(5)

Unnamed: 0,PetID,Name,Kind,Gender,Age,OwnerID
0,J6-8562,Brandy,Dog,male,11,5168
1,Q0-2001,Roomba,Cat,male,9,5508
2,M0-2904,Simba,Cat,male,1,3086
3,R3-7551,Keller,Parrot,female,2,7908
4,P2-7342,Cuddles,Dog,male,13,4378


In [6]:
procedure_details.head(5)

Unnamed: 0,ProcedureType,ProcedureSubCode,Description,Price
0,OFFICE FEES,1,Office Call,32
1,OFFICE FEES,2,Emergency,100
2,OFFICE FEES,3,Reck,24
3,GROOMING,1,Bath,15
4,GROOMING,2,Flea Dip,15


In [7]:
procedures.head(5)

Unnamed: 0,PetID,Date,ProcedureType,ProcedureSubCode
0,T0-3277,42391,VACCINATIONS,5
1,G6-6501,42398,VACCINATIONS,5
2,P9-3625,42399,ORTHOPEDIC,7
3,F1-1855,42402,VACCINATIONS,5
4,S1-2243,42404,VACCINATIONS,5


In [9]:
owners.shape

(89, 8)

In [8]:
pets.shape

(100, 6)

In [10]:
procedure_details.shape

(41, 4)

In [11]:
procedures.shape

(41, 4)

# Problem 1: What is the name of the oldest dog in Southfield

In [24]:
# Merge pet and owner dataframes

pets_owners = pd.merge(pets, owners, on="OwnerID", how="outer")

In [26]:
southfield = pets_owners[pets_owners['City'] == 'Southfield']
southfield = southfield.sort_values(by='Age', ascending=False)

In [27]:
pets_owners.shape

(100, 13)

In [28]:
southfield.head(5)

Unnamed: 0,PetID,Name_x,Kind,Gender,Age,OwnerID,Name_y,Surname,StreetAddress,City,State,StateFull,ZipCode
12,T2-2142,Stowe,Cat,female,15,1132,Rosa,Quarles,4791 Tennessee Avenue,Southfield,MI,Michigan,48034
85,P1-2578,Tiger,Cat,male,14,3034,Paul,Haring,2612 Tennessee Avenue,Southfield,MI,Michigan,48034
21,J0-7893,Crockett,Dog,male,12,1334,Jason,Cantwell,2372 Don Jackson Lane,Southfield,MI,Michigan,48075
34,T0-3277,Humbert,Cat,male,12,8133,Stacey,Randolph,1145 D Street,Southfield,MI,Michigan,48075
57,L4-2594,Newcastle,Cat,male,6,6405,Lawrence,Roder,1386 Hayhurst Lane,Southfield,MI,Michigan,48034


Here we see, the oldest dog is Crockett, at 12 years old.  

# Problem 2: What is the average (mean) number of pets per city?

In [38]:
# Using previous dataframe pets_owners and groupby
mean_pets = pets_owners.groupby("City").size().mean()
mean_pets

2.5

In [50]:
# Double checking a bit more basic way
cities_count = pets_owners["City"].unique()
pets_count = pets_owners["PetID"].unique()
mean_pets_doublecheck = len(pets_count) / len(cities_count)
mean_pets_doublecheck

2.5

# Problem 3: Which owner spend the most on procedures for their pet(s)?

In [65]:
# We need to do some merging here, all dataframes will be involved as they each hold a piece of the puzzle!

merged = pd.merge(pets, procedures, on="PetID", how='left')
merged = pd.merge(merged, owners, on="OwnerID", how='left')
merged = pd.merge(merged, procedure_details, on=['ProcedureType', "ProcedureSubCode"], how='left')

# Next we add up how much each owner spent on their pet
owner_total = merged.groupby(["OwnerID", 'Name_y', "Surname"])['Price'].sum().reset_index()

# Finally, we find the name of the person who spent the most on their pet(s)
highest_spender = owner_total.loc[owner_total['Price'].idxmax()]
highest_spender

OwnerID      8316
Name_y     Daniel
Surname       Fay
Price       450.0
Name: 76, dtype: object

# Problem 4: How many owners spent 20 dollars or more on procedures for their pets?

In [70]:
# Luckily we already have a data frame well designed to tackle this problem. We can easily just see how many owners
# spent 20 or more dollars on their pets.

big_spenders = (owner_total['Price'] >= 20).sum()
big_spenders

12

# Problem 5: How many owners have at least two different kinds of pets (e.g. a dog and a cat)?

In [80]:
# First we are going to group by OwnerID and Kind, only including unique values so we can see all the 
# owners that have different kinds of pets. 

owners_pet_counts = pets.groupby("OwnerID")["Kind"].nunique()

In [82]:
# Now we just need to see how man of these are greater than or equal to 2

two_or_more = owners_pet_counts[owners_pet_counts >= 2]

len(two_or_more)

4

# Problem 6: How many owners have pets where the first letter of their name (OwnerName) matches the first letter of their pet's name (PetName)? E.g. Cookie and Charles.

In [89]:
# Here we just need to merge pets and owners and then use str to match the letters.

pet_owners = pd.merge(pets, owners, on="OwnerID", how="inner")
cookie_charles_df = pet_owners[pet_owners['Name_x'].str[0].str.lower() == pet_owners["Name_y"].str[0].str.lower()]
cookie_charles = cookie_charles_df['OwnerID'].nunique()
cookie_charles

8

# Problem 7: What percentage of pets received a vaccination?

In [118]:
# Here we find out how many unique pets had a vaccination, and then divide that by the total number of pets.

vaccinated = procedures[procedures['ProcedureType'].str.contains('VACCINATIONS', case=False)]
unique_vac = vaccinated['PetID'].nunique()
unique_pets = pets['PetID'].nunique()
percent_vac = (unique_vac / unique_pets) * 100
percent_vac

24.0

# Problem 8: What percentage of cities have more male pets than female pets?

In [124]:
# Here we just group the pet_owners df by city and count each gender in each city.
# We see how many times the city has more males, and then calculate the overall percentage

city_pet_counts = pet_owners.groupby('City')['Gender'].value_counts().unstack().fillna(0)
more_males = (city_pet_counts['male'] > city_pet_counts['female']).sum()
total_cities = len(city_pet_counts)
percent = (more_males / total_cities) * 100
percent

57.49999999999999

# Problem 9: Which city's pet sample is made up of exactly 70% dogs? The answer is case sensitive, so please match the value for City exactly.


In [127]:
# To complete this final question, we just need to groupby the city and map where "Kind" is a dog. 
# Then we just see which city is equal to 70%

city_pet_percent = pet_owners.groupby('City')['Kind'].apply(lambda x: (x == 'Dog').mean() * 100)
seventy_city = city_pet_percent[city_pet_percent == 70].index.tolist()
seventy_city

['Grand Rapids']

In [132]:
# Double checking visually because I REALLY hope this opportunity works out
# I love Denver and the Broncos organization, we see here, 7/10 pets in Grand Rapids are dogs

gr = pet_owners[pet_owners['City'] == 'Grand Rapids']
gr

Unnamed: 0,PetID,Name_x,Kind,Gender,Age,OwnerID,Name_y,Surname,StreetAddress,City,State,StateFull,ZipCode
0,J6-8562,Brandy,Dog,male,11,5168,Robert,Foster,4680 Rubaiyat Road,Grand Rapids,MI,Michigan,49503
7,X0-8765,Vuitton,Parrot,female,11,7581,Florence,Nolen,3103 Howard Street,Grand Rapids,MI,Michigan,49503
10,M8-7852,Cookie,Cat,female,8,7606,Edna,Moreno,2548 Wetzel Lane,Grand Rapids,MI,Michigan,49503
16,G4-8096,Danger,Dog,male,9,9037,Roger,Lade,1315 Echo Lane,Grand Rapids,MI,Michigan,49503
36,L0-6660,Cookie,Dog,male,9,5806,Jerry,Reyna,1073 Rubaiyat Road,Grand Rapids,MI,Michigan,49503
39,J1-6366,Bruce,Dog,male,3,8316,Daniel,Fay,894 Twin Oaks Drive,Grand Rapids,MI,Michigan,49503
56,R7-4835,Cookie,Dog,male,12,5215,Larry,Hunt,3384 West Street,Grand Rapids,MI,Michigan,49503
65,I6-9459,Biscuit,Dog,female,4,6049,Debbie,Metivier,315 Goff Avenue,Grand Rapids,MI,Michigan,49503
79,K4-5012,Natacha,Parrot,female,11,2755,Anne,Hudson,4110 Howard Street,Grand Rapids,MI,Michigan,49503
82,K0-8177,Bruce,Dog,male,12,2809,Bruce,Dunne,4119 Kimberly Way,Grand Rapids,MI,Michigan,49503
