## AKC data countrywide
#### Micaela McCall

In [1]:
from pprint import pprint
import pandas as pd
import os

## Attribute data

In [2]:
# Read the two breed-attribute tables from disk.
turcsan = pd.read_csv('../dogdata/turcsan.csv')
coren = pd.read_csv('../dogdata/coren-edit.csv')

# From the Coren table only the breed name and its 'Obedient' column are kept.
coren = coren.loc[:, ['Breed', 'Obedient']]

# Outer-join the two tables on breed name so that a breed present in either
# table appears in the result; 'Breed' is then restored as a regular column.
attrib = (
    coren.set_index('Breed')
    .join(turcsan.set_index('Breed'), how='outer')
    .reset_index()
)

## AKC data

In [3]:
def load_news_data(data_dir='../dogdata/akc_data'):
    """
    Load every CSV file in a data folder into one combined DataFrame.

    Each ``.csv`` file in ``data_dir`` is read with ``pd.read_csv`` and tagged
    with a ``City`` column derived from its file name: the first five
    characters and the last four characters (the ``.csv`` extension) are
    stripped. The per-file frames are concatenated (outer union of columns),
    exact duplicate rows are removed, and the index is renumbered from 0.

    Parameters
    ----------
    data_dir : str, optional
        Folder to scan for ``.csv`` files. Defaults to the folder this
        notebook has always read from.

    Returns
    -------
    pandas.DataFrame
        The combined rows of all files, including the added ``City`` column.

    Raises
    ------
    ValueError
        If ``data_dir`` contains no ``.csv`` files.
    """
    # os.listdir returns names in arbitrary order; sorting makes the row
    # order of the combined frame reproducible across machines and runs.
    csv_names = sorted(f for f in os.listdir(data_dir) if f.endswith(".csv"))
    if not csv_names:
        # pd.concat on an empty list also raises ValueError, but with a
        # message that does not point at the real cause.
        raise ValueError("No .csv files found in {!r}".format(data_dir))

    frames = []
    for filename in csv_names:
        df = pd.read_csv(os.path.join(data_dir, filename))
        # NOTE(review): assumes every file name starts with a fixed
        # 5-character prefix followed by the city name -- confirm.
        df['City'] = filename[5:-4]
        frames.append(df)

    combined = pd.concat(frames, join='outer').drop_duplicates()
    return combined.reset_index(drop=True)

# Build the working AKC table in one pass: load all city files, discard the
# "Ranking" column, drop rows containing any missing value, and remove the
# rows whose Breed is the literal 'Grand Total'.
akc_full = (
    load_news_data()
    .drop(columns="Ranking")
    .dropna()
    .loc[lambda frame: frame['Breed'] != 'Grand Total']
)

#### Make each breed name singular so that it matches the attribute dataset

In [4]:
# Strip a single trailing 's' from every breed name (e.g. 'Pugs' -> 'Pug') so
# the AKC names line up with the singular names in the attribute table.
# Names that do not end in 's' -- including empty strings and names ending in
# ')' such as 'Spaniels (Cocker)' -- are left unchanged.
#
# NOTE(review): the rule is purely textual, so a breed name that legitimately
# ends in 's' is shortened as well -- verify the results against the
# attribute data.
# NOTE: re-running this cell strips another 's' from any name that still ends
# in one; re-run the loading cell first.
breed_names = akc_full['Breed']
ends_with_s = breed_names.str.endswith('s', na=False)
breed_singular = breed_names.mask(ends_with_s, breed_names.str[:-1]).tolist()

# .assign builds a new frame instead of writing into a filtered slice, which
# avoids pandas' SettingWithCopyWarning.
akc_full = akc_full.assign(Breed=breed_singular)

Check the breed names in both datasets to determine which replacements are needed

In [None]:
def _print_matching(names, fragment):
    """Print every name in ``names`` that contains the substring ``fragment``."""
    for name in names:
        if fragment in name:
            print(name)


# Exploratory check: how are the fox terriers spelled in the attribute table,
# and which AKC names still contain the plural 'Terriers'?
_print_matching(attrib.Breed, 'Fox Terrier')
print('___________')
_print_matching(akc_full.Breed, 'Terriers')

#### Replace breed names in AKC data to match those in attribute data

In [6]:
# Maps AKC group-style names ('Group (Variety)') to the breed names used in
# the attribute table. Keys must match the AKC spelling exactly; names not
# listed here are left untouched by the replace below.
repl_dict = {
    # Spaniels
    'Spaniels (Cocker)': 'Cocker Spaniel',
    'Spaniels (English Cocker)': 'English Cocker Spaniel',
    'Spaniels (American Water)': 'American Water Spaniel',
    'Spaniels (Clumber)': 'Clumber Spaniel',
    'Spaniels (English Springer)': 'English Springer Spaniel',
    'Spaniels (Field)': 'Field Spaniel',
    'Spaniels (Irish Water)': 'Irish Water Spaniel',
    'Spaniels (Welsh Springer)': 'Welsh Springer Spaniel',
    # Retrievers
    # NOTE(review): 'Curly Coated' has no hyphen while 'Flat-Coated' does --
    # presumably mirrors the attribute table's spelling; confirm.
    'Retrievers (Chesapeake Bay)': 'Chesapeake Bay Retriever',
    'Retrievers (Curly-Coated)': 'Curly Coated Retriever',
    'Retrievers (Flat-Coated)': 'Flat-Coated Retriever',
    'Retrievers (Golden)': 'Golden Retriever',
    'Retrievers (Labrador)': 'Labrador Retriever',
    # Pointers
    'Pointers (German Shorthaired)': 'German Shorthaired Pointer',
    'Pointers (German Wirehaired)': 'German Wirehaired Pointer',
    # Setters
    'Setters (English)': 'English Setter',
    'Setters (Gordon)': 'Gordon Setter',
    'Setters (Irish)': 'Irish Setter',
    # Fox terriers
    'Fox Terriers (Wire)': 'Wire Fox Terrier',
    'Fox Terriers (Smooth)': 'Fox Terrier',
}

# Whole-value replacement of the listed names in the Breed column.
akc_full['Breed'] = akc_full['Breed'].replace(repl_dict)

## Combine AKC and attribute data

In [7]:
# Left join on breed name: every AKC row is kept and gains the attribute
# columns of its breed; rows whose breed has no match in the attribute table
# get NaN in those columns.
akc_attrib = (
    akc_full.set_index('Breed')
    .join(attrib.set_index('Breed'), how='left')
    .reset_index()
)

## Print average attribute per city

In [8]:
# Mean of each attribute per city, rounded to two decimals. Despite the
# variable name, the grouping key is City, not Breed.
#
# The mean is taken over the breed rows of each city and is NOT weighted by
# 'Reg Count'. Cities for which any attribute mean is NaN are dropped.
#
# Only the grouping key and the attribute columns are selected: the string
# column 'Breed' cannot be averaged, and recent pandas versions raise a
# TypeError for it instead of silently dropping it.
mean_attrib_per_breed = (
    akc_attrib[['City', 'Obedient', 'Trainable', 'Bold', 'Calm', 'Sociable']]
    .groupby('City')
    .mean()
    .round(decimals=2)
    .dropna()
)
mean_attrib_per_breed

Unnamed: 0_level_0,Obedient,Trainable,Bold,Calm,Sociable
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Albany NY,0.63,0.73,0.48,0.45,0.50
Atlanta GA,0.58,0.70,0.40,0.43,0.46
Austin TX,0.58,0.64,0.43,0.49,0.48
Baltimore MD,0.61,0.66,0.46,0.47,0.48
Boston MA,0.58,0.66,0.45,0.46,0.47
Buffalo NY,0.59,0.61,0.49,0.50,0.50
Charlotte NC,0.59,0.64,0.47,0.51,0.52
Chicago IL,0.58,0.65,0.43,0.47,0.47
Cincinnati OH,0.58,0.67,0.42,0.46,0.48
Cleveland OH,0.60,0.62,0.43,0.49,0.48


## Print number of dogs per city
#### Select a city or breed

In [9]:
# List the distinct city and breed names that can be used as filters below.
print(
    'Cities to chose from: \n',
    akc_attrib['City'].unique(),
    '\n \n Breeds to chose from: \n',
    akc_attrib['Breed'].unique(),
)

Cities to chose from: 
 ['Austin TX' 'New York City NY - Bronx_Kings_New York_Queens_Richmond'
 'Indianapolis IN' 'Baltimore MD' 'Tampa FL' 'Buffalo NY' 'Atlanta GA'
 'Long Island NY - Suffolk_Nassau' 'Dallas TX' 'Seattle WA' 'NY Tribeca'
 'Charlotte NC' 'Phoenix AZ' 'San Francisco CA' 'San Diego CA' 'Miami FL'
 'San Antonio TX' 'Fort Worth TX' 'Tucson AZ' 'Pittsburgh PA' 'Orlando FL'
 'Saint Louis MO' 'NY Williamsburg Brooklyn' 'Sacramento CA'
 'Los Angeles CA' 'Boston MA' 'Northern NJ - Bergen_Hudson_Passaic'
 'Cincinnati OH' 'Denver CO' 'Jacksonville FL'
 'Raleigh NC - Wake_Durham_Orange' 'Houston TX' 'Chicago IL'
 'Salt Lake City UT' 'Milwaukee WI' 'Albany NY' 'Portland OR'
 'Newark NJ - Essex' 'Washington DC' 'Hartford CT' 'Nashville TN'
 'NY New Dorp Staten Island' 'Detroit MI' 'Philadelphia PA' 'NY Chelsea'
 'Kansas City MO' 'NY East Village' 'Columbus OH' 'Richmond VA'
 'Las Vegas NV' 'NY Upper West Side' 'Cleveland OH' 'West Palm Beach FL'
 'NY Washington Heights' 'Oakland CA'

In [10]:
# Filters for the registration-count lookup; an empty string means
# "do not filter on this field".
city, breed = '', ''

In [11]:
# Summed registration count for every (breed, city) pair.
total_breed_per_city = (
    akc_attrib[['Breed', 'City', 'Reg Count']]
    .groupby(['Breed', 'City'])
    .sum()
    .round(decimals=2)
    .reset_index()
)

# A filter is active only when its string is non-empty.
has_city = len(city) > 0
has_breed = len(breed) > 0

if has_city and has_breed:
    # Both filters set: the count for that breed in that city.
    both_match = (total_breed_per_city.City == city) & (total_breed_per_city.Breed == breed)
    print(breed, 'in', city, '\n', total_breed_per_city[both_match][['Reg Count']])
elif has_city:
    # City only: every breed registered in that city.
    in_city = total_breed_per_city.City == city
    print(city, '\n', total_breed_per_city[in_city][['Breed', 'Reg Count']])
elif has_breed:
    # Breed only: that breed's count in every city.
    is_breed = total_breed_per_city.Breed == breed
    print(breed, '\n', total_breed_per_city[is_breed][['City', 'Reg Count']])
else:
    # No filter: print the whole table.
    print(total_breed_per_city)

                  Breed                                               City  \
0         Affenpinscher                                         Atlanta GA   
1         Affenpinscher                                          Austin TX   
2         Affenpinscher                                       Baltimore MD   
3         Affenpinscher                                         Buffalo NY   
4         Affenpinscher                                          Dallas TX   
5         Affenpinscher                                    Indianapolis IN   
6         Affenpinscher                    Long Island NY - Suffolk_Nassau   
7         Affenpinscher                                         NY Tribeca   
8         Affenpinscher  New York City NY - Bronx_Kings_New York_Queens...   
9         Affenpinscher                                         Seattle WA   
10        Affenpinscher                                           Tampa FL   
11         Afghan Hound                                         