In [108]:
import pandas as pd
import numpy as np
import import_ipynb

import base
from base import get_main_city_data, get_city_zip_data, get_top_complaints, get_key_complaints

In [109]:
import boroughs
from boroughs import get_borough_zipcodes

In [110]:
def get_zipcode_populations(zip_data_df, valid_zipcodes):
    zipcodes_table = {}
    zipcodes = zip_data_df.Zip
    populations = zip_data_df.Population

    for i in range (len(zip_data_df.Zip.keys())):
        if zipcodes[i] in valid_zipcodes:
            zipcodes_table[zipcodes[i]] = populations[i]
    return zipcodes_table

In [111]:
def get_most_populated_zipcodes(zipcodes_table, valid_zipcodes, count=10):
    all_valid_zipcodes = get_zipcode_populations(zip_data_df, valid_zipcodes)
    populated_zipcodes = pd.Series(all_valid_zipcodes).sort_values(ascending=False)[0:count]
    return populated_zipcodes

In [112]:
def get_zipcodes_table(main_data_df, zipcodes):
    zipcodes_table = {}
    for zipcode in zipcodes:
        zipcodes_table[zipcode] = main_data_df.loc[main_data_df.Zip == zipcode]
    return zipcodes_table

In [113]:
def calculate_zipcode_complainers(main_data_df, zipcodes, complaints):

    # Dict of all the boroughs and their raw data
    zipcode_table = get_zipcodes_table(main_data_df, zipcodes)

    # Dict of all the boroughs and their complaint counts
    zipcode_complaints = get_key_complaints(zipcodes, complaints)

    for zipcode in zipcodes:
        for complaint in zipcode_table[zipcode]["Complaint Type"]:
            # Skip any complaint not in our top ten
            if not complaint in complaints:
                continue
            zipcode_complaints[zipcode][complaint] +=1
    return zipcode_complaints

In [114]:
if __name__ == "__main__":
    main_data_df = get_main_city_data()
    zip_data_df = get_city_zip_data()

    # Get the top 10 complaints_count complaints
    top_complaints = get_top_complaints(main_data_df, count=10)
    top_ten_complaint_names = top_complaints.keys()

    borough_zips = get_borough_zipcodes(main_data_df)

    valid_zipcodes = []
    for borough in borough_zips:
        valid_zipcodes.extend(borough_zips[borough])

    top_populated_zipcodes = get_most_populated_zipcodes(zip_data_df, valid_zipcodes, count=10)
    zipcode_complainers = calculate_zipcode_complainers(main_data_df, top_populated_zipcodes.keys(), top_complaints)
    zipcode_complaints_dfs = []
    for zipcode in zipcode_complainers.keys():
        complainer_df = pd.DataFrame( 
             {
                'Zipcode'                   : zipcode,
                top_ten_complaint_names[0]  : zipcode_complainers[zipcode][top_ten_complaint_names[0]],
                top_ten_complaint_names[1]  : zipcode_complainers[zipcode][top_ten_complaint_names[1]],
                top_ten_complaint_names[2]  : zipcode_complainers[zipcode][top_ten_complaint_names[2]],
                top_ten_complaint_names[3]  : zipcode_complainers[zipcode][top_ten_complaint_names[3]],
                top_ten_complaint_names[4]  : zipcode_complainers[zipcode][top_ten_complaint_names[4]],
                top_ten_complaint_names[5]  : zipcode_complainers[zipcode][top_ten_complaint_names[4]],
                top_ten_complaint_names[6]  : zipcode_complainers[zipcode][top_ten_complaint_names[6]],
                top_ten_complaint_names[7]  : zipcode_complainers[zipcode][top_ten_complaint_names[7]],
                top_ten_complaint_names[8]  : zipcode_complainers[zipcode][top_ten_complaint_names[8]],
                top_ten_complaint_names[9]  : zipcode_complainers[zipcode][top_ten_complaint_names[9]],
            }, index={len(zipcode_complaints_dfs)+1}
        )
        zipcode_complaints_dfs.append(complainer_df)

In [115]:
zipcodes_complaint_table = zipcode_complaints_dfs[0].append([
    zipcode_complaints_dfs[1], zipcode_complaints_dfs[2], zipcode_complaints_dfs[3], 
    zipcode_complaints_dfs[4], zipcode_complaints_dfs[5], zipcode_complaints_dfs[6],
    zipcode_complaints_dfs[7], zipcode_complaints_dfs[8], zipcode_complaints_dfs[9]
])


In [116]:
zipcodes_complaint_table

Unnamed: 0,Zipcode,Request Large Bulky Item Collection,UNSANITARY CONDITION,Illegal Parking,Noise,PAINT/PLASTER,PLUMBING,HEAT/HOT WATER,Water System,DOOR/WINDOW,GENERAL
1,11368,0,1,1,0,0,0,0,0,0,0
2,11226,0,1,1,0,2,2,2,0,2,0
3,11373,1,1,2,0,0,0,0,0,0,0
4,11220,3,0,0,1,1,1,0,1,1,1
5,11385,15,0,5,1,0,0,1,0,0,0
6,10467,0,3,1,0,1,1,0,0,1,2
7,10025,0,0,0,1,0,0,0,1,1,1
8,11208,0,1,1,0,0,0,0,1,1,2
9,11236,1,0,1,0,2,2,0,0,1,0
10,11207,0,0,1,0,0,0,0,0,1,1
