# Brownsville Project - Analysis

This notebook contains a brief analysis of the most common housing complaints reported for buildings in the Brownsville (Brooklyn) dataset.

In [3]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

import datetime
from collections import Counter
from helpers import *

In [4]:
# Load the complaint records from the CSV export in the working directory.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# saved index — consider index_col=0 if nothing downstream relies on it.
raw_data = pd.read_csv("brownsville.csv", sep=",")

# Preview the first rows as the cell output.
raw_data.head()

Unnamed: 0,Zip,Borough,BoroughID,BuildingID,ProblemID,ComplaintID,UnitTypeID,UnitType,SpaceTypeID,SpaceType,...,MajorCategoryID,MajorCategory,MinorCategoryID,MinorCategory,CodeID,Code,StatusID,Status,StatusDate,StatusDescription
0,11233,BROOKLYN,3,219263,14570531,6977738,91,APARTMENT,541,BATHROOM,...,65,WATER LEAK,380,HEAVY FLOW,2828,AT WALL OR CEILING,2,CLOSE,07/31/2014,The Department of Housing Preservation and Dev...
1,11212,BROOKLYN,3,283890,14618845,6996145,91,APARTMENT,546,KITCHEN,...,9,PLUMBING,68,WATER SUPPLY,2564,NO WATER,2,CLOSE,08/23/2014,The Department of Housing Preservation and Dev...
2,11212,BROOKLYN,3,283890,14618846,6996145,91,APARTMENT,545,ENTRANCE/FOYER,...,56,DOOR/WINDOW,333,DOOR,2664,BROKEN OR MISSING,2,CLOSE,08/23/2014,The Department of Housing Preservation and Dev...
3,11212,BROOKLYN,3,283890,14618847,6996145,91,APARTMENT,542,BEDROOM,...,63,UNSANITARY CONDITION,375,MOLD,2817,,2,CLOSE,08/23/2014,The Department of Housing Preservation and Dev...
4,11233,BROOKLYN,3,330347,14585091,6982717,91,APARTMENT,543,ENTIRE APARTMENT,...,59,HEAT/HOT WATER,348,APARTMENT ONLY,2833,NO HEAT AND NO HOT WATER,2,CLOSE,08/05/2014,More than one complaint was received for this ...


In [5]:
# Weekday number -> English name, using the pandas convention Monday=0 ... Sunday=6.
day = dict(enumerate([
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]))

# Parse the status dates in place and store the numeric weekday next to them.
raw_data["StatusDate"] = pd.to_datetime(raw_data["StatusDate"])
raw_data["StatusDateWeekday"] = raw_data["StatusDate"].dt.weekday

# Count records per weekday of the status date (most frequent first), then
# relabel the numeric index with weekday names for display.
days_reports = raw_data["StatusDateWeekday"].value_counts()
days_reports.index = days_reports.index.map(day)
days_reports

Monday       29894
Thursday     27863
Friday       23030
Tuesday      21678
Wednesday    21157
Sunday       10812
Saturday      9244
Name: StatusDateWeekday, dtype: int64

In [6]:
# Example drill-down for a single building (ID 219263): occurrence counts
# broken down by Type, MajorCategory and MinorCategory.
# NOTE(review): without find_all=True the helper appears to return only the
# top entries — confirm against helpers.py.
get_feature_occurrences_by_building(
    raw_data,
    219263,
    by=["Type", "MajorCategory", "MinorCategory"],
)

Type           MajorCategory         MinorCategory            
EMERGENCY      WATER LEAK            HEAVY FLOW                   8
               UNSANITARY CONDITION  MOLD                         6
NON EMERGENCY  UNSANITARY CONDITION  PESTS                        4
EMERGENCY      ELECTRIC              NO LIGHTING                  3
NON EMERGENCY  WATER LEAK            SLOW LEAK                    3
               FLOORING/STAIRS       FLOOR                        2
               PAINT/PLASTER         WALL                         2
               UNSANITARY CONDITION  GARBAGE/RECYCLING STORAGE    2
EMERGENCY      DOOR/WINDOW           DOOR                         1
               GENERAL               CABINET                      1
dtype: int64

In [7]:
# One (building_id, occurrences) pair per distinct BuildingID, where
# `occurrences` is whatever the helper returns for that building when grouped
# by (MajorCategory, MinorCategory) with find_all=True.
building_common_complaints = [
    (
        building_id,
        get_feature_occurrences_by_building(
            raw_data,
            building_id,
            by=["MajorCategory", "MinorCategory"],
            find_all=True,
        ),
    )
    for building_id in set(raw_data["BuildingID"])
]

In [8]:
# Rank buildings by the sum of their occurrence counts, largest first.
# sorted() is stable, so buildings with equal totals keep their relative order.
building_common_complaints = sorted(
    building_common_complaints,
    key=lambda entry: entry[1].values.sum(),
    reverse=True,
)

# Number of entries (category combinations) recorded for the top-ranked building.
len(building_common_complaints[0][1])

46

In [None]:
# Print a short summary for the ten top-ranked buildings.
for building_id, occurrences in building_common_complaints[:10]:
    # The first index entry is taken as the most common (major, minor) pair;
    # presumably the helper returns counts sorted in descending order — verify.
    top_major, top_minor = occurrences.index[0]
    total = occurrences.values.sum()

    print("Building ID:", building_id)
    print("Most common major category:", top_major)
    print("Most common minor category:", top_minor)
    print("Number of complaints:", total)

    # Blank line between buildings.
    print()

In [None]:
# MajorCategory values grouped per building; no aggregation is applied in this cell.
# The grouping key is deliberately kept as a one-element list, as in the original.
groups = raw_data.groupby(
    ["BuildingID"],
)["MajorCategory"]


In [None]:
# NOTE(review): placeholder cell — it walks over every distinct BuildingID but
# the loop body is empty, so nothing is computed here yet.
for _id in set(raw_data["BuildingID"]):
    pass