In [1]:
from datetime import datetime, date
from functools import reduce

import pandas as pd
import numpy as np

import math


# Begin processing Income Dataframe

In [2]:
# Load the raw census demographics CSV (path is relative to the working directory).
# The following cell reads its CSA2010, tpop10 and mhhi16 columns.
income_df = pd.read_csv("./datasets/Vital_Signs_16_Census_Demographics.csv")

In [3]:
# Flatten the income data to one row per neighborhood.
#
# A CSA2010 value may name several neighborhoods joined by "/". Each name gets
# its own row, and every such row repeats the tpop10 and mhhi16 values of the
# source row unchanged (they are copied, not divided between the names).
income_neighborhood = []
income_total_pop = []
income_medium_hh = []  # holds mhhi16; exposed below as 'Median Household Income'

for csa_name, total_pop, median_income in zip(
        income_df["CSA2010"], income_df["tpop10"], income_df["mhhi16"]):
    # pandas reads a missing name as float NaN; skip it instead of crashing
    # on a string operation.
    if not isinstance(csa_name, str):
        continue

    # str.split returns a one-element list when there is no "/", so names
    # with and without a separator share this single code path.
    for dist in csa_name.split("/"):
        # Strip padding so "A / B" yields "a" and "b"; the names are later
        # matched exactly against the other datasets.
        dist = dist.strip().lower()
        if not dist:
            # Empty fragment (e.g. trailing "/"): not a neighborhood.
            continue

        income_neighborhood.append(dist)
        income_total_pop.append(total_pop)
        income_medium_hh.append(median_income)

processed_income = {
    'Neighborhood': income_neighborhood,
    'Total Population': income_total_pop,
    'Median Household Income': income_medium_hh
}

processed_income_df = pd.DataFrame(data=processed_income)

In [4]:
# Preview the first 10 rows of the flattened income table.
processed_income_df.head(10)

Unnamed: 0,Median Household Income,Neighborhood,Total Population
0,37302.17105,allendale,16217
1,37302.17105,irvington,16217
2,37302.17105,s. hilton,16217
3,53565.0797,beechfield,12264
4,53565.0797,ten hills,12264
5,53565.0797,west hills,12264
6,40482.35965,belair-edison,17416
7,38603.93023,brooklyn,14243
8,38603.93023,curtis bay,14243
9,38603.93023,hawkins point,14243


In [5]:
# Distinct neighborhood names present in the income table (np.unique returns them sorted).
# Used further down when intersecting the names of the three datasets.
unique_income_neighborhoods = np.unique(processed_income_df["Neighborhood"])

# Begin processing Crime Dataframe

In [6]:
# Load the raw BPD victim-based crime CSV (path is relative to the working directory).
# The preprocessing cell reads its CrimeDate, CrimeTime, Description, District and
# Neighborhood columns.
crime_df = pd.read_csv("./datasets/BPD_Part_1_Victim_Based_Crime_Data.csv")

In [7]:
# Bounds of the reporting window: calendar year 2016, from the first second
# of 1 January through the last second of 31 December.
start = datetime(year=2016, month=1, day=1)
end = datetime(year=2016, month=12, day=31, hour=23, minute=59, second=59)

# Echo both bounds, one per line, as a quick sanity check.
print(start, end, sep="\n")

2016-01-01 00:00:00
2016-12-31 23:59:59


In [8]:
# Preprocess crime data from the BPD dataset.
#
# Keeps only incidents whose timestamp parses, falls inside [start, end]
# (both inclusive) and whose Neighborhood is a string. Text fields are
# lower-cased so they can be matched against the other datasets.
crime_datetime = []
crime_datetimeofyear = []
crime_descript = []
crime_district = []
crime_neighbor = []

# The "<CrimeDate> <CrimeTime>" string is tried against these layouts in order.
CRIME_DATETIME_FORMATS = ('%m/%d/%Y %H:%M:%S', '%m/%d/%Y %H%M')


def _parse_crime_datetime(text):
    """Parse a "<CrimeDate> <CrimeTime>" string.

    Returns the datetime produced by the first layout in
    CRIME_DATETIME_FORMATS that matches, or None when none of them match.
    """
    for fmt in CRIME_DATETIME_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


def _lower_if_text(value):
    """Lower-case a string; return any other value (e.g. a missing NaN) unchanged."""
    return value.lower() if isinstance(value, str) else value


# Walk the needed columns in lockstep instead of building a Series per row.
crime_columns = ("CrimeDate", "CrimeTime", "Description", "District", "Neighborhood")
for crime_date, crime_time, description, district, neighborhood in zip(
        *(crime_df[col] for col in crime_columns)):
    datetime_processed = _parse_crime_datetime("{} {}".format(crime_date, crime_time))

    # Rows whose timestamp matches neither layout are dropped.
    if datetime_processed is None:
        continue

    if not (start <= datetime_processed <= end):
        continue

    # A missing neighborhood is read as float NaN; such rows cannot be
    # matched by name later, so they are dropped.
    if not isinstance(neighborhood, str):
        continue

    pd_dt = pd.to_datetime(datetime_processed)
    crime_datetime.append(pd_dt)
    crime_datetimeofyear.append(pd_dt.dayofyear)
    # Description/District may be missing too; keep the row and pass the
    # missing value through instead of raising on .lower().
    crime_descript.append(_lower_if_text(description))
    crime_district.append(_lower_if_text(district))
    crime_neighbor.append(neighborhood.lower())

# Append to a new dataframe
processed_crime = {
    'Datetime': crime_datetime,
    'Day of the Year': crime_datetimeofyear,
    'Description': crime_descript,
    'District': crime_district,
    'Neighborhood': crime_neighbor
}

processed_crime_df = pd.DataFrame(data=processed_crime)


In [9]:
# NOTE(review): this cell contains only disabled scratch code and does nothing when run.
# The groupby below names 'Day of the Year', a column the preprocessing cell builds for
# processed_crime_df; whether crime_df also has it cannot be seen here - confirm, or
# delete this cell.
# wow = crime_df.groupby(['Day of the Year', "Neighborhood"]).size().reset_index(name="WOW")
# crime_df.to_csv("lmao.csv", encoding="utf-8")
# len(processed_crime_df)
# processed_crime_df.head(10)

In [10]:
# Distinct neighborhood names present in the filtered crime table (np.unique returns them sorted).
# Used further down when intersecting the names of the three datasets.
unique_crime_neighborhoods = np.unique(processed_crime_df["Neighborhood"])

# Begin processing Service Dataframe

In [11]:
# Load the raw 311 customer service requests CSV (path is relative to the working directory).
# NOTE(review): the output fragment recorded under this cell looks like the tail of a
# pandas DtypeWarning (mixed column types) - confirm, and consider passing dtype= explicitly.
service_df = pd.read_csv("./datasets/311_Customer_Service_Requests.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [15]:
# Peek at the first 20 raw 311 rows.
# NOTE(review): if this line shares a cell with the loop that follows, it is not the
# cell's last expression and nothing is displayed - confirm and move it to its own cell.
service_df.head(20)


# Build a trimmed copy of the 311 requests: five columns, with the neighborhood
# and intake-method text lower-cased so they can be matched against the other
# datasets.
service_type = []
service_agency = []
service_neighborhood = []
service_method_received = []
service_creation_date = []

# Walk the needed columns in lockstep instead of building a Series per row.
service_columns = ("SRType", "Agency", "Neighborhood", "MethodReceived", "CreatedDate")
for sr_type, agency, neighborhood, method, created in zip(
        *(service_df[col] for col in service_columns)):
    # A missing neighborhood is read as float NaN and has no .lower(); such
    # rows cannot be matched by name later, so drop them (the crime
    # preprocessing cell treats non-string neighborhoods the same way).
    if not isinstance(neighborhood, str):
        continue

    service_type.append(sr_type)
    service_agency.append(agency)
    service_neighborhood.append(neighborhood.lower())
    # Keep the row when the intake method is missing; pass the value through as-is.
    service_method_received.append(method.lower() if isinstance(method, str) else method)
    service_creation_date.append(created)

processed_service = {
    'Service Requested Type': service_type,
    'Agency': service_agency,
    'Neighborhood': service_neighborhood,
    'Method Received': service_method_received,
    'Creation Date': service_creation_date
}

processed_service_df = pd.DataFrame(data=processed_service)

In [16]:
# Preview the first 5 rows of the trimmed 311 table.
processed_service_df.head(5)

Unnamed: 0,Agency,Creation Date,Method Received,Neighborhood,Service Requested Type
0,Bureau of Water and Waste Water,02/01/2015 08:12:00 AM +0000,interface,canton,WW Water Leak (Exterior)
1,Liquor License Board,02/01/2015 08:48:00 AM +0000,interface,greektown,BCLB-Liquor License Complaint
2,Bureau of Water and Waste Water,02/01/2015 09:13:00 AM +0000,phone,millhill,WW Hydrant Open
3,Department of Transportation,02/01/2015 09:24:00 AM +0000,phone,medfield,TRM-Snow/Icy Conditions
4,Department of Transportation,02/01/2015 09:29:00 AM +0000,phone,east baltimore midway,TRM-Snow/Icy Conditions


In [17]:
# Distinct neighborhood names present in the 311 table (np.unique returns them sorted).
# Used just below when intersecting the names of the three datasets.
unique_service_neighborhoods = np.unique(processed_service_df["Neighborhood"])


# Begin intersecting the datasets, keeping only neighborhoods that are named identically in all three

In [22]:
# Neighborhood names common to all three datasets: intersect income with
# service first, then intersect that result with crime.
unique_neighborhoods_inter = np.intersect1d(
    np.intersect1d(unique_income_neighborhoods, unique_service_neighborhoods),
    unique_crime_neighborhoods,
)

# Show the shared names followed by how many there are.
print(unique_neighborhoods_inter)
print(len(unique_neighborhoods_inter))


['allendale' 'arlington' 'ashburton' 'barclay' 'beechfield'
 'belair-edison' 'brooklyn' 'canton' 'cedonia' 'cherry hill' 'cheswolde'
 'coldspring' 'curtis bay' 'dickeyville' 'dorchester' 'downtown'
 'druid heights' 'edmondson village' 'federal hill' 'fells point'
 'forest park' 'frankford' 'franklintown' 'guilford' 'hampden'
 'harlem park' 'hawkins point' 'highlandtown' 'hollins market' 'homeland'
 'howard park' 'inner harbor' 'irvington' 'lakeland' 'lauraville'
 'little italy' 'loch raven' 'medfield' 'middle east' 'morrell park'
 'mount washington' 'mount winans' 'oldtown' 'orangeville' 'penn north'
 'poppleton' 'remington' 'reservoir hill' 'sandtown-winchester'
 'seton hill' 'ten hills' 'upton' 'violetville' 'walbrook'
 'west arlington' 'west hills' 'westport' 'woodberry']
58


In [44]:
# Keep only the income rows whose neighborhood is shared by all three datasets.
neighbor_filter_income_df = processed_income_df.loc[
    processed_income_df["Neighborhood"].isin(unique_neighborhoods_inter)
]
# Preview the first five surviving rows (head() defaults to 5).
neighbor_filter_income_df.head()


Unnamed: 0,Median Household Income,Neighborhood,Total Population
0,37302.17105,allendale,16217
1,37302.17105,irvington,16217
3,53565.0797,beechfield,12264
4,53565.0797,ten hills,12264
5,53565.0797,west hills,12264


In [45]:
# Keep only the crime rows whose neighborhood is shared by all three datasets.
neighbor_filter_crime_df = processed_crime_df.loc[
    processed_crime_df["Neighborhood"].isin(unique_neighborhoods_inter)
]
# Preview the first five surviving rows (head() defaults to 5).
neighbor_filter_crime_df.head()



Unnamed: 0,Datetime,Day of the Year,Description,District,Neighborhood
2,2016-12-31 23:30:00,366,larceny from auto,southeastern,canton
5,2016-12-31 23:15:00,366,burglary,southwestern,irvington
8,2016-12-31 22:30:00,366,robbery - street,northeastern,belair-edison
11,2016-12-31 21:30:00,366,larceny,western,penn north
12,2016-12-31 21:27:00,366,common assault,southern,cherry hill


In [46]:
# Keep only the 311 rows whose neighborhood is shared by all three datasets.
neighbor_filter_service_df = processed_service_df.loc[
    processed_service_df["Neighborhood"].isin(unique_neighborhoods_inter)
]
# Preview the first five surviving rows (head() defaults to 5).
neighbor_filter_service_df.head()

Unnamed: 0,Agency,Creation Date,Method Received,Neighborhood,Service Requested Type
0,Bureau of Water and Waste Water,02/01/2015 08:12:00 AM +0000,interface,canton,WW Water Leak (Exterior)
3,Department of Transportation,02/01/2015 09:24:00 AM +0000,phone,medfield,TRM-Snow/Icy Conditions
8,Mayors Office of Information Technology,02/01/2015 11:50:00 AM +0000,interface,hollins market,ECC-Miscellaneous Request
9,Department of Transportation,02/01/2015 12:07:00 PM +0000,interface,belair-edison,BGE-StLight(s) Out Rear
11,Department of Transportation,10/27/2016 08:24:17 AM +0000,internet,morrell park,TRM-Grass Mowing


In [47]:
# Write the filtered income rows to income.csv in the working directory (UTF-8).
# No index= argument is given, so to_csv's default applies and the row index is written too.
neighbor_filter_income_df.to_csv("income.csv",encoding="utf-8")

In [48]:
# Write the filtered crime rows to crime.csv in the working directory (UTF-8).
# No index= argument is given, so to_csv's default applies and the row index is written too.
neighbor_filter_crime_df.to_csv("crime.csv",encoding="utf-8")

In [49]:
# Write the filtered 311 rows to service.csv in the working directory (UTF-8).
# No index= argument is given, so to_csv's default applies and the row index is written too.
neighbor_filter_service_df.to_csv("service.csv",encoding="utf-8")