In [35]:
from datetime import datetime, date

import pandas as pd
import numpy as np

import math


# Begin processing Crime Dataframe

In [24]:
# Load the raw BPD Part 1 victim-based crime records from the local datasets folder.
crime_df = pd.read_csv("./datasets/BPD_Part_1_Victim_Based_Crime_Data.csv")

In [33]:
# Time window for the analysis: first to last second of calendar year 2016.
# Both values are used as inclusive bounds when crime rows are filtered.
start = datetime(year=2016, month=1, day=1)
end = datetime(year=2016, month=12, day=31, hour=23, minute=59, second=59)

# Echo both bounds, one per line, as a quick sanity check.
print(start, end, sep="\n")

2016-01-01 00:00:00
2016-12-31 23:59:59


In [37]:
# Preprocess crime data from the BPD dataset.
#
# Builds `processed_crime_df` with one row per crime record that
#   * has a "<CrimeDate> <CrimeTime>" string matching one of
#     CRIME_DATETIME_FORMATS,
#   * falls inside the inclusive [start, end] window, and
#   * has a string Neighborhood (non-string values, such as NaN for a
#     missing entry, are skipped).

# Accepted date/time layouts, tried in order: "HH:MM:SS" first, then "HHMM".
CRIME_DATETIME_FORMATS = ('%m/%d/%Y %H:%M:%S', '%m/%d/%Y %H%M')


def _parse_crime_datetime(datetime_str):
    """Parse a "<CrimeDate> <CrimeTime>" string into a ``datetime``.

    Args:
        datetime_str: The date and time joined by a single space.

    Returns:
        The ``datetime`` for the first format in CRIME_DATETIME_FORMATS that
        matches, or ``None`` when no format matches.
    """
    for fmt in CRIME_DATETIME_FORMATS:
        try:
            return datetime.strptime(datetime_str, fmt)
        except ValueError:
            # Layout mismatch: fall through to the next accepted format.
            continue
    return None


crime_datetime = []
crime_datetimeofyear = []
crime_descript = []
crime_district = []
crime_neighbor = []

# Walk only the columns that are actually used. Zipping the columns avoids
# iterrows(), which constructs a full Series for every row of the frame.
crime_columns = zip(
    crime_df["CrimeDate"],
    crime_df["CrimeTime"],
    crime_df["Description"],
    crime_df["District"],
    crime_df["Neighborhood"],
)

for crime_date, crime_time, description, district, neighborhood in crime_columns:
    datetime_processed = _parse_crime_datetime("{} {}".format(crime_date, crime_time))

    if datetime_processed is None:
        continue  # neither layout matched: drop the record

    if not (start <= datetime_processed <= end):
        continue  # outside the analysis window

    if not isinstance(neighborhood, str):
        continue  # no usable neighborhood name

    pd_dt = pd.to_datetime(datetime_processed)
    crime_datetime.append(pd_dt)
    crime_datetimeofyear.append(pd_dt.dayofyear)
    crime_descript.append(description)
    crime_district.append(district)
    crime_neighbor.append(neighborhood)

# Assemble the parallel lists into a new dataframe
processed_crime = {
    'Datetime': crime_datetime,
    'Day of the Year': crime_datetimeofyear,
    'Description': crime_descript,
    'District': crime_district,
    'Neighborhood': crime_neighbor
}

processed_crime_df = pd.DataFrame(data=processed_crime)


In [41]:
# Scratch experiments left commented out (not executed):
# wow = crime_df.groupby(['Day of the Year', "Neighborhood"]).size().reset_index(name="WOW")
# crime_df.to_csv("lmao.csv", encoding="utf-8")
# len(processed_crime_df)
# Preview the first 10 rows of the processed crime table.
processed_crime_df.head(10)

Unnamed: 0,Datetime,Day of the Year,Description,District,Neighborhood
0,2016-12-31 23:51:00,366,AGG. ASSAULT,EASTERN,Darley Park
1,2016-12-31 23:30:00,366,COMMON ASSAULT,NORTHWESTERN,Central Park Heights
2,2016-12-31 23:30:00,366,LARCENY FROM AUTO,SOUTHEASTERN,Canton
3,2016-12-31 23:30:00,366,LARCENY,EASTERN,CARE
4,2016-12-31 23:28:00,366,AGG. ASSAULT,NORTHERN,Kenilworth Park
5,2016-12-31 23:15:00,366,BURGLARY,SOUTHWESTERN,Irvington
6,2016-12-31 23:00:00,366,LARCENY FROM AUTO,NORTHERN,Charles Village
7,2016-12-31 23:00:00,366,LARCENY,NORTHEASTERN,Woodbourne Heights
8,2016-12-31 22:30:00,366,ROBBERY - STREET,NORTHEASTERN,Belair-Edison
9,2016-12-31 22:05:00,366,BURGLARY,SOUTHEASTERN,Patterson Park Neighborho


# Begin processing Income Dataframe

In [None]:
# Load the Vital Signs 16 census demographics table from the local datasets folder.
income_df = pd.read_csv("./datasets/Vital_Signs_16_Census_Demographics.csv")

In [43]:
# Flatten the census table to one record per neighborhood name.
# A CSA2010 label containing "/" is split into its component names, and each
# component receives a copy of that row's population (tpop10) and income
# (mhhi16) values; a label without "/" yields a single record.
income_neighborhood = []
income_total_pop = []
income_medium_hh = []

for _, csa_row in income_df.iterrows():
    csa_label = csa_row["CSA2010"]

    if "/" in csa_label:
        names = csa_label.split("/")
    else:
        names = [csa_label]

    # Repeat the row-level figures once per name so the lists stay aligned.
    income_neighborhood.extend(names)
    income_total_pop.extend(csa_row["tpop10"] for _name in names)
    income_medium_hh.extend(csa_row["mhhi16"] for _name in names)


processed_income = {
    'Neighborhood': income_neighborhood,
    'Total Population': income_total_pop,
    'Median Household Income': income_medium_hh
}

processed_income_df = pd.DataFrame(data=processed_income)

In [44]:
# Preview the first 10 rows of the per-neighborhood income table.
processed_income_df.head(10)


Unnamed: 0,Median Household Income,Neighborhood,Total Population
0,37302.17105,Allendale,16217
1,37302.17105,Irvington,16217
2,37302.17105,S. Hilton,16217
3,53565.0797,Beechfield,12264
4,53565.0797,Ten Hills,12264
5,53565.0797,West Hills,12264
6,40482.35965,Belair-Edison,17416
7,38603.93023,Brooklyn,14243
8,38603.93023,Curtis Bay,14243
9,38603.93023,Hawkins Point,14243


In [46]:
# Sorted array of the distinct neighborhood names in processed_income_df.
np.unique(processed_income_df["Neighborhood"])

array(['Allendale', 'Arlington', 'Armistead', 'Ashburton', 'Barclay',
       'Beechfield', 'Belair-Edison', 'Belvedere', 'Brooklyn', 'Canton',
       'Cedonia', 'Cherry Hill', 'Cheswolde', 'Chinquapin Park',
       'Claremont', 'Clifton-Berea', 'Coldspring', 'Coldstream',
       'Cross-Country', 'Curtis Bay', 'Dickeyville', 'Dorchester',
       'Downtown', 'Druid Heights', 'East End', 'East Highlandtown',
       'Echodale', 'Edmondson Village', 'Federal Hill', 'Fells Point',
       'Forest Park', 'Frankford', 'Franklintown', 'Glen-Fallstaff',
       'Greater Charles Village', 'Greater Govans', 'Greater Mondawmin',
       'Greater Roland Park', 'Greater Rosemont', 'Greenmount East',
       'Guilford', 'Hamilton', 'Hampden', 'Harbor East', 'Harford',
       'Harlem Park', 'Hawkins Point', 'Highlandtown', 'Hilltop',
       'Hollins Market', 'Homeland', 'Howard Park', 'Inner Harbor',
       'Irvington', 'Lakeland', 'Lauraville', 'Little Italy',
       'Loch Raven', 'Madison', 'Medfield', '

# Begin processing Service Dataframe

In [19]:
# Load the raw 311 customer service requests from the local datasets folder.
# NOTE(review): the recorded output below looks like the tail of a pandas
# warning (possibly a mixed-type DtypeWarning) — consider passing explicit
# dtypes to read_csv; confirm against a fresh run.
service_df = pd.read_csv("./datasets/311_Customer_Service_Requests.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [21]:
# Peek at the first 20 raw service-request rows.
# NOTE(review): if this line shares a cell with the code that follows, the
# preview is never displayed (only a cell's last expression renders) — confirm.
service_df.head(20)


# Keep five columns of the 311 service requests under friendlier names.
# Each column is copied whole with Series.tolist() instead of appending row by
# row inside an iterrows() loop; the resulting lists hold the same values in
# the same order, without building a Series for every row.
# 'Creation Date' is copied as-is; it is not parsed into a datetime here.
service_type = service_df["SRType"].tolist()
service_agency = service_df["Agency"].tolist()
service_neighborhood = service_df["Neighborhood"].tolist()
service_method_received = service_df["MethodReceived"].tolist()
service_creation_date = service_df["CreatedDate"].tolist()

processed_service = {
    'Service Requested Type': service_type,
    'Agency': service_agency,
    'Neighborhood': service_neighborhood,
    'Method Received': service_method_received,
    'Creation Date': service_creation_date
}

processed_service_df = pd.DataFrame(data=processed_service)

In [40]:
# Preview the first 20 rows of the processed service-request table.
processed_service_df.head(20)

Unnamed: 0,Agency,Creation Date,Method Received,Neighborhood,Service Requested Type
0,Bureau of Water and Waste Water,02/01/2015 08:12:00 AM +0000,Interface,CANTON,WW Water Leak (Exterior)
1,Liquor License Board,02/01/2015 08:48:00 AM +0000,Interface,GREEKTOWN,BCLB-Liquor License Complaint
2,Bureau of Water and Waste Water,02/01/2015 09:13:00 AM +0000,Phone,MILLHILL,WW Hydrant Open
3,Department of Transportation,02/01/2015 09:24:00 AM +0000,Phone,MEDFIELD,TRM-Snow/Icy Conditions
4,Department of Transportation,02/01/2015 09:29:00 AM +0000,Phone,EAST BALTIMORE MIDWAY,TRM-Snow/Icy Conditions
5,Department of Transportation,02/01/2015 09:30:00 AM +0000,Interface,HARFORD-ECHODALE/PERRING PARKWAY,TRM-Snow/Icy Conditions
6,Department of Transportation,02/01/2015 09:36:00 AM +0000,Phone,MADISON PARK,TRT-Traffic Signal Repairs
7,Bureau of Solid Waste,02/01/2015 11:20:00 AM +0000,Phone,FOUR BY FOUR,SW-Dirty Alley
8,Mayors Office of Information Technology,02/01/2015 11:50:00 AM +0000,Interface,HOLLINS MARKET,ECC-Miscellaneous Request
9,Department of Transportation,02/01/2015 12:07:00 PM +0000,Interface,BELAIR-EDISON,BGE-StLight(s) Out Rear
