In [21]:
import pandas as pd
from os.path import isfile

In [22]:
# Borough names that rows are matched against (via the 'LookUp_BoroughName'
# column) when the crime data is filtered. 32 entries; order is kept as-is.
boroughs = [
    'Camden',
    'Hackney',
    'Hammersmith and Fulham',
    'Haringey',
    'Islington',
    'Kensington and Chelsea',
    'Lambeth',
    'Lewisham',
    'Newham',
    'Southwark',
    'Tower Hamlets',
    'Wandsworth',
    'Westminster',
    'Barking and Dagenham',
    'Barnet',
    'Bexley',
    'Brent',
    'Bromley',
    'Croydon',
    'Ealing',
    'Enfield',
    'Greenwich',
    'Harrow',
    'Havering',
    'Hillingdon',
    'Hounslow',
    'Kingston upon Thames',
    'Merton',
    'Redbridge',
    'Richmond upon Thames',
    'Sutton',
    'Waltham Forest',
]

In [23]:
def convert_date(date):
    """Turn a compact period label into a first-of-the-month date string.

    The last two characters of ``date`` become the middle component and
    everything before them becomes the final component, e.g.
    ``'201004'`` -> ``'01/04/2010'``.  (Presumably the labels are ``YYYYMM``
    column headers from the crime CSV -- confirm against the raw data.)

    Args:
        date: Period label as a string.

    Returns:
        A string of the form ``'01/<last two chars>/<remaining prefix>'``.
    """
    year_part = date[:-2]
    month_part = date[-2:]
    return "/".join(["01", month_part, year_part])

In [24]:
def process_crime_data(filepath):
    """Convert the wide crime CSV at ``filepath`` into a long per-period CSV.

    The input is read without its 'MinorText' column, rows are restricted to
    boroughs listed in the module-level ``boroughs``, and every remaining
    column other than 'MajorText' / 'LookUp_BoroughName' is treated as one
    period.  For each period the counts are summed per
    (Crime Type, Borough Name) and written to
    ``./converted_data/crime_data.csv`` with the columns
    'Crime Type', 'Borough Name', 'Period', 'Count'.

    The output file is rewritten from scratch on every call.  Previously the
    rows were appended whenever the file already existed, so re-running the
    notebook duplicated every record.

    Args:
        filepath: Path of the raw crime data CSV.

    Returns:
        None.  The result is written to disk.  If the input has no period
        columns, nothing is written.
    """
    # Local imports keep this cell self-contained; the notebook's import cell
    # only brings in ``isfile``.
    from os import makedirs
    from os.path import dirname

    output_file = "./converted_data/crime_data.csv"
    id_columns = ['MajorText', 'LookUp_BoroughName']

    crime_data = pd.read_csv(filepath, usecols=lambda x: x != 'MinorText')

    # Vectorised membership test instead of a Python-level loop over the rows.
    in_london_borough = crime_data['LookUp_BoroughName'].isin(boroughs)
    crime_data_london_boroughs = crime_data[in_london_borough]

    # Pick period columns by name rather than by position so the result does
    # not depend on where the id columns sit in the file.
    date_columns = [
        column for column in crime_data_london_boroughs.columns
        if column not in id_columns
    ]

    # Make sure the target directory exists before the first write.
    makedirs(dirname(output_file), exist_ok=True)

    for position, date in enumerate(date_columns):
        data_subset = (
            crime_data_london_boroughs[id_columns + [date]]
            .rename(columns={'MajorText': "Crime Type", "LookUp_BoroughName": 'Borough Name', date: 'Count'})
            .assign(Period=convert_date(date))
        )
        grouped_data = data_subset.groupby(
            ['Crime Type', 'Borough Name', 'Period'], as_index=False
        ).sum()

        # The first period truncates the file and writes the header; later
        # periods append below it.  This keeps a fresh run's output unchanged
        # while making re-runs idempotent.
        is_first = position == 0
        grouped_data.to_csv(
            output_file,
            index=False,
            mode='w' if is_first else 'a',
            header=is_first,
        )

In [25]:
from os import listdir
from os.path import isfile, join

data_path = "./raw-data/crime-data"

# The input directory must contain exactly one entry: the raw crime CSV.
datafile_names = listdir(data_path)
if len(datafile_names) != 1:
    # Covers both the empty directory and the several-entries case; the
    # message reports what was actually found instead of always claiming
    # "more than 1".
    raise ValueError(
        "Expected exactly 1 crime data file in {!r}, found {}: {}".format(
            data_path, len(datafile_names), datafile_names
        )
    )

datafile_path = join(data_path, datafile_names[0])
if not isfile(datafile_path):
    # The single entry is a directory (or otherwise not a regular file).
    raise FileNotFoundError(
        "No data file found: {!r} is not a regular file".format(datafile_path)
    )

process_crime_data(datafile_path)