In [1]:
import os
import pandas as pd

In [2]:
def get_folder_dict(path):
    # get the subfolder directories (year-month)
    dir_list = os.listdir(path)
    dir_list.sort()

    if '.DS_Store' in dir_list:
        dir_list.remove('.DS_Store')

    # create dictionary with files per subfolder
    dir_dict = dict()

    for folder in dir_list:
        dir_dict[folder] = os.listdir(path + '/' + folder)
        
    return dir_dict

In [3]:
def compiler_by_force(path, force, target_path):
    ''' Exports three csv files with the compiled data for the selected force'''
    # create dictionary with files per subfolder
    dir_dict = get_folder_dict(path)
    
    # create three list of paths for the street, outcomes and stop and search data of the selected force
    force_street_paths = []
    force_outcomes_paths = []
    force_stop_and_search_paths = []
    anomalies_paths = []

    for key in dir_dict:
        for item in dir_dict[key]:
            if force in item:
                if 'street' in item:
                    force_street_paths.append(key + '/' + item)
                elif 'outcomes' in item:
                    force_outcomes_paths.append(key + '/' + item)
                elif 'stop-and-search' in item:
                    force_stop_and_search_paths.append(key + '/' + item)
                else:
                    anomalies_paths.append(key + '/' + item)
    
    # create three dataframes for the street, outcomes and stop and search data of the selected force
    df_force_street = pd.DataFrame()
    df_force_outcomes = pd.DataFrame()
    df_force_stop_and_search = pd.DataFrame()

    for path_extension in force_street_paths:
        df = pd.read_csv(path + '/' + path_extension)
        df_force_street = pd.concat([df_force_street, df])
    print('Compiled ' + str(len(force_street_paths)) + ' number of street files')

    for path_extension in force_outcomes_paths:
        df = pd.read_csv(path + '/' + path_extension)
        df_force_outcomes = pd.concat([df_force_outcomes, df])
    print('Compiled ' + str(len(force_outcomes_paths)) + ' number of outcomes files')

    for path_extension in force_stop_and_search_paths:
        df = pd.read_csv(path + '/' + path_extension)
        df_force_stop_and_search = pd.concat([df_force_stop_and_search, df])
    print('Compiled ' + str(len(force_stop_and_search_paths)) + ' number of stop and search files')
    
    # export the three dataframes to csv    
    df_force_street.to_csv(target_path + '/' + force + '_street.csv')
    df_force_outcomes.to_csv(target_path + '/' + force + '_outcomes.csv')
    df_force_stop_and_search.to_csv(target_path + '/' + force + '_stop_and_search.csv')

In [4]:
def get_all_force_names(path):
    # create dictionary with files per subfolder
    dir_dict = get_folder_dict(path)
    force_list = []
    for key in dir_dict:
        for item in dir_dict[key]:
            if 'stop-and-search' in item:
                force = '-'.join(item.split('.')[0][8:].split('-')[:-3])
            else:
                force = '-'.join(item.split('.')[0][8:].split('-')[:-1])
            if force not in force_list:
                force_list.append(force)
    return force_list
    

In [5]:
def compile_all_forces(path, target_path):
    force_list = get_all_force_names(path)
    for force in force_list:
        compiler_by_force(path, force, target_path)
        print('Compiled data for ' + force)

In [6]:
path = '/Users/MoshaLangerak_1/Library/Mobile Documents/com~apple~CloudDocs/Universiteit/Bachelor/Year 2/Q3/Data Challenge 2/Jan_2010_Oct_2021'
target_path = '/Users/MoshaLangerak_1/Library/Mobile Documents/com~apple~CloudDocs/Universiteit/Bachelor/Year 2/Q3/Data Challenge 2/Data'
force = 'city-of-london'
compiler_by_force(path, force, target_path)

Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 78 number of stop and search files


In [7]:
path = '/Users/MoshaLangerak_1/Library/Mobile Documents/com~apple~CloudDocs/Universiteit/Bachelor/Year 2/Q3/Data Challenge 2/Jan_2010_Oct_2021'
target_path = '/Users/MoshaLangerak_1/Library/Mobile Documents/com~apple~CloudDocs/Universiteit/Bachelor/Year 2/Q3/Data Challenge 2/Data'
compile_all_forces(path, target_path)

Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 79 number of stop and search files
Compiled data for durham
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 83 number of stop and search files
Compiled data for west-mercia
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 83 number of stop and search files
Compiled data for north-yorkshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 81 number of stop and search files
Compiled data for staffordshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 83 number of stop and search files
Compiled data for warwickshire
Compiled 124 number of street files
Compiled 111 number of outcomes files
Compiled 76 number of stop and search files
Compiled data for west-midlands
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 80 number of stop and sea

  df_force_stop_and_search = pd.concat([df_force_stop_and_search, df])


Compiled 79 number of stop and search files
Compiled data for essex
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 82 number of stop and search files
Compiled data for west-yorkshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 79 number of stop and search files
Compiled data for gloucestershire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 79 number of stop and search files
Compiled data for bedfordshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 80 number of stop and search files
Compiled data for south-yorkshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 82 number of stop and search files
Compiled data for dyfed-powys
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 80 number of stop and search files
Compiled data for northumbria
Compiled 131 number of street fi

  df_force_stop_and_search = pd.concat([df_force_stop_and_search, df])


Compiled 79 number of stop and search files
Compiled data for humberside
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 80 number of stop and search files
Compiled data for hertfordshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 78 number of stop and search files
Compiled data for wiltshire
Compiled 103 number of street files
Compiled 90 number of outcomes files
Compiled 54 number of stop and search files
Compiled data for greater-manchester
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 55 number of stop and search files
Compiled data for lincolnshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 67 number of stop and search files
Compiled data for avon-and-somerset
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 74 number of stop and search files
Compiled data for cumbria
Compiled 131 number of street 

  df_force_stop_and_search = pd.concat([df_force_stop_and_search, df])


Compiled 76 number of stop and search files
Compiled data for derbyshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 79 number of stop and search files
Compiled data for kent
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 82 number of stop and search files
Compiled data for suffolk
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 79 number of stop and search files
Compiled data for sussex
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 83 number of stop and search files
Compiled data for leicestershire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 78 number of stop and search files
Compiled data for nottinghamshire
Compiled 131 number of street files
Compiled 118 number of outcomes files
Compiled 78 number of stop and search files
Compiled data for merseyside
Compiled 131 number of street files
Compiled 118 

  df_force_stop_and_search = pd.concat([df_force_stop_and_search, df])


Compiled 83 number of stop and search files
Compiled data for dorset
Compiled 119 number of street files
Compiled 0 number of outcomes files
Compiled 79 number of stop and search files
Compiled data for btp
Compiled 122 number of street files
Compiled 0 number of outcomes files
Compiled 0 number of stop and search files
Compiled data for northern-ireland
