In [None]:
import os

def scale_input_data(scale_factor):
  """Write a row-scaled copy of every input CSV next to the original file.

  For each base path in the hard-coded ``file_bases`` list, ``<base>.csv`` is
  read and ``<base>.scaled.csv`` is written with
  ``int(scale_factor * number_of_rows)`` rows:

  * ``scale_factor == 1.0``: the file is copied as-is, without parsing it.
  * ``scale_factor < 1.0``: only the leading rows are kept.
  * ``scale_factor > 1.0``: the rows are repeated cyclically, in their
    original order, until the target row count is reached.

  Args:
    scale_factor: Non-negative multiplier applied to the row count.

  Raises:
    ValueError: If ``scale_factor`` is negative. A negative factor would
      otherwise produce a negative row count, and slicing with it silently
      drops rows from the end of the table instead of scaling it.
  """
  # Imported once per call instead of once per loop iteration. The imports stay
  # local to the function so this cell does not bind `pd` at notebook level.
  import shutil

  import pandas as pd

  if scale_factor < 0:
    raise ValueError('scale_factor must be non-negative, got %r' % (scale_factor,))

  file_bases = ['./input/UK_Traffic_Accidents_2015']
  for file_base in file_bases:
    source_path = file_base + '.csv'
    scaled_path = file_base + '.scaled.csv'

    if scale_factor == 1.0:
      # Nothing to rescale: a plain file copy avoids a parse/serialise round trip.
      shutil.copyfile(source_path, scaled_path)
      continue

    df_to_scale = pd.read_csv(source_path)
    new_num_rows = int(scale_factor * len(df_to_scale))
    if scale_factor <= 1.0:
      df_to_scale = df_to_scale.iloc[:new_num_rows]
    else:
      # Each pass appends at most the whole current table (doubling it), so the
      # target is reached in a logarithmic number of concatenations. The final
      # pass appends only the rows that are still missing.
      while len(df_to_scale) < new_num_rows:
        shortfall = new_num_rows - len(df_to_scale)
        df_to_scale = pd.concat([df_to_scale, df_to_scale.iloc[:shortfall]])
    df_to_scale.to_csv(scaled_path, index=False)

# Opt-in hook: (re)generate the ".scaled.csv" input only when INPUT_SCALE_FACTOR is set.
# The later cells read the ".scaled.csv" file, so it must already exist otherwise.
if os.environ.get('INPUT_SCALE_FACTOR') is not None:
  scale_input_data(float(os.environ['INPUT_SCALE_FACTOR']))

# When do traffic-related deaths occur?

The dataset is taken from data.gov.uk and contains all traffic-related deaths in the UK in 2015. Below is a simple visualisation of when the accidents occurred, broken down by day of the week and hour of the day.

Source: https://data.gov.uk/dataset/road-accidents-safety-data/resource/ceb00cff-443d-4d43-b17a-ee13437e9564
<br>

### Preprocessing 

In [1]:
# import pandas as pd
# The pandas import above is disabled. Instead, the Python source stored in the
# IREWR_IMPORTS environment variable is executed in this cell's namespace
# (`os` comes from the first cell). A KeyError is raised if the variable is unset.
# NOTE(review): presumably that source binds the name `pd` used by the cells
# below — confirm against whatever sets IREWR_IMPORTS.
# NOTE(review): exec() runs arbitrary code; IREWR_IMPORTS must come from a trusted source.
exec(os.environ['IREWR_IMPORTS'])
import numpy as np
# ALEX: remove plotting
# import matplotlib.pyplot as plt
# import seaborn as sns
# %matplotlib inline

In [2]:
# Load the complete (scaled) accident table and preview its first five rows
df = pd.read_csv(filepath_or_buffer="./input/UK_Traffic_Accidents_2015.scaled.csv")
df.head(n=5)

  df = pd.read_csv("./input/UK_Traffic_Accidents_2015.csv")


Unnamed: 0,Accident_Index,Location_Easting_OSGR,Location_Northing_OSGR,Longitude,Latitude,Police_Force,Accident_Severity,Number_of_Vehicles,Number_of_Casualties,Date,...,Pedestrian_Crossing-Human_Control,Pedestrian_Crossing-Physical_Facilities,Light_Conditions,Weather_Conditions,Road_Surface_Conditions,Special_Conditions_at_Site,Carriageway_Hazards,Urban_or_Rural_Area,Did_Police_Officer_Attend_Scene_of_Accident,LSOA_of_Accident_Location
0,201501BS70001,525130.0,180050.0,-0.198465,51.505538,1,3,1,1,12/01/2015,...,0,0,4,1,1,0,0,1,1,E01002825
1,201501BS70002,526530.0,178560.0,-0.178838,51.491836,1,3,1,1,12/01/2015,...,0,0,1,1,1,0,0,1,1,E01002820
2,201501BS70004,524610.0,181080.0,-0.20559,51.51491,1,3,1,1,12/01/2015,...,0,1,4,2,2,0,0,1,1,E01002833
3,201501BS70005,524420.0,181080.0,-0.208327,51.514952,1,3,1,1,13/01/2015,...,0,0,1,1,2,0,0,1,2,E01002874
4,201501BS70008,524630.0,179040.0,-0.206022,51.496572,1,2,2,1,09/01/2015,...,0,5,1,2,2,0,0,1,2,E01002814


In [3]:
# Reload only the two columns the analysis uses and discard rows with missing values
df = pd.read_csv(
    "./input/UK_Traffic_Accidents_2015.scaled.csv",
    usecols=["Day_of_Week", "Time"],
).dropna()
df.head()

Unnamed: 0,Day_of_Week,Time
0,2,18:45
1,2,07:50
2,2,18:08
3,3,07:40
4,6,07:30


In [4]:
# Keep only the hour: render each time as text and cut off its last three characters
# (the ":MM" part of values such as "18:45")
df['Time'] = df['Time'].astype(str).str[:-3]
df.head()

Unnamed: 0,Day_of_Week,Time
0,2,18
1,2,7
2,2,18
3,3,7
4,6,7


In [5]:
# Cast the "Day_of_Week" codes to strings; every other column keeps its dtype
df = df.astype({"Day_of_Week": str})

In [6]:
# Keep a copy of the day-of-week codes in a separate "Day" column, intended for ordering the days
df = df.assign(Day=df["Day_of_Week"])
df.head()

Unnamed: 0,Day_of_Week,Time,Day
0,2,18,2
1,2,7,2
2,2,18,2
3,3,7,3
4,6,7,6


In [7]:
# Translate the day-of-week codes ("1".."7", stored as strings) into weekday names.
# One dict-based replace does the whole mapping in a single pass instead of seven
# separate full-column replacements. Code "1" is Sunday and "7" is Saturday.
# Values that are not keys of the mapping are left unchanged.
df["Day_of_Week"] = df["Day_of_Week"].replace({
    "1": "Sunday",
    "2": "Monday",
    "3": "Tuesday",
    "4": "Wednesday",
    "5": "Thursday",
    "6": "Friday",
    "7": "Saturday",
})

df.head()

Unnamed: 0,Day_of_Week,Time,Day
0,Monday,18,2
1,Monday,7,2
2,Monday,18,2
3,Tuesday,7,3
4,Friday,7,6


In [8]:
# Use pd.crosstab() to count records per (weekday, hour) pair:
# weekdays become the rows, hours become the columns
df_pivot = pd.crosstab(index=df["Day_of_Week"], columns=df["Time"])
df_pivot

Time,00,01,02,03,04,05,06,07,08,09,...,14,15,16,17,18,19,20,21,22,23
Day_of_Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Friday,216,157,114,79,73,172,426,1004,1667,1025,...,1421,1936,1908,2022,1568,1202,887,695,646,540
Monday,212,138,73,77,79,175,437,1096,1726,1031,...,1199,1632,1776,1902,1429,992,714,509,414,277
Saturday,432,357,325,240,182,189,222,348,514,771,...,1272,1213,1180,1250,1185,980,700,632,641,574
Sunday,517,418,334,290,224,195,229,293,331,522,...,1117,1095,1122,1087,907,731,651,522,409,302
Thursday,213,142,102,105,90,152,449,1243,1927,1050,...,1206,1670,1889,2113,1691,1194,804,564,504,366
Tuesday,194,109,76,61,57,176,459,1239,1993,1204,...,1128,1675,1840,2222,1613,1135,757,556,467,292
Wednesday,158,108,60,67,64,180,529,1307,2095,1185,...,1168,1619,1880,2137,1513,1106,718,559,460,311


In [9]:
# Put the rows in calendar order (Monday first) rather than the alphabetical order crosstab produced
df_pivot = df_pivot.reindex(
    index=[
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
)
df_pivot

Time,00,01,02,03,04,05,06,07,08,09,...,14,15,16,17,18,19,20,21,22,23
Day_of_Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Monday,212,138,73,77,79,175,437,1096,1726,1031,...,1199,1632,1776,1902,1429,992,714,509,414,277
Tuesday,194,109,76,61,57,176,459,1239,1993,1204,...,1128,1675,1840,2222,1613,1135,757,556,467,292
Wednesday,158,108,60,67,64,180,529,1307,2095,1185,...,1168,1619,1880,2137,1513,1106,718,559,460,311
Thursday,213,142,102,105,90,152,449,1243,1927,1050,...,1206,1670,1889,2113,1691,1194,804,564,504,366
Friday,216,157,114,79,73,172,426,1004,1667,1025,...,1421,1936,1908,2022,1568,1202,887,695,646,540
Saturday,432,357,325,240,182,189,222,348,514,771,...,1272,1213,1180,1250,1185,980,700,632,641,574
Sunday,517,418,334,290,224,195,229,293,331,522,...,1117,1095,1122,1087,907,731,651,522,409,302


### Visualization

In [10]:
# ALEX: remove plotting
# fig, ax = plt.subplots(figsize=(30,8))
# graph = sns.heatmap(df_pivot, cmap="Blues", linecolor="white", linewidths=0.1)

# ax.set_title("Number of traffic-related deaths per day & hour combination", y=1.3, fontsize=30, fontweight="bold")
# ax.set_xlabel("")
# ax.set_ylabel("")

# #from matplotlib import rcParams
# #rcParams['axes.titlepad'] = 130 # Space between the title and graph

# locs, labels = plt.yticks() # Rotating row labels
# plt.setp(labels, rotation=0) # Rotating row labels

# ax.xaxis.tick_top() # x axis on top
# ax.xaxis.set_label_position('top') # x axis on top

# graph.tick_params(axis='both',labelsize=15) # Tick label size
# graph