##### <u>Extracting Data From CSV</u>

In [8]:
"""File documentation and copyright.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Name of the file.
DATA_NAME = "FRAFirm2.csv"
X_HOURS_PER_BIN = 1


# Turns the CSV into a dataframe.
def csv_to_dataframe():
  data = pd.read_csv(DATA_NAME)
  return pd.DataFrame(data)


# Main part of the program starts here.
dataframe = csv_to_dataframe()

# Display dataframe.
dataframe


Unnamed: 0,FIRM,Class,Start,End,TOD,FIRMF,Length,Night,Gap,WS,...,AFZ,WAFZ,MFZ,WMFZ,NBad,TIW,RIW,HIW,SIW,CCW
0,-3,0,170,680,-1,-3,510,0,750,0,...,0,0,0,0,0,2269,1,0,3,2.0
1,5,0,920,1295,-1,5,375,0,855,0,...,0,1,0,0,0,465,0,0,1,1.0
2,1,1,1130,1865,1470,-2,735,0,999,0,...,0,0,0,0,0,2855,2,0,5,0.0
3,4,0,670,1265,-1,4,595,0,605,0,...,0,1,0,0,0,1325,0,0,2,1.5
4,3,0,330,1049,-1,3,719,0,999,1,...,0,1,0,1,0,2434,2,0,4,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7067,-1,0,225,855,-1,-1,630,0,999,0,...,0,0,0,0,0,824,0,0,3,0.0
7068,5,1,1245,2030,1410,0,785,0,999,1,...,0,0,0,0,0,0,0,0,0,0.0
7069,5,0,1201,1365,-1,5,164,0,999,0,...,0,0,0,0,0,2000,1,0,3,0.0
7070,5,0,945,1485,-1,5,540,0,999,0,...,0,1,0,0,0,495,0,0,1,0.0


##### <u>Create Bins For Start</u>

In [9]:
# Creates a bin column for the dataframe.
def create_bin_column(dataframe, for_column, bin_name, x_hours):
  bins = []
  for row in dataframe.iterrows():
    bins.append(np.ceil(row[1].get(for_column) / (x_hours * 60)) - 1)
  dataframe[bin_name] = bins


# Create bin column.
create_bin_column(dataframe, "Start", "Bin", X_HOURS_PER_BIN)

# Display dataframe.
dataframe


Unnamed: 0,FIRM,Class,Start,End,TOD,FIRMF,Length,Night,Gap,WS,...,WAFZ,MFZ,WMFZ,NBad,TIW,RIW,HIW,SIW,CCW,Bin
0,-3,0,170,680,-1,-3,510,0,750,0,...,0,0,0,0,2269,1,0,3,2.0,2.0
1,5,0,920,1295,-1,5,375,0,855,0,...,1,0,0,0,465,0,0,1,1.0,15.0
2,1,1,1130,1865,1470,-2,735,0,999,0,...,0,0,0,0,2855,2,0,5,0.0,18.0
3,4,0,670,1265,-1,4,595,0,605,0,...,1,0,0,0,1325,0,0,2,1.5,11.0
4,3,0,330,1049,-1,3,719,0,999,1,...,1,0,1,0,2434,2,0,4,0.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7067,-1,0,225,855,-1,-1,630,0,999,0,...,0,0,0,0,824,0,0,3,0.0,3.0
7068,5,1,1245,2030,1410,0,785,0,999,1,...,0,0,0,0,0,0,0,0,0.0,20.0
7069,5,0,1201,1365,-1,5,164,0,999,0,...,0,0,0,0,2000,1,0,3,0.0,20.0
7070,5,0,945,1485,-1,5,540,0,999,0,...,1,0,0,0,495,0,0,1,0.0,15.0


##### <u>Calculate Start's Accident Probability</u>

In [18]:
# Calculates the accident probability for a bin.
def get_accident_probability(bin_number):
  classes = [0, 0, 0]
  for item in dataframe[dataframe.Bin == bin_number].iterrows():
    classes[int(item[1].get("Class"))] += 1
  
  # Get the probabilty of accident {1 - P(no accident)}.
  return 1 - classes[0] / sum(classes)


# Get chances of accident per bin.
print("Hour: Accident Chance")
for i in range(0, 24 // X_HOURS_PER_BIN):
  print(f"{i}: {round(get_accident_probability(i) * 100, 2)}%")

Hour: Accident Chance
0: 37.13%
1: 38.42%
2: 40.78%
3: 38.2%
4: 35.78%
5: 29.07%
6: 32.79%
7: 28.8%
8: 26.79%
9: 28.26%
10: 30.51%
11: 29.37%
12: 34.06%
13: 26.47%
14: 33.75%
15: 26.82%
16: 33.77%
17: 29.75%
18: 30.67%
19: 33.79%
20: 29.13%
21: 34.16%
22: 27.37%
23: 23.55%
