\pagebreak

## spearman.ipynb (Spearman coefficient calculation file)

### Libraries and functions

In [None]:
import pandas as pd

In [None]:
#creates a list of rankings from a given list
def ranking(input):
    #saves the location of the items in the original list
    value_index_pair = list(zip(input,range(len(input))))

    #sorts the list by the value
    value_index_pair_sorted = sorted(value_index_pair)

    ranks = [0]*len(input)
    #ranks the items in the list and stores it in the original location
    for i, item in enumerate(value_index_pair_sorted):
        #print(i,item)
        ranks[item[1]] = i+1

    return ranks

In [None]:
#computes the spearman coefficient given two ranking lists
def compute_spearman_ranks(x_ranks,y_ranks):
    """Return Spearman's rank correlation coefficient for two rank lists.

    Uses ``rs = 1 - 6*sum(d^2) / (n*(n^2 - 1))`` where ``d`` is the
    difference between the ranks paired by position and ``n`` is the number
    of pairs.

    Args:
        x_ranks: ranks of the first variable.
        y_ranks: ranks of the second variable, in the same row order.

    Returns:
        The coefficient as a float.

    Raises:
        ValueError: if the two inputs do not have the same length.  Pairing
            them anyway would silently drop the unmatched tail while ``n``
            still counted it, giving a wrong coefficient.
        ZeroDivisionError: if there are fewer than two pairs, because the
            denominator ``n*(n^2 - 1)`` is then zero.
    """
    x_ranks = list(x_ranks)
    y_ranks = list(y_ranks)

    n = len(x_ranks)
    if len(y_ranks) != n:
        raise ValueError(
            "x_ranks and y_ranks must have the same length "
            "(got %d and %d)" % (n, len(y_ranks))
        )

    #sum of the squared rank differences
    sum_d_square = sum((x - y)**2 for x, y in zip(x_ranks, y_ranks))

    #calculate spearman coefficient
    rs = 1 - (6*sum_d_square/(n*(n**2-1)))
    return rs

\pagebreak

### Read csv files

In [None]:
#read combined csv file (grid data)
#raw strings keep the backslashes literal; in a normal string "\." "\C" and "\W"
#are invalid escape sequences and make Python emit a warning
#NOTE: the paths use Windows separators and are relative to the working directory
combined_df = pd.read_csv(r"..\..\CSV_files\Combined_Data_2.csv")

#drop nan values in traffic volume
#.copy() makes the filtered frame independent of combined_df, so adding a column
#below does not raise pandas' SettingWithCopyWarning
combined_dropna_df = combined_df[combined_df["Average Traffic Volume"].notna()].copy()

#incidents normalized by the traffic volume of the same row
combined_dropna_df["Incidents/Volume Ratio"] = combined_dropna_df["Total Incidents"]/combined_dropna_df["Average Traffic Volume"]

#read weather data with incident count
weather_df = pd.read_csv(r"..\..\CSV_files\WeatherIncidents.csv")

\pagebreak

### Calculate and output spearman coefficients

In [None]:
#----- grid data: each column is compared with incidents per traffic volume -----

#x value: incident ranking normalized for traffic volume
incident_volume = list(combined_dropna_df["Incidents/Volume Ratio"])
incident_volume_rank = ranking(incident_volume)

#y values: number of cameras, signals and signs
cameras = list(combined_dropna_df["Total Cameras"])
signals = list(combined_dropna_df["Total Signals"])
signs = list(combined_dropna_df["Total Signs"])

cameras_rank = ranking(cameras)
signals_rank = ranking(signals)
signs_rank = ranking(signs)

cameras_spear = compute_spearman_ranks(cameras_rank, incident_volume_rank)
signals_spear = compute_spearman_ranks(signals_rank, incident_volume_rank)
signs_spear = compute_spearman_ranks(signs_rank, incident_volume_rank)

#speed limit has its own nan values, so the rows without one are removed and
#the incident ranking is rebuilt on the remaining rows before comparing
#NOTE: combined_dropna_df is rebound to the filtered frame from here on
combined_dropna_df = combined_dropna_df[combined_dropna_df["Average Speed Limit"].notna()]
incident_volume = list(combined_dropna_df["Incidents/Volume Ratio"])
incident_volume_rank = ranking(incident_volume)

speed = list(combined_dropna_df["Average Speed Limit"])
speed_rank = ranking(speed)
speed_spear = compute_spearman_ranks(speed_rank, incident_volume_rank)

#----- weather data: each column is compared with the incident count -----

#x value: incident ranking from the "Count" column
incident_count = list(weather_df["Count"])
incident_count_rank = ranking(incident_count)

#y values: visibility and mean temperature
visib = list(weather_df["Visibility (km)"])
temp = list(weather_df["Mean Temp (C)"])

visib_rank = ranking(visib)
temp_rank = ranking(temp)

visib_spear = compute_spearman_ranks(visib_rank, incident_count_rank)
temp_spear = compute_spearman_ranks(temp_rank, incident_count_rank)

In [None]:
#report the coefficients computed from the grid data
print("Grid data", "Spearman correlation against incident/volume", sep="\n")
print("Cameras:", cameras_spear)
print("Signals:", signals_spear)
print("Signs:", signs_spear)
#the extra "\n" argument leaves an empty line before the weather section
print("Speed:", speed_spear, "\n")

#report the coefficients computed from the weather data
print("Weather data", "Spearman correlation against average daily incidents", sep="\n")
print("Visibility:", visib_spear)
print("Temperature:", temp_spear)