# Source

https://en.wikipedia.org/wiki/Correlation

In [1]:
import numpy as np
import pandas as pd

In [2]:
# According to the formula we need:

# 1. Compute the average of X (avgX)
# 2. Compute the average of Y (avgY)
# 3. Compute the standard deviation of X (stdX)
# 4. Compute the standard deviation of Y (stdY)

# Once we have computed the ingredients above
# we must compute the sum of (Xi - avgX) * (Yi - avgY) and divide by n * stdX * stdY (n = number of samples)

In [3]:
# Perfectly linear toy dataset: Y is X shifted up by one.
X = np.arange(5)
Y = X + 1

In [4]:
# Steps 1 and 2: arithmetic mean of each sample (avgX, avgY).
avgX, avgY = X.mean(), Y.mean()

# Show the mean of X first, then the mean of Y.
display(avgX)

display(avgY)

2.0

3.0

In [5]:
# Steps 3 and 4: standard deviation of each sample (stdX, stdY).
# ddof=0 makes the population form (divide by n) explicit.
stdX = X.std(ddof=0)
stdY = Y.std(ddof=0)

# Show the standard deviation of X first, then of Y.
display(stdX)

display(stdY)

1.4142135623730951

1.4142135623730951

In [6]:
# With the ingredients above, sum the products of the paired deviations
# (Xi - avgX) * (Yi - avgY) and divide by n * stdX * stdY, n being the sample count.
R = np.multiply(X - avgX, Y - avgY).sum() / (X.size * (stdX * stdY))

display(R)

0.9999999999999998

In [7]:
# Let's define the pearson correlation as a function
def pearsonCorr(X, Y):
    """Return the Pearson correlation coefficient of two paired samples.

    The value is computed as
    sum((Xi - avgX) * (Yi - avgY)) / (n * stdX * stdY),
    where stdX and stdY are the standard deviations returned by np.std
    and n is the number of samples.

    Parameters
    ----------
    X, Y : array-like
        Numeric samples (list, tuple, NumPy array, pandas Series, ...).
        Values are paired by position; both inputs must have the same shape.

    Returns
    -------
    float
        The correlation coefficient, or NaN when the inputs are empty or
        either sample has zero variance (the coefficient is undefined then).

    Raises
    ------
    ValueError
        If X and Y do not have the same shape.
    """
    # Convert up front so plain Python sequences work and so that labelled
    # inputs are paired by position rather than re-aligned on their labels.
    x = np.asarray(X, dtype=float)
    y = np.asarray(Y, dtype=float)

    # Reject mismatched inputs explicitly; otherwise a length-1 input would
    # silently broadcast against the other one and yield a meaningless value.
    if x.shape != y.shape:
        raise ValueError(
            "X and Y must have the same shape, got %s and %s" % (x.shape, y.shape)
        )

    n = x.size
    if n == 0:
        return np.nan

    avgX = np.average(x)
    avgY = np.average(y)

    stdX = np.std(x)
    stdY = np.std(y)

    # A constant sample has zero spread: return NaN directly instead of
    # relying on a 0/0 division that emits a RuntimeWarning.
    denominator = n * (stdX * stdY)
    if denominator == 0:
        return np.nan

    return np.sum((x - avgX) * (y - avgY)) / denominator

In [14]:
# Load the dataset of project 1: read with ';' as separator, first row as header.
df = pd.read_csv(
    "../csv/forza_2023-05-19.csv",
    sep=";",
    header=0,
)

In [17]:
# Preview the first ten rows of the dataset.
df.head(10)

Unnamed: 0,s_speed_x,s_speed_y,s_speed_z,s_rpm,s_gear,s_angle,s_z,s_damage,s_track_position,s_race_position,s_distance_raced,s_distance_from_start,s_current_laptime,s_last_laptime,a_accelation,a_brake,a_gear,a_steer,a_clutch
0,-0.005887,-0.02797,0.000171,942,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.982,0.0,1.0,0.0,1,0.455342,0.0
1,-0.005887,-0.02797,0.000171,1100,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.962,0.0,1.0,0.0,1,0.455342,0.0
2,-0.005887,-0.02797,0.000171,1263,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.942,0.0,1.0,0.0,1,0.455342,0.0
3,-0.005887,-0.02797,0.000171,1432,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.922,0.0,1.0,0.0,1,0.455342,0.0
4,-0.005887,-0.02797,0.000171,1605,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.902,0.0,1.0,0.0,1,0.455342,0.0
5,-0.005887,-0.02797,0.000171,1783,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.882,0.0,1.0,0.0,1,0.455342,0.0
6,-0.005887,-0.02797,0.000171,1967,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.862,0.0,1.0,0.0,1,0.455342,0.0
7,-0.005887,-0.02797,0.000171,2156,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.842,0.0,1.0,0.0,1,0.455342,0.0
8,-0.005887,-0.02797,0.000171,2359,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.822,0.0,1.0,0.0,1,0.455342,0.0
9,-0.005887,-0.02797,0.000171,2580,0,0.00021,0.345256,0.0,-0.333363,1,0.0,5759.100098,-0.802,0.0,1.0,0.0,1,0.455342,0.0


In [18]:
# Pairwise correlation matrix of all columns (Pearson, spelled out explicitly).
df.corr(method="pearson")

Unnamed: 0,s_speed_x,s_speed_y,s_speed_z,s_rpm,s_gear,s_angle,s_z,s_damage,s_track_position,s_race_position,s_distance_raced,s_distance_from_start,s_current_laptime,s_last_laptime,a_accelation,a_brake,a_gear,a_steer,a_clutch
s_speed_x,1.0,0.355601,0.059532,0.905532,0.906164,0.184579,-0.967646,,0.021412,,-0.025338,-0.227075,-0.162087,0.034182,-0.002312,-0.072378,0.907887,0.257831,
s_speed_y,0.355601,1.0,0.089368,0.317538,0.356833,0.358307,-0.33526,,0.252598,,-0.030582,-0.071539,-0.073987,-0.006806,0.16444,0.019929,0.36112,0.716208,
s_speed_z,0.059532,0.089368,1.0,-0.02493,0.154063,0.030196,-0.049092,,0.304272,,-0.075695,-0.215699,-0.223814,-0.006436,-0.148004,-0.049426,0.15429,-0.009825,
s_rpm,0.905532,0.317538,-0.02493,1.0,0.683874,0.195181,-0.888272,,-0.032487,,-0.01483,-0.139471,-0.093707,0.022146,0.09845,-0.103285,0.70319,0.262149,
s_gear,0.906164,0.356833,0.154063,0.683874,1.0,0.160714,-0.832012,,0.094264,,-0.042022,-0.282137,-0.211707,0.032394,0.026046,-0.019984,0.993908,0.259663,
s_angle,0.184579,0.358307,0.030196,0.195181,0.160714,1.0,-0.173112,,0.032145,,0.013133,-0.058922,-0.014093,0.021736,0.053306,-0.044855,0.161436,0.40051,
s_z,-0.967646,-0.33526,-0.049092,-0.888272,-0.832012,-0.173112,1.0,,0.010067,,0.03444,0.208235,0.163131,-0.025284,0.100369,0.051178,-0.834912,-0.260364,
s_damage,,,,,,,,,,,,,,,,,,,
s_track_position,0.021412,0.252598,0.304272,-0.032487,0.094264,0.032145,0.010067,,1.0,,-0.02861,-0.150888,-0.120202,0.010454,-0.018901,-0.083733,0.091803,-0.191206,
s_race_position,,,,,,,,,,,,,,,,,,,


In [20]:
# Check the hand-written function on two columns of the dataset;
# the result can be compared with the matching entry of the df.corr() table.
R = pearsonCorr(
    df.loc[:, 's_speed_y'],
    df.loc[:, 'a_steer'],
)

display(R)

0.7162081208660015

In [21]:
# Double check against NumPy: the off-diagonal entries of the 2x2 matrix
# are the correlation between the two columns.
display(
    np.corrcoef(
        x=df['s_speed_y'],
        y=df['a_steer'],
    )
)

array([[1.        , 0.71620812],
       [0.71620812, 1.        ]])