**Imports**

In [190]:
from pathlib import Path
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
# Principal Component Analysis
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline     
sns.set(color_codes=True)

**Data Collection**

In [191]:
# Import data
dfCollected = pd.read_csv("../torcs-ai_client_examples/scr-client-cpp/data/aalborg_2023-05-22.csv", sep=";")
type(dfCollected)
dfCollected.shape

(8406, 41)

**Data Inspection**

In [192]:
# To display the top 5 rows
dfCollected.head()

Unnamed: 0,m_client_name,m_timestamp,s_speed_x,s_speed_y,s_speed_z,s_rpm,s_gear,s_angle,s_z,s_damage,...,1,0.000000.1,2032.560059,-0.982000,0.000000.2,1.000000,0.000000.3,1.1,-0.444195,0.000000.4
0,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1100,0,0.003956,0.345637,0.0,...,,,,,,,,,,
1,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1263,0,0.003956,0.345637,0.0,...,,,,,,,,,,
2,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1432,0,0.003956,0.345637,0.0,...,,,,,,,,,,
3,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1605,0,0.003956,0.345637,0.0,...,,,,,,,,,,
4,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1783,0,0.003956,0.345637,0.0,...,,,,,,,,,,


In [193]:
# To display last 5 rows
dfCollected.tail()

Unnamed: 0,m_client_name,m_timestamp,s_speed_x,s_speed_y,s_speed_z,s_rpm,s_gear,s_angle,s_z,s_damage,...,1,0.000000.1,2032.560059,-0.982000,0.000000.2,1.000000,0.000000.3,1.1,-0.444195,0.000000.4
8401,SCR-CLIENT-CPP,1684747639,82.063599,-0.376508,0.00535,4666,3,-0.004572,0.342309,815.0,...,,,,,,,,,,
8402,SCR-CLIENT-CPP,1684747639,82.449097,-0.423506,-0.055585,4657,3,-0.002153,0.341961,815.0,...,,,,,,,,,,
8403,SCR-CLIENT-CPP,1684747639,82.847198,-0.336359,-0.007995,4586,3,0.00154,0.342237,815.0,...,,,,,,,,,,
8404,SCR-CLIENT-CPP,1684747639,83.216499,-0.282893,-0.019724,4603,3,0.005437,0.341482,815.0,...,,,,,,,,,,
8405,SCR-CLIENT-CPP,1684747639,83.599297,-0.201845,-0.001003,4486,3,0.008722,0.342287,815.0,...,,,,,,,,,,


In [194]:
dfCollected.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8406 entries, 0 to 8405
Data columns (total 41 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   m_client_name           8406 non-null   object 
 1   m_timestamp             8406 non-null   int64  
 2   s_speed_x               8406 non-null   float64
 3   s_speed_y               8406 non-null   float64
 4   s_speed_z               8406 non-null   float64
 5   s_rpm                   8406 non-null   int64  
 6   s_gear                  8406 non-null   int64  
 7   s_angle                 8406 non-null   float64
 8   s_z                     8406 non-null   float64
 9   s_damage                8406 non-null   float64
 10  s_track_position        8406 non-null   float64
 11  s_race_position         8406 non-null   int64  
 12  s_distance_raced        8406 non-null   float64
 13  s_distance_from_start   8406 non-null   float64
 14  s_current_laptime       8406 non-null   

In [195]:
# Check data some more
dfCollected.describe()

Unnamed: 0,m_timestamp,s_speed_x,s_speed_y,s_speed_z,s_rpm,s_gear,s_angle,s_z,s_damage,s_track_position,...,1,0.000000.1,2032.560059,-0.982000,0.000000.2,1.000000,0.000000.3,1.1,-0.444195,0.000000.4
count,8406.0,8406.0,8406.0,8406.0,8406.0,8406.0,8406.0,8406.0,8406.0,8406.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,1684748000.0,84.681143,0.323207,-0.001711,4536.26029,2.936712,0.009986,0.341268,672.746134,0.042174,...,,,,,,,,,,
std,54.24398,33.134316,0.645187,2.532028,1022.028234,0.822811,0.047777,0.004691,205.100176,0.181828,...,,,,,,,,,,
min,1684747000.0,-0.008275,-10.0354,-12.0012,942.0,0.0,-0.1893,0.314428,0.0,-0.512339,...,,,,,,,,,,
25%,1684748000.0,57.407151,-0.006477,-0.065417,3791.0,2.0,-0.00922,0.339088,482.0,-0.058187,...,,,,,,,,,,
50%,1684748000.0,79.482903,0.272377,-0.000986,4410.0,3.0,0.007196,0.342447,815.0,-0.025388,...,,,,,,,,,,
75%,1684748000.0,101.832251,0.683625,0.077094,5061.75,4.0,0.017154,0.344395,815.0,0.110851,...,,,,,,,,,,
max,1684748000.0,149.578003,13.2897,11.9542,9583.0,4.0,0.320524,0.350762,815.0,0.54578,...,,,,,,,,,,


**Feature Selection**

In [196]:
features = ['s_speed_x', 's_speed_y','s_angle','s_track_position']
X = dfCollected.loc[:, features].values
X.shape
type(X)

numpy.ndarray

**Define Target**

In [198]:
target = ['a_acceleration', 'a_brake', 'a_steer']
y = dfCollected.loc[:, target].values

In [199]:
pcaTransform = PCA(n_components=2)

In [200]:
pcaComponents = pcaTransform.fit_transform(X)

**Normalize Data**

In [201]:
X = StandardScaler().fit_transform(X)

**Feature Extraction**

In [203]:
principalDf = pd.DataFrame(data=pcaComponents, columns=['PCA1', 'PCA2'])
principalDf.head()
extractedDf = pd.concat( [principalDf, dfCollected], axis=1)
extractedDf.shape
extractedDf.head()

Unnamed: 0,PCA1,PCA2,m_client_name,m_timestamp,s_speed_x,s_speed_y,s_speed_z,s_rpm,s_gear,s_angle,...,1,0.000000.1,2032.560059,-0.982000,0.000000.2,1.000000,0.000000.3,1.1,-0.444195,0.000000.4
0,84.685083,-0.907262,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1100,0,0.003956,...,,,,,,,,,,
1,84.685083,-0.907262,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1263,0,0.003956,...,,,,,,,,,,
2,84.685083,-0.907262,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1432,0,0.003956,...,,,,,,,,,,
3,84.685083,-0.907262,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1605,0,0.003956,...,,,,,,,,,,
4,84.685083,-0.907262,SCR-CLIENT-CPP,1684747453,-0.008275,-0.025208,-0.000158,1783,0,0.003956,...,,,,,,,,,,


**Data Visualization**

In [None]:
ax.set