In [2]:
import pandas as pd

# Load the dataset
dataset = pd.read_csv("G-Speedway.csv")

# Display the first 5 rows
print("--- First 5 rows ---")
print(dataset.head())

# Get information about columns and data types
print("\n--- Column Info ---")
dataset.info()

# Get descriptive statistics for numerical columns
print("\n--- Numerical Column Stats ---")
print(dataset.describe())

--- First 5 rows ---
     speedX    speedY    speedZ      rpm  fuel  damage  sensor_gear  racePos  \
0 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   
1 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   
2 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   
3 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   
4 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   

   distFromStart  distRaced  ...  wheelSpinVel_1  wheelSpinVel_2  \
0         2952.6        0.0  ...             0.0             0.0   
1         2952.6        0.0  ...             0.0             0.0   
2         2952.6        0.0  ...             0.0             0.0   
3         2952.6        0.0  ...             0.0             0.0   
4         2952.6        0.0  ...             0.0             0.0   

   wheelSpinVel_3  accel  brake  steer  control_gear  clutch  focus  meta  
0            

In [3]:
# 1. Drop 'meta', 'track_*', and 'opponents_*' columns
columns_to_drop = ['meta'] + [col for col in dataset.columns if col.startswith('track_')] + [col for col in dataset.columns if col.startswith('opponents_')]
dataset = dataset.drop(columns=columns_to_drop)

# 2. Convert 'control_gear' to integer
dataset['control_gear'] = dataset['control_gear'].astype(int)

# 3. One-hot encode 'control_gear'
dataset = pd.get_dummies(dataset, columns=['control_gear'], prefix='gear')

# Display the first 5 rows and info to check the changes
print("--- First 5 rows after dropping and encoding ---")
print(dataset.head())
print("\n--- Dataset Info after dropping and encoding ---")
dataset.info()

--- First 5 rows after dropping and encoding ---
     speedX    speedY    speedZ      rpm  fuel  damage  sensor_gear  racePos  \
0 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   
1 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   
2 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   
3 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   
4 -0.001666  0.106768 -0.032091  942.478  94.0     1.0            0        1   

   distFromStart  distRaced  ...  wheelSpinVel_0  wheelSpinVel_1  \
0         2952.6        0.0  ...             0.0             0.0   
1         2952.6        0.0  ...             0.0             0.0   
2         2952.6        0.0  ...             0.0             0.0   
3         2952.6        0.0  ...             0.0             0.0   
4         2952.6        0.0  ...             0.0             0.0   

   wheelSpinVel_2  wheelSpinVel_3  accel  brake  steer  clutc

In [4]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler  # Import MinMaxScaler

# 1. Convert gear_* columns to int8
gear_cols = [col for col in dataset.columns if col.startswith('gear_')]
dataset[gear_cols] = dataset[gear_cols].astype('int8')

# 2. Identify numerical columns (excluding the one-hot encoded gear columns)
numerical_cols = [col for col in dataset.columns if dataset[col].dtype in ['float64', 'int64'] and not col.startswith('gear_')]

# 3. Apply MinMaxScaler to numerical columns
scaler = MinMaxScaler()
dataset[numerical_cols] = scaler.fit_transform(dataset[numerical_cols])

# 4. Final Check
print("\n--- First 5 rows after scaling ---")
print(dataset.head())
print("\n--- Dataset Info after scaling ---")
dataset.info()


--- First 5 rows after scaling ---
     speedX    speedY   speedZ       rpm  fuel  damage  sensor_gear  racePos  \
0  0.415844  0.532376  0.52362  0.090295   1.0     0.0     0.142857      0.0   
1  0.415844  0.532376  0.52362  0.090295   1.0     0.0     0.142857      0.0   
2  0.415844  0.532376  0.52362  0.090295   1.0     0.0     0.142857      0.0   
3  0.415844  0.532376  0.52362  0.090295   1.0     0.0     0.142857      0.0   
4  0.415844  0.532376  0.52362  0.090295   1.0     0.0     0.142857      0.0   

   distFromStart  distRaced  ...  wheelSpinVel_0  wheelSpinVel_1  \
0       0.991631        0.0  ...        0.385991        0.413626   
1       0.991631        0.0  ...        0.385991        0.413626   
2       0.991631        0.0  ...        0.385991        0.413626   
3       0.991631        0.0  ...        0.385991        0.413626   
4       0.991631        0.0  ...        0.385991        0.413626   

   wheelSpinVel_2  wheelSpinVel_3  accel  brake  steer  clutch  focus  gea

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split

# 1. Separate features (X) and target (y)
target_cols = ['accel', 'brake', 'steer', 'clutch', 'focus'] + [col for col in dataset.columns if col.startswith('gear_')]
X = dataset.drop(columns=target_cols)
y = dataset[target_cols]

# 2. Reshape X for RNN input [samples, time steps, features]
X_reshaped = X.values.reshape(X.shape[0], 1, X.shape[1])  # Assuming 1 time step

# 3. Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y, test_size=0.2, random_state=42)

# 4. Build the RNN model
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(y_train.shape[1], activation='linear'))  # Linear activation for regression

# 5. Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# 6. Model Summary
model.summary()

  super().__init__(**kwargs)


In [6]:
# 1. Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)

# 2. Evaluate the model on the test set
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# 3. Make predictions
y_pred = model.predict(X_test)

# 4. Print a few predictions
print("\n--- Sample Predictions (first 5) ---")
print(y_pred[:5])

Epoch 1/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0684 - val_loss: 0.0247
Epoch 2/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0238 - val_loss: 0.0235
Epoch 3/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0231 - val_loss: 0.0229
Epoch 4/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0222 - val_loss: 0.0222
Epoch 5/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0224 - val_loss: 0.0218
Epoch 6/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0213 - val_loss: 0.0215
Epoch 7/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0212 - val_loss: 0.0212
Epoch 8/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0213 - val_loss: 0.0210
Epoch 9/20
[1m710/710[0m [32m━━━━━━━━

In [8]:
# 1. Print data types of the relevant variables
print("\n--- Data Types ---")
print("Type of y_pred_continuous:", type(y_pred_continuous))
print("Type of y_test_continuous:", type(y_test_continuous))
print("Data type of elements in y_pred_continuous:", y_pred_continuous.dtype)
print("Data type of elements in y_test_continuous:", y_test_continuous.iloc[0].dtype)

# 2. Print shapes of the variables
print("\n--- Shapes ---")
print("Shape of y_pred_continuous:", y_pred_continuous.shape)
print("Shape of y_test_continuous:", y_test_continuous.shape)


--- Data Types ---
Type of y_pred_continuous: <class 'numpy.ndarray'>
Type of y_test_continuous: <class 'pandas.core.frame.DataFrame'>
Data type of elements in y_pred_continuous: float32
Data type of elements in y_test_continuous: float64

--- Shapes ---
Shape of y_pred_continuous: (6308, 0)
Shape of y_test_continuous: (6308, 0)


In [9]:
import numpy as np
from sklearn.metrics import mean_absolute_error, r2_score

# 1. Extract gear predictions and convert to single values
gear_pred_one_hot = y_pred[:, -6:]
gear_pred = np.argmax(gear_pred_one_hot, axis=1)

gear_true_one_hot = y_test.iloc[:, -6:]
gear_true = np.argmax(gear_true_one_hot.values, axis=1)

# 2. Extract predictions for continuous variables (CORRECTED)
y_pred_continuous = y_pred[:, :5]  # First 5 columns
y_test_continuous = y_test.iloc[:, :5]

# 3. Calculate evaluation metrics
mae = mean_absolute_error(y_test_continuous, y_pred_continuous)
r2 = r2_score(y_test_continuous, y_pred_continuous)
accuracy = np.mean(gear_pred == gear_true)

print("\n--- Evaluation Metrics ---")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2): {r2}")
print(f"Gear Prediction Accuracy: {accuracy}")


--- Evaluation Metrics ---
Mean Absolute Error (MAE): 0.07015375792980194
R-squared (R2): 0.11659787595272064
Gear Prediction Accuracy: 0.24143944197844008


LTMS

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# 1. Build the RNN model with two LSTM layers
model_complex = Sequential()
model_complex.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))  # First LSTM, return sequences
model_complex.add(LSTM(64))  # Second LSTM
model_complex.add(Dense(y_train.shape[1], activation='linear'))

# 2. Compile the model
model_complex.compile(optimizer='adam', loss='mean_squared_error')

# 3. Model Summary
model_complex.summary()

# 4. Train the model
history_complex = model_complex.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

# 5. Evaluate the model
loss_complex = model_complex.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Loss (Complex Model): {loss_complex}")

# 6. Make predictions
y_pred_complex = model_complex.predict(X_test, verbose=0)

# 7. Print a few predictions
print("\n--- Sample Predictions (Complex Model, first 5) ---")
print(y_pred_complex[:5])

  super().__init__(**kwargs)


Epoch 1/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 0.0744 - val_loss: 0.0239
Epoch 2/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0235 - val_loss: 0.0228
Epoch 3/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0230 - val_loss: 0.0222
Epoch 4/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0219 - val_loss: 0.0228
Epoch 5/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0213 - val_loss: 0.0220
Epoch 6/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 0.0209 - val_loss: 0.0209
Epoch 7/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0206 - val_loss: 0.0208
Epoch 8/20
[1m710/710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 0.0205 - val_loss: 0.0203
Epoch 9/20
[1m710/710[0m [32m━━━━━━━━