In [93]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [94]:
# Step 1: Read the data
data = pd.read_csv('homework_exampledata.csv', delimiter=';')

In [95]:
# clean-up data from nan-values
data = data.dropna()

In [96]:
# show the shape of the data
data.shape

(49977, 6)

In [97]:
# Step 2: Pre-process the data
# Convert time to a relative time (it's not strictly necessary but sometimes helps)
data['time'] = (data['time'] - data['time'].min())

In [98]:
# Step 3: Split the dataset
features = data[['time', 'brake-value', 'yaw-value', 'longitudinal-acceleration', 'lateral-acceleration']]
labels = data['velocity-value']
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [99]:
# Z-normalization
from sklearn.preprocessing import StandardScaler

# Create the standardizer
scaler = StandardScaler()

# Fit on the training data
scaler.fit(X_train)

# Transform both the training and testing data
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [54]:
# Min-max normalization
from sklearn.preprocessing import MinMaxScaler

# Create the scaler
scaler = MinMaxScaler()

# Fit on the training data
scaler.fit(X_train)

# Transform both the training and test data
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [100]:
# shape of training set
X_train.shape, y_train.shape

((39981, 5), (39981,))

In [101]:
# shape of a test set
X_test.shape, y_test.shape

((9996, 5), (9996,))

# Appy Linear Regression

In [102]:
# Step 4: Train a regression model
model = LinearRegression()
model.fit(X_train, y_train)

LinearRegression()

In [103]:
# Step 5: Evaluate the model
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 322.73665978714416


# Apply Random Forest

In [104]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [105]:
model = RandomForestRegressor(n_estimators=300, random_state=42)

In [106]:
# Fit the model
model.fit(X_train, y_train)

RandomForestRegressor(n_estimators=300, random_state=42)

In [107]:
# Evaluate the model
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error with Random Forest: {mse}')

Mean Squared Error with Random Forest: 5.825557230152839


# Apply Gradient Boosting

In [108]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

In [109]:
# Create a Gradient Boosting Regressor model
gb_model = GradientBoostingRegressor(n_estimators=300, learning_rate=0.1, max_depth=8, random_state=42)

In [110]:
# Fit the model to the training data
gb_model.fit(X_train, y_train)

GradientBoostingRegressor(max_depth=8, n_estimators=300, random_state=42)

In [111]:
# Use the model to make predictions on the test data
gb_predictions = gb_model.predict(X_test)

In [112]:
# Calculate the mean squared error of the predictions
mse_gb = mean_squared_error(y_test, gb_predictions)

In [113]:
# Output the mean squared error
print(f'Mean Squared Error with Gradient Boosting: {mse_gb}')

Mean Squared Error with Gradient Boosting: 1.951895057907476


# Apply XGBoost

In [114]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [115]:
# Create an XGBoost model
xgb_model = xgb.XGBRegressor(objective ='reg:squarederror', n_estimators=300, learning_rate=0.1, max_depth=8, random_state=42)

In [116]:
# Fit the model to the training data
xgb_model.fit(X_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.1, max_delta_step=0,
             max_depth=8, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=300, n_jobs=12,
             num_parallel_tree=1, predictor='auto', random_state=42,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [117]:
# Use the model to make predictions on the test data
xgb_predictions = xgb_model.predict(X_test)

In [118]:
# Calculate the mean squared error of the predictions
mse_xgb = mean_squared_error(y_test, xgb_predictions)

In [119]:
# Output the mean squared error
print(f'Mean Squared Error with XGBoost: {mse_xgb}')

Mean Squared Error with XGBoost: 1.9872652737726209


# Apply LightGBM

In [120]:
import lightgbm as lgb
from sklearn.metrics import mean_squared_error

In [121]:
# Create a LightGBM model
lgb_model = lgb.LGBMRegressor(n_estimators=300, learning_rate=0.1, max_depth=8, random_state=42)

In [122]:
# Fit the model to the training data
lgb_model.fit(X_train, y_train)



LGBMRegressor(max_depth=8, n_estimators=300, random_state=42)

In [123]:
# Use the model to make predictions on the test data
lgb_predictions = lgb_model.predict(X_test)

In [124]:
# Calculate the mean squared error of the predictions
mse_lgb = mean_squared_error(y_test, lgb_predictions)

In [125]:
# Output the mean squared error
print(f'Mean Squared Error with LightGBM: {mse_lgb}')

Mean Squared Error with LightGBM: 12.820186582456426


# Neural Networks

In [208]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np

In [209]:
X = data.values[:, 0:5] # features
y = data.values[:, 5] # labels(velocity)

In [210]:
type(X), type(y)

(numpy.ndarray, numpy.ndarray)

In [211]:
X.shape, y.shape

((49977, 5), (49977,))

In [212]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [213]:
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [214]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [215]:
# Define the neural network
class SimpleNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [216]:
# Initialize the model, loss function, and optimizer
input_dim = X_train.shape[1]
hidden_dim = 10
output_dim = 1

In [217]:
model = SimpleNN(input_dim, hidden_dim, output_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [218]:
# Training the model
num_epochs = 10000
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs.squeeze(), y_train_tensor)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')

Epoch [10/10000], Loss: 617.89697265625
Epoch [20/10000], Loss: 594.3485107421875
Epoch [30/10000], Loss: 563.5932006835938
Epoch [40/10000], Loss: 524.334228515625
Epoch [50/10000], Loss: 477.91119384765625
Epoch [60/10000], Loss: 428.2763671875
Epoch [70/10000], Loss: 381.4322509765625
Epoch [80/10000], Loss: 344.00408935546875
Epoch [90/10000], Loss: 319.67864990234375
Epoch [100/10000], Loss: 307.0384216308594
Epoch [110/10000], Loss: 301.1793212890625
Epoch [120/10000], Loss: 297.4721984863281
Epoch [130/10000], Loss: 294.13421630859375
Epoch [140/10000], Loss: 291.0730285644531
Epoch [150/10000], Loss: 288.4329528808594
Epoch [160/10000], Loss: 286.1910400390625
Epoch [170/10000], Loss: 284.2213134765625
Epoch [180/10000], Loss: 282.494140625
Epoch [190/10000], Loss: 281.0892028808594
Epoch [200/10000], Loss: 279.87603759765625
Epoch [210/10000], Loss: 278.7787780761719
Epoch [220/10000], Loss: 277.78216552734375
Epoch [230/10000], Loss: 276.90399169921875
Epoch [240/10000], Loss

In [219]:
# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor).squeeze().numpy()
    mse = mean_squared_error(y_test, predictions)
    print(f'Mean Squared Error with Neural Network: {mse}')

Mean Squared Error with Neural Network: 149.9329547691226
