In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# I'm using this guide as my reference: https://towardsdatascience.com/random-forest-in-python-24d0893d51c0

## Import CSV file

In [None]:
# Read the recorded signals from the CSV file into a DataFrame
features = pd.read_csv(filepath_or_buffer='output.csv')

# Optional sanity check (left disabled): list every column name in the file
# print("Full list of all column names:")
# for signal in features.columns:
#     print(signal)

## Pre-Processing of Dataset

In Dyna4, the lateral slip $s_y$ is defined as:
$s_y = \frac{v_y - (r_{dyn} \cdot \Omega)}{\mid r_{dyn} \cdot \Omega \mid \cdot v_{num}}$
For more information, see the 'Slip Calculation' help document from Dyna4.

For a free-rolling tire, the lateral slip $s_y$ and the slip angle $\alpha$ are related by:
$\alpha = \arctan(s_y)$
This is the equation we have to apply to the `TireLateralSlip` signal of each wheel.

## Define Targets and Features, and Split on Training and Test Data

In [None]:
# The column to predict is pulled out of the frame as a NumPy array
targets = np.array(features['VehicleSideSlip'])

# Every remaining column is a model input
features = features.drop(columns='VehicleSideSlip')

# Remember the input column names before the frame is turned into a bare array
feature_list = [*features.columns]

# From here on `features` is a plain NumPy array (the column labels are gone)
features = np.array(features)

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable
train_features, test_features, train_targets, test_targets = train_test_split(
    features,
    targets,
    test_size=0.25,
    random_state=42,
)

# Report the shape of each split as a quick consistency check
for label, split in (
    ('Training Features Shape:', train_features),
    ('Training Labels Shape:', train_targets),
    ('Testing Features Shape:', test_features),
    ('Testing Labels Shape:', test_targets),
):
    print(label, split.shape)

## Train and Test Model

In [None]:
# Build a forest of 1000 decision trees (seeded for repeatable results)
# and fit it on the training split
rf = RandomForestRegressor(random_state=42, n_estimators=1000).fit(train_features, train_targets)

# Predict the held-out test rows
predictions = rf.predict(test_features)

# Element-wise absolute deviation between prediction and ground truth
errors = np.abs(predictions - test_targets)

# Report the mean absolute error (MAE), rounded to two decimals
print('Mean Absolute Error:', round(errors.mean(), 2), 'degrees.')

# Mean absolute percentage error (MAPE) and the derived accuracy figure are
# left disabled.
# NOTE(review): presumably because targets at or near zero make the division
# blow up - confirm before re-enabling.
# mape = 100 * (errors / test_targets)
# accuracy = 100 - np.mean(mape)
# print('Accuracy:', round(accuracy, 2), '%.')

## Post Analysis of Results

In [None]:
# Raw per-feature importances from the fitted forest, paired positionally
# with the names stored in feature_list
importances = list(rf.feature_importances_)

# Rank on the unrounded importances so that features whose scores round to the
# same two-decimal value still appear in their true order; the values are
# rounded only afterwards, for display.
ranked = sorted(zip(feature_list, importances), key=lambda pair: pair[1], reverse=True)

# List of (variable, rounded importance) tuples, most important first
feature_importances = [(feature, round(importance, 2)) for feature, importance in ranked]

# Print one line per variable (a plain loop instead of a list comprehension
# that was used only for its side effect)
for feature, importance in feature_importances:
    print('Variable: {:20} Importance: {}'.format(feature, importance))

# Set the style
plt.style.use('fivethirtyeight')
# x locations of the bars, one per feature in feature_list order
x_values = list(range(len(importances)))
# Bar chart of the raw (unrounded, unsorted) importances
plt.bar(x_values, importances, orientation = 'vertical')
# Label each bar with its feature name; vertical text keeps long names readable
plt.xticks(x_values, feature_list, rotation='vertical')
# Axis labels and title (trailing ';' suppresses the text repr in the notebook)
plt.ylabel('Importance'); plt.xlabel('Variable'); plt.title('Variable Importances');