# **Imports**

In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OneHotEncoder

# **Reading Data**

In [2]:
df = pd.read_csv('../Data/final_combined_data.csv')

# **Feature Engineering**

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Latitude                   5000 non-null   float64
 1   Longitude                  5000 non-null   float64
 2   Altitude                   5000 non-null   float64
 3   Distance                   5000 non-null   float64
 4   Alignment                  5000 non-null   bool   
 5   Distance_Sensors           5000 non-null   float64
 6   Proximity                  5000 non-null   bool   
 7   Wheel_Speed                5000 non-null   float64
 8   Velocity                   5000 non-null   float64
 9   Hall_Effect                5000 non-null   bool   
 10  Emergency_Stop             5000 non-null   bool   
 11  Temperature                5000 non-null   float64
 12  Humidity                   5000 non-null   float64
 13  Light_Level                5000 non-null   float

In [4]:
df.columns

Index(['Latitude', 'Longitude', 'Altitude', 'Distance', 'Alignment',
       'Distance_Sensors', 'Proximity', 'Wheel_Speed', 'Velocity',
       'Hall_Effect', 'Emergency_Stop', 'Temperature', 'Humidity',
       'Light_Level', 'Alignment_Label', 'Safety_Label',
       'Collision_Avoidance_Label', 'Chaining_Unchaining_Label'],
      dtype='object')

In [6]:
# Relative Position and Angle
df["Relative_Distance"] = df["Distance"].diff()
df["Relative_Angle"] = df["Longitude"].diff()

In [7]:
# Relative Velocity
df["Relative_Velocity"] = df["Velocity"].diff()

In [8]:
# Time Series Features
window_size = 10
df["Rolling_Mean_Wheel_Speed"] = df["Wheel_Speed"].rolling(window=window_size).mean()
df["Rolling_Max_Distance"] = df["Distance"].rolling(window=window_size).max()

In [10]:
# Safety and Emergency Features
df["Safety_Activation"] = (df["Hall_Effect"] | df["Emergency_Stop"]).astype(int)
df["Safety_Duration"] = df.groupby((df["Safety_Activation"] != df["Safety_Activation"].shift()).cumsum())["Safety_Activation"].cumcount()


In [11]:
# Environmental Conditions Interaction
df["Temp_Humidity_Interact"] = df["Temperature"] * df["Humidity"]
df["Light_Temp_Interact"] = df["Light_Level"] * df["Temperature"]

In [12]:
# Alignment and Proximity Trends
window_size = 10
df["Alignment_Percentage"] = df["Alignment"].rolling(window=window_size).mean() * 100
df["Proximity_Trend"] = df["Proximity"].rolling(window=window_size).sum()

In [13]:
# Interaction Features
df["Alignment_Velocity_Interaction"] = df["Alignment"] * df["Velocity"]

In [14]:
# Estimate time to collision
df["Time_To_Collision"] = df["Distance"] / df["Velocity"]

In [None]:
# Save the updated combined data with new features to a new CSV file
# df.to_csv("combined_data_with_features.csv", index=False)

print("Combined data with new features saved.")

In [15]:
df.columns

Index(['Latitude', 'Longitude', 'Altitude', 'Distance', 'Alignment',
       'Distance_Sensors', 'Proximity', 'Wheel_Speed', 'Velocity',
       'Hall_Effect', 'Emergency_Stop', 'Temperature', 'Humidity',
       'Light_Level', 'Alignment_Label', 'Safety_Label',
       'Collision_Avoidance_Label', 'Chaining_Unchaining_Label',
       'Relative_Distance', 'Relative_Angle', 'Relative_Velocity',
       'Rolling_Mean_Wheel_Speed', 'Rolling_Max_Distance', 'Safety_Activation',
       'Safety_Duration', 'Temp_Humidity_Interact', 'Light_Temp_Interact',
       'Alignment_Percentage', 'Proximity_Trend',
       'Alignment_Velocity_Interaction', 'Time_To_Collision'],
      dtype='object')

In [17]:
df.shape

(5000, 31)

In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 31 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Latitude                        5000 non-null   float64
 1   Longitude                       5000 non-null   float64
 2   Altitude                        5000 non-null   float64
 3   Distance                        5000 non-null   float64
 4   Alignment                       5000 non-null   bool   
 5   Distance_Sensors                5000 non-null   float64
 6   Proximity                       5000 non-null   bool   
 7   Wheel_Speed                     5000 non-null   float64
 8   Velocity                        5000 non-null   float64
 9   Hall_Effect                     5000 non-null   bool   
 10  Emergency_Stop                  5000 non-null   bool   
 11  Temperature                     5000 non-null   float64
 12  Humidity                        50

# **Prepping data for Prediction**

In [23]:
# Define the categorical columns for one-hot encoding
categorical_columns = ['Alignment_Label', 'Safety_Label', 'Collision_Avoidance_Label']

In [27]:
# Apply one-hot encoding to categorical columns
encoder = OneHotEncoder(sparse=False, drop="first")  # Drop the first category to avoid multicollinearity
encoded_features = encoder.fit_transform(df[categorical_columns])



In [30]:
# Get the category names from the encoder
category_names = encoder.get_feature_names_out(input_features=categorical_columns)

In [31]:
# Create a DataFrame for the encoded features with proper column names
encoded_df = pd.DataFrame(encoded_features, columns=category_names)

In [None]:
# Concatenate the encoded features with the original data
data_encoded = pd.concat([df, encoded_df], axis=1)

# **Machine Learning**

In [19]:
# Separate features and target variable
X = df.drop(columns=["Chaining_Unchaining_Label"])
y = df["Chaining_Unchaining_Label"]

In [20]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

In [21]:
# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

ValueError: could not convert string to float: 'Not_Aligned'

In [None]:
# Initialize a Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=101)

In [None]:
# Train the classifier
model.fit(X_train_scaled, y_train)

In [None]:
# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

In [None]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error: ", mae)

rmse = mean_squared_error(y_test, y_pred, squared=False)
print("Root Mean Squared Error (RMSE):", rmse)

mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100

print("Mean Absolute Percentage Error (MAPE):", mape)

In [None]:
test_data = pd.concat([X_test, y_test], axis=1)
test_data['predicted_condition'] = y_pred
test_data

In [None]:
test_data1=test_data.sort_values('Chaining_Unchaining_Label', ascending=True)
plt.plot(test_data1['Chaining_Unchaining_Label'], test_data1['predicted_condition'])
plt.plot(test_data1['Chaining_Unchaining_Label'], test_data1['Chaining_Unchaining_Label'])

In [None]:
# Get feature importances
feature_importances = pd.DataFrame(clf.feature_importances_, index=train_data.columns, columns=["Importance"]).sort_values("Importance", ascending=False)

# Plot the feature importances
plt.figure(figsize=(10, 6))
plt.barh(feature_importances.index, feature_importances["Importance"])
plt.xlabel("Feature Importance")
plt.ylabel("Features")
plt.title("Random Forest Feature Importances")
plt.show()