# PathPilot ML training
##### Author: [Joseph Selva Raj]

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from micromlgen import port

### Loading the data

Load the LIDAR measurement data from the text file.

In [None]:
# Location of the recorded LIDAR measurements (comma-separated text file).
lidar_log_path = 'C:\\Users\\josep\\Documents\\Github Repo\\PathPilot\\PathPilot\\MASTERDATA360.TXT'

# header=None: the first line is read as data, so columns are numbered 0..n-1.
data = pd.read_csv(lidar_log_path, header=None)

# Show the first rows as a quick sanity check of the parsed file.
print(data.head())

### Data cleaning

Rename the last column to "Label" and clean the data by removing every row whose label is not one of the forward command labels listed below.

The processed data should only contain the LIDAR measurements and the corresponding command labels:
- F - forward
- R - forward right
- L - forward left

In [None]:
# The command label is stored in the last column; give it a readable name.
last_column = data.columns[-1]
data = data.rename(columns={last_column: 'Label'})

counts_before = data['Label'].value_counts()
print(f"Label counts before cleaning the data: \n {counts_before}")

# Keep only the rows labelled F, L or R, then renumber the rows from 0.
is_forward_command = data['Label'].isin(['F', 'L', 'R'])
data = data[is_forward_command].reset_index(drop=True)

counts_after = data['Label'].value_counts()
print(f"Label counts after cleaning the data: \n {counts_after}")

### Split data into train and test sets
Separate the data into `X` (input) and `y` (output) and divide them into train and test sets with `train_test_split`.
A label encoder converts the labels from characters to integers so that they can be passed to the classifier.

In [None]:
# Inputs are every column except the last; the last column is the command label.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Convert the character labels to integer class codes for the classifier.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# LabelEncoder numbers the classes 0..n-1 in the order of `classes_`, so
# enumerating `classes_` gives the same codes as `transform(classes_)`.
# Using plain Python ints keeps the printed mapping free of NumPy scalar
# wrappers (e.g. `np.int64(0)` under NumPy 2), so it can be copied directly.
label_mapping = {str(label): code for code, label in enumerate(label_encoder.classes_)}
print(f"Label encoding mapping for motor control in Arduino code: {label_mapping}")

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Training the model
Training the model is a straightforward process, thanks to all the libraries available in Python. The outcome of the training process depends on the dataset and the preceding steps. Post-training, accuracy will be computed using the test set, and a higher accuracy is desirable.

In [None]:
# Train a multi-layer perceptron with scikit-learn's default settings;
# the fixed seed keeps the training run repeatable.
clf = MLPClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict the held-out test rows and report the overall accuracy.
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

class_names = label_encoder.classes_
# Pass the encoded labels (0..n-1) explicitly. Without `labels`, the report
# infers the classes from y_test/y_pred and raises ValueError when a class is
# missing from both, because the count no longer matches `target_names`.
report = classification_report(
    y_test,
    y_pred,
    labels=list(range(len(class_names))),
    target_names=class_names,
    zero_division=0,
)
print('Classification Report:\n', report)