# RF Model Training

## imports

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib
from sklearn.model_selection import LeaveOneOut
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score

## code

In [None]:
# Task to train for. It selects both the input CSV and the name of the
# saved model file.
# Allowed values: ZL_trace, ZL_predict, PL_trace, PL_predict
task_name = "ZL_trace"

In [None]:
# Load the full dataset for the selected task.
# `f` keeps the pandas DataFrame; `df` is the same data as a NumPy array.
csv_path = f'Datasets/Full_DS/{task_name}.csv'
f = pd.read_csv(csv_path)
df = f.to_numpy()

# Show (n_rows, n_columns) as the cell output.
df.shape

In [None]:
# Selected feature indices per task (noted as ZLT / ZLP / PLT / PLP in the
# original notebook comments). Looking the list up by `task_name` keeps the
# features in sync with the dataset that is loaded and the model file that is
# saved; previously the PL_predict list was hard-coded regardless of the task.
features_by_task = {
    'ZL_trace': [6, 7, 20, 26, 28, 29],
    'ZL_predict': [2, 6, 8, 24],
    'PL_trace': [0, 1, 3, 6, 8, 14, 15, 21, 22, 23],
    'PL_predict': [2, 3, 4, 6, 8, 10, 14, 23, 24, 26, 28],
}

if task_name not in features_by_task:
    raise ValueError(
        f"Unknown task_name {task_name!r}; "
        f"expected one of {sorted(features_by_task)}"
    )

flist = features_by_task[task_name]

# Shift every feature index by one to get the dataset column position
# (presumably because column 0 holds the label, which this notebook reads
# as `y`).
changed_list = [n + 1 for n in flist]

In [None]:
# Feature matrix: the selected columns of the NumPy dataset.
X = df[:, changed_list]

# Labels: first column of the DataFrame, cast to integers.
y = f.iloc[:, 0].astype('int')

# Show the number of samples per class as the cell output.
y.value_counts()

In [None]:
# Random-forest hyperparameters per task (noted as ZLT / ZLP / PLT / PLP in
# the original notebook comments). Plain dict literals replace the former
# `eval` of a constant string, and the lookup by `task_name` keeps the
# hyperparameters in sync with the task being trained; previously the
# PL_predict settings were hard-coded regardless of the task.
params_by_task = {
    'ZL_trace': {'bootstrap': True, 'criterion': 'gini', 'max_depth': 10,
                 'min_samples_leaf': 1, 'min_samples_split': 3,
                 'n_estimators': 50},
    'ZL_predict': {'bootstrap': True, 'criterion': 'gini', 'max_depth': 10,
                   'min_samples_leaf': 1, 'min_samples_split': 2,
                   'n_estimators': 400},
    'PL_trace': {'bootstrap': True, 'criterion': 'gini', 'max_depth': 10,
                 'min_samples_leaf': 1, 'min_samples_split': 2,
                 'n_estimators': 200},
    'PL_predict': {'bootstrap': True, 'criterion': 'gini', 'max_depth': 10,
                   'min_samples_leaf': 1, 'min_samples_split': 2,
                   'n_estimators': 100},
}

if task_name not in params_by_task:
    raise ValueError(
        f"Unknown task_name {task_name!r}; "
        f"expected one of {sorted(params_by_task)}"
    )

# Copy so later edits to `params` do not alter the lookup table.
params = dict(params_by_task[task_name])

In [None]:
# Build the model: standardise the features, then classify with a random
# forest configured from `params` (fixed seed for reproducibility).
stdsc = StandardScaler()
model = RandomForestClassifier(**params, random_state=0)

pipeline = Pipeline([
    ('scaler', stdsc),
    ('classifier', model)
])

# Leave-one-out cross-validation: each sample is held out once and predicted
# by a pipeline fitted on all the other samples.
cv = LeaveOneOut()

y_true, y_pred = [], []
for train_ix, test_ix in cv.split(X):
    # Positional indexing (`iloc`) so the split does not depend on the
    # labels of the Series index.
    X_train, X_test = X[train_ix, :], X[test_ix, :]
    y_train, y_test = y.iloc[train_ix], y.iloc[test_ix]

    # Refit the whole pipeline on this fold's training samples; the scaler
    # is fitted on the training part only.
    pipeline.fit(X_train, y_train)

    # Predict the single held-out sample.
    yhat = pipeline.predict(X_test)

    # Record the true and predicted label for this sample.
    y_true.append(y_test.iloc[0])
    y_pred.append(yhat[0])

# Cross-validated accuracy over all held-out predictions.
acc = accuracy_score(y_true, y_pred)
print('Accuracy: %.3f' % acc)

# Refit on the complete dataset before saving. After the loop the pipeline
# only holds the last fold's fit, which has never seen the final sample.
pipeline.fit(X, y)

# Save the entire pipeline (scaler + classifier) to disk.
filename = f"RF_Models/{task_name}.joblib"
joblib.dump(pipeline, filename)
print(f"Complete pipeline saved to {filename}")

## Confusion Matrix 

In [None]:
# Plot the confusion matrix of the leave-one-out predictions.
plt.figure()  # start a new figure so earlier plots are not drawn over
cm = confusion_matrix(y, y_pred)
# fmt='g' prints the counts in full; seaborn's default '.2g' would show
# counts of 100 or more in scientific notation (e.g. 1.2e+02).
sns.heatmap(cm, annot=True, fmt='g')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title(task_name+' Confusion Matrix')
plt.show()