<a href="https://colab.research.google.com/github/NumanAloko/ML-for-CFS-Built-up-Columns/blob/main/RF_Classification_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Training and testing of a Random Forest (RF) classification model**

In [None]:
# Import Python Packages
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, learning_curve
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

In [None]:
# Load the dataset, encode the target, scale the features and split train/test.
csv_url= "https://raw.githubusercontent.com/NumanAloko/ML-for-CFS-Built-up-Columns/refs/heads/main/CFS_Built-up_Columns_ML_Dataset.csv"
df = (
    pd.read_csv(csv_url, header=0)
    .dropna(how='all')            # discard rows that are completely empty
    .dropna(axis=1, how='all')    # discard columns that are completely empty
)

# Encode the failure-mode labels ('FM') as integers; surrounding whitespace is
# stripped first so that otherwise identical labels map to the same class.
le = LabelEncoder()
df['FM'] = le.fit_transform(df['FM'].str.strip())

# Feature matrix: the seven predictor columns, in a fixed order that the
# prediction and explanation cells rely on through `data_x.columns` / `X.columns`.
feature_cols = ['L', 't', 'h', 'b', 'Fy', 'λc', 'λ(le-d)']
data_x = df[feature_cols]
X = pd.DataFrame(df, columns=data_x.columns)

# Standardise the features.
# NOTE(review): the scaler is fitted on the full dataset before the split, so
# test-set statistics influence the scaling. Fitting on the training portion
# only would be cleaner, but the later prediction cell must then use the same
# fitted scaler — change both together.
sc_X = StandardScaler()
sc_X.fit(X)
X_scaled = sc_X.transform(X)

# 70/30 train/test split for the classification task (fixed seed for repeatability)
split_parts = train_test_split(X_scaled, df['FM'], test_size=0.3, random_state=123)
X_train_class, X_test_class, y_train_class, y_test_class = split_parts

# Report the shapes of the resulting datasets as a sanity check
for part_name, part in (
    ("X_train_class", X_train_class),
    ("X_test_class", X_test_class),
    ("y_train_class", y_train_class),
    ("y_test_class", y_test_class),
):
    print(part_name + " shape:", part.shape)

In [None]:
# Hyperparameter tuning with the grid search method.
#
# Search space explored during tuning (kept here for reference only):
#   n_estimators:      [100, 200, 300]
#   max_depth:         [3, 4, 5, 6, None]
#   min_samples_split: [2, 5, 10]
#   min_samples_leaf:  [1, 2, 4]
#   bootstrap:         [True, False]
#
# The grid below holds only the combination selected after tuning, so the
# search evaluates a single candidate and the cell stays fast to re-run.
param_grid = dict(
    n_estimators=[100],
    max_depth=[None],
    min_samples_split=[5],
    min_samples_leaf=[1],
    bootstrap=[False],
)

# Random Forest (RF) classifier wrapped in a 10-fold cross-validated grid search
rf = RandomForestClassifier(random_state=123)
clf = GridSearchCV(estimator=rf, param_grid=param_grid, cv=10, n_jobs=-1)

# Fit on the training features and targets
clf.fit(X_train_class, y_train_class)

# Cross-validated score of the best candidate
print('Best score for data:', clf.best_score_)

# Hyperparameters of the refitted best estimator
best_rf = clf.best_estimator_
for param_name in ('n_estimators', 'max_depth', 'min_samples_split',
                   'min_samples_leaf', 'bootstrap'):
    print(f'Best {param_name}:', getattr(best_rf, param_name))

# Predictions on both splits, used by the metrics cell
y_pred_class_train, y_pred_class_test = (
    clf.predict(features) for features in (X_train_class, X_test_class)
)

In [None]:
############################################ Performance Metrics ##########################
# Accuracy on the training and test splits
accuracy_train = accuracy_score(y_train_class, y_pred_class_train)
accuracy_test = accuracy_score(y_test_class, y_pred_class_test)

print('Training accuracy: {:.2f}'.format(accuracy_train))
print('Test accuracy: {:.2f}'.format(accuracy_test))

# Confusion matrix on the test split
print('Confusion matrix:')
print(confusion_matrix(y_test_class, y_pred_class_test))

# Per-class precision / recall / F1 on the test split
print('Classification report:')
print(classification_report(y_test_class, y_pred_class_test))

# Class names (inverse encoding), in the order the fitted model uses them.
# They are taken from clf.classes_ rather than from the labels that happen to
# occur in the test split: predict_proba columns follow clf.classes_, so names
# derived from the test set would be misaligned whenever a class is missing
# from it.
class_names = le.inverse_transform(clf.classes_)

# Print the class names
print('Classes:', class_names)
# Mapping of class name -> encoded integer; keys are cast to plain str so the
# printed dictionary stays readable regardless of the NumPy version.
class_mapping = {str(label): index for index, label in enumerate(le.classes_)}
# Print the mapping
print('Encoding:', class_mapping)

**Classify the failure mode of a new instance using the trained RF model**

In [None]:
# Make new prediction
# Manually input the new data (one value per feature)

new_data = {
    'L': [1500],               # Length(mm)
    't': [1.5],                # Thickness of the section (mm)
    'h': [175],                # Height of the section (mm)
    'b': [65],                 # Flange of the section (mm)
    'Fy': [450],               # Yield stress
    'λc': [0.38],              # Global Slenderness
    'λ(le-d)': [2.36],         # Sectional Slenderness
}

new_input_data = pd.DataFrame(new_data)

# Put the columns in the same order as the features the model was trained on
# (raises KeyError if a feature is missing from `new_data`)
new_input_data = new_input_data[data_x.columns]

# Scale with the scaler that was already fitted in the preprocessing cell.
# Creating and refitting a fresh StandardScaler here would overwrite `sc_X`
# and could silently diverge from the scaling the model was trained with.
new_input_scaled = sc_X.transform(new_input_data)

# Predict the encoded class and decode it back to the failure-mode label
predicted_class = clf.predict(new_input_scaled)
predicted_class_decoded = le.inverse_transform(predicted_class)

print('Predicted class:', predicted_class_decoded[0])

**Interpretation of the prediction for the new instance (LIME)**

In [None]:
# RF Classification Model Explainer
!pip install lime
import lime
import lime.lime_tabular

# Create a LIME explainer
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_train_class,
    feature_names=X.columns,
    class_names=class_names,
    mode='classification'
)

# New input data
exp = explainer.explain_instance(
    data_row=new_input_scaled[0],
    predict_fn=clf.predict_proba, top_labels=1
)

# Show the explanation
exp.show_in_notebook(show_table=True, show_all=False)