In [1]:
from google.colab import drive

# Directory inside the Colab VM where Google Drive is attached; the data paths
# used when loading the CSV files are rooted here.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Mounted at /content/drive


In [2]:
import pandas as pd

# Folder on the mounted Drive that holds the pre-processed train/test splits.
# Kept in one place so the location only has to be changed once.
DATA_DIR = '/content/drive/MyDrive/Modeling-Earthquake-Damage-main/Resources/Processed data'

# Feature matrices are read with header=None: the first CSV row is treated as
# data and the columns receive integer labels.
X_train = pd.read_csv(f'{DATA_DIR}/X_train_processed.csv', header=None)
X_test = pd.read_csv(f'{DATA_DIR}/X_test_processed.csv', header=None)

# Target files are read with their header row; only the 'damage_grade' column
# is kept, as a 1-D Series, which is the shape scikit-learn expects for y.
y_train = pd.read_csv(f'{DATA_DIR}/y_train_processed.csv')['damage_grade']
y_test = pd.read_csv(f'{DATA_DIR}/y_test_processed.csv')['damage_grade']

# Features and targets are parsed with different header settings, so an
# unexpected header row in either file would shift the rows by one.
# Fail here with a clear message instead of later inside model fitting.
assert len(X_train) == len(y_train), (
    f"X_train has {len(X_train)} rows but y_train has {len(y_train)}"
)
assert len(X_test) == len(y_test), (
    f"X_test has {len(X_test)} rows but y_test has {len(y_test)}"
)


In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Fixed seed for the forest's bootstrap sampling and feature sub-sampling, so
# that re-running the notebook selects the same parameters and reproduces the
# metrics reported in the cells that follow.
RANDOM_STATE = 42

# Base estimator; every hyper-parameter not listed in the grid keeps its default.
rf = RandomForestClassifier(random_state=RANDOM_STATE)

# Search space: 3 * 4 * 3 = 36 parameter combinations.
param_grid = {
    'n_estimators': [50, 100, 200],  # Number of trees
    'max_depth': [None, 10, 20, 30],  # Maximum depth of trees (None = unlimited)
    'min_samples_split': [2, 5, 10]  # Minimum number of samples required to split an internal node
}

# 5-fold cross-validated exhaustive search using all available cores.
# `scoring` is not set, so candidates are ranked with the estimator's own
# score method (accuracy for classifiers).
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1)

# Fit the model (36 combinations x 5 folds, plus a final refit of the best one)
grid_search.fit(X_train, y_train)

# Best parameters
print("Best parameters:", grid_search.best_params_)



Best parameters: {'max_depth': 30, 'min_samples_split': 10, 'n_estimators': 200}


In [5]:
# Estimator refit by the grid search with the winning parameter combination.
best_model = grid_search.best_estimator_

# Predicted damage grades for the held-out test features.
y_pred = best_model.predict(X=X_test)


In [6]:
from sklearn.metrics import f1_score

# Weighted F1: per-class F1 scores averaged with weights proportional to each
# class's support (its number of true instances in y_test).
weighted_f1_score = f1_score(y_test, y_pred, average='weighted')

weighted_f1_score


0.6664291342265066

In [7]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted grade equals the true grade.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

accuracy

0.6799562556359241

In [8]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes and columns are the predicted classes, both in
# sorted label order.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

conf_matrix

array([[ 1930,  2956,   139],
       [ 1005, 24886,  3761],
       [   91,  8729,  8624]])

In [9]:
from sklearn.metrics import classification_report

# Text summary of per-class precision, recall, F1 and support, followed by
# the overall accuracy and the macro / weighted averages.
class_report = classification_report(y_true=y_test, y_pred=y_pred)

print(class_report)

              precision    recall  f1-score   support

           1       0.64      0.38      0.48      5025
           2       0.68      0.84      0.75     29652
           3       0.69      0.49      0.58     17444

    accuracy                           0.68     52121
   macro avg       0.67      0.57      0.60     52121
weighted avg       0.68      0.68      0.67     52121

