In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load transformed_dataset.csv into the DataFrame that the modelling cells read from.
df = pd.read_csv(filepath_or_buffer="transformed_dataset.csv")

In [None]:
# SVM without SMOTE, mcc scorer
#
# Splits the data 60/40 (stratified on the target), grid-searches C and gamma
# with 5-fold stratified cross-validation scored by Matthews correlation
# coefficient (MCC), then evaluates the refit best model on the held-out split.

import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, precision_recall_fscore_support, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import cohen_kappa_score
import seaborn as sns
from sklearn.metrics import matthews_corrcoef, make_scorer

# Features: every column from 'Precipitation' through 'Slope' (label slice, both ends included).
X = df.loc[:, 'Precipitation':'Slope']
y = df['Severity_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42, stratify=y)

# SVM model with a One vs One scheme.
# probability=True is required for predict_proba below; random_state pins the
# internal shuffling used for the probability estimates so that the reported
# AUC-ROC is reproducible across runs.
svc = SVC(decision_function_shape='ovo', probability=True, random_state=42)

# Grid search parameters

param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
}

# Set up the StratifiedKFold
cv = StratifiedKFold(n_splits=5)

# Set up the GridSearchCV with the SVM model, parameter grid, and stratified cross-validation
grid_search = GridSearchCV(svc, param_grid, cv=cv, scoring=make_scorer(matthews_corrcoef), verbose=1, n_jobs=-1)

# Fit the model with the training data
grid_search.fit(X_train, y_train)

print("Best model parameters:", grid_search.best_params_)
# best_score_ is the mean MCC over the validation folds for the best
# parameter combination -- it is not a score on the training data.
print("Best cross-validated MCC:", grid_search.best_score_)

# Make predictions using the best model
y_pred = grid_search.best_estimator_.predict(X_test)

# Get probability estimates for each class
y_prob = grid_search.best_estimator_.predict_proba(X_test)

# Compute the multiclass AUC-ROC score
auc_roc = roc_auc_score(y_test, y_prob, average='macro', multi_class='ovo')

# Print the evaluation metrics
print("Test MCC:", matthews_corrcoef(y_test, y_pred))
print("AUC-ROC score:", auc_roc)

clf_report = classification_report(y_test, y_pred)
print("Classification report:\n", clf_report)

# conf_mat is reused by the plotting cell that follows.
conf_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", conf_mat)

In [None]:
# Draw `conf_mat` as an annotated Seaborn heatmap.

# Tick labels: the distinct target values in sorted order.
class_labels = np.sort(y.unique())

plt.figure(figsize=(10, 7))
sns.set(font_scale=1.4)  # Increase font size
ax = sns.heatmap(
    conf_mat,
    annot=True,
    annot_kws={"size": 16},
    fmt='g',
    cmap='Blues',
    cbar=False,
    xticklabels=class_labels,
    yticklabels=class_labels,
)

# Label the axes the heatmap was drawn on.
ax.set_xlabel('Predicted Label', fontsize=18)
ax.set_ylabel('True Label', fontsize=18)
ax.set_title('Confusion Matrix for SVM Model', fontsize=20)
plt.show()

In [None]:
# XGBoost without SMOTE, mcc scorer
#
# Same protocol as the SVM cell: 60/40 stratified split, 5-fold stratified grid
# search scored by MCC, then evaluation of the refit best model on the held-out
# split. Class labels are integer-encoded before fitting.

import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, precision_recall_fscore_support, confusion_matrix
# Imported here as well so this cell does not depend on the SVM cell having run first.
from sklearn.metrics import matthews_corrcoef, make_scorer

# Features: every column from 'Precipitation' through 'Slope' (label slice, both ends included).
X = df.loc[:, 'Precipitation':'Slope']
y = df['Severity_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42, stratify=y)

# Encode class labels as integers; the encoder is fitted on the training
# labels only and then applied to the test labels.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)


# XGBoost model
xgb = XGBClassifier()

# Grid search parameters

param_grid = {
    'n_estimators': [50, 100, 200, 500, 1000],
    'learning_rate': [0.001, 0.01, 0.1, 1],
    'max_depth': [3, 5, 7, 9]
}

# Set up the StratifiedKFold
cv = StratifiedKFold(n_splits=5)

# Set up the GridSearchCV with the XGBoost model, parameter grid, and stratified cross-validation
grid_search = GridSearchCV(xgb, param_grid, cv=cv, scoring=make_scorer(matthews_corrcoef), verbose=1, n_jobs=-1)

# Fit the model with the training data
grid_search.fit(X_train, y_train)

print("Best model parameters:", grid_search.best_params_)
# best_score_ is the mean MCC over the validation folds for the best
# parameter combination -- it is not a score on the training data.
print("Best cross-validated MCC:", grid_search.best_score_)

# Make predictions using the best model
y_pred = grid_search.best_estimator_.predict(X_test)

# Get probability estimates for each class
y_prob = grid_search.best_estimator_.predict_proba(X_test)

# Compute the multiclass AUC-ROC score
auc_roc = roc_auc_score(y_test, y_prob, average='macro', multi_class='ovo')

# Print the evaluation metrics
print("Test MCC:", matthews_corrcoef(y_test, y_pred))
print("AUC-ROC score:", auc_roc)

# Report rows are named after the original (pre-encoding) classes; str() keeps
# the report working when those classes are not already strings.
clf_report = classification_report(y_test, y_pred, target_names=[str(label) for label in le.classes_])
print("Classification report:\n", clf_report)

# conf_mat and le are reused by the plotting cell that follows.
conf_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", conf_mat)

In [None]:
# Draw `conf_mat` as an annotated Seaborn heatmap.

# Tick labels: the original class values known to the fitted label encoder.
class_labels = le.classes_

plt.figure(figsize=(10, 7))
sns.set(font_scale=1.4)  # Increase font size
ax = sns.heatmap(
    conf_mat,
    annot=True,
    annot_kws={"size": 16},
    fmt='g',
    cmap='Blues',
    cbar=False,
    xticklabels=class_labels,
    yticklabels=class_labels,
)

# Label the axes the heatmap was drawn on.
ax.set_xlabel('Predicted Label', fontsize=18)
ax.set_ylabel('True Label', fontsize=18)
ax.set_title('Confusion Matrix for XGBoost Model', fontsize=20)
plt.show()

In [None]:
# SVM with SMOTE oversampling, mcc scorer
#
# Same protocol as the plain SVM cell, except that the training split is
# oversampled with SMOTE before the grid search. The held-out test split is
# left untouched and used for the final metrics.

import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, precision_recall_fscore_support, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import cohen_kappa_score
import seaborn as sns
from sklearn.metrics import matthews_corrcoef, make_scorer
# SMOTE is used below; import it here so the cell runs on a fresh kernel.
from imblearn.over_sampling import SMOTE

# Features: every column from 'Precipitation' through 'Slope' (label slice, both ends included).
X = df.loc[:, 'Precipitation':'Slope']
y = df['Severity_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42, stratify=y)

# Determine the appropriate number of nearest neighbors for SMOTE:
# at most 5, and at most one less than the size of the smallest training class.
k_neighbors = min(min(y_train.value_counts()) - 1, 5)

smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# NOTE(review): the resampling happens before the cross-validation split, so
# the validation folds inside GridSearchCV also contain SMOTE-generated rows.
# The cross-validated MCC printed below is therefore likely optimistic; the
# test-set metrics are computed on un-resampled data. Consider moving SMOTE
# into an imblearn Pipeline so it is fitted per fold.

# SVM model with a One vs One scheme.
# probability=True is required for predict_proba below; random_state pins the
# internal shuffling used for the probability estimates so that the reported
# AUC-ROC is reproducible across runs.
svc = SVC(decision_function_shape='ovo', probability=True, random_state=42)

# Grid search parameters

param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
}

# Set up the StratifiedKFold
cv = StratifiedKFold(n_splits=5)

# Set up the GridSearchCV with the SVM model, parameter grid, and stratified cross-validation
grid_search = GridSearchCV(svc, param_grid, cv=cv, scoring=make_scorer(matthews_corrcoef), verbose=1, n_jobs=-1)

# Fit the model with the oversampled training data
grid_search.fit(X_train_smote, y_train_smote)

print("Best model parameters:", grid_search.best_params_)
# best_score_ is the mean MCC over the validation folds for the best
# parameter combination -- it is not a score on the training data.
print("Best cross-validated MCC:", grid_search.best_score_)

# Make predictions using the best model
y_pred = grid_search.best_estimator_.predict(X_test)

# Get probability estimates for each class
y_prob = grid_search.best_estimator_.predict_proba(X_test)

# Compute the multiclass AUC-ROC score
auc_roc = roc_auc_score(y_test, y_prob, average='macro', multi_class='ovo')

# Print the evaluation metrics
print("Test MCC:", matthews_corrcoef(y_test, y_pred))
print("AUC-ROC score:", auc_roc)

clf_report = classification_report(y_test, y_pred)
print("Classification report:\n", clf_report)

conf_mat = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", conf_mat)