In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
# Apply metrics
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    precision_score, recall_score, roc_auc_score
)

In [2]:
# Read the preprocessed dataset and one-hot encode the 'ph' and 'rainfall' columns.
# NOTE(review): one-hot encoding presumes these two columns hold categorical
# (e.g. binned) values rather than raw measurements — confirm against the
# preprocessing step that produced the CSV.
df = pd.read_csv('data/processed-data.csv').pipe(pd.get_dummies, columns=['ph', 'rainfall'])

# The 'label' column is the target (y); every other column is a feature (x).
y = df['label']
x = df.drop('label', axis=1)

In [3]:
# Hold out 10% of the rows as "unseen" data for the final evaluation; the
# remaining 90% ("seen") is used for cross-validation and for the final fit.
# random_state is fixed so the same rows land in each partition on every run.
x_seen, x_unseen, y_seen, y_unseen = train_test_split(
    x,
    y,
    test_size=0.10,
    random_state=42,
)

In [4]:
# Import the SVM module
from sklearn import svm

# Support-vector classifier with a degree-3 polynomial kernel.
# probability=True makes SVC calibrate class probabilities with an internal
# cross-validation over randomly shuffled data. Without a fixed random_state
# the predict_proba output (and the ROC-AUC computed from it) changes from one
# run to the next, so the estimator is seeded like the split and the folds are.
model = svm.SVC(
    kernel='poly',
    degree=3,
    C=1.0,
    gamma='scale',
    decision_function_shape='ovr',
    probability=True,
    random_state=42,
)

# 10-fold cross-validation splitter; shuffling is seeded so the folds are repeatable.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

In [5]:
from sklearn.model_selection import cross_val_predict, cross_val_score

# 10-fold cross-validation over the seen partition (x_seen, y_seen).
# First pass: predicted class label for every seen row.
y_pred_cv = cross_val_predict(estimator=model, X=x_seen, y=y_seen, cv=kf)

# Second pass over the same folds: per-class probabilities, used for ROC-AUC.
y_proba_cv = cross_val_predict(
    estimator=model,
    X=x_seen,
    y=y_seen,
    cv=kf,
    method='predict_proba',
)

In [6]:
# Cross-validation metrics on the seen data, computed from the predictions
# produced by cross_val_predict.
conf_matrix = confusion_matrix(y_true=y_seen, y_pred=y_pred_cv)
accuracy = accuracy_score(y_true=y_seen, y_pred=y_pred_cv)

# 'weighted' averaging is used because this is a multi-class problem.
precision = precision_score(y_true=y_seen, y_pred=y_pred_cv, average='weighted')
recall = recall_score(y_true=y_seen, y_pred=y_pred_cv, average='weighted')

# One-vs-rest ROC-AUC from the predicted class probabilities.
roc_auc = roc_auc_score(y_true=y_seen, y_score=y_proba_cv, multi_class='ovr')

# Report every metric, one per line.
for _caption, _value in [
    ("Confusion Matrix:\n", conf_matrix),
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("ROC-AUC Score:", roc_auc),
]:
    print(_caption, _value)



Confusion Matrix:
 [[87  0  0  0  0  0  4  0  0  0]
 [ 0 92  0  0  0  0  0  0  0  0]
 [ 0  0 94  0  0  0  0  0  0  0]
 [ 1  2  0 73  0  0  2  0 13  0]
 [ 0  0  0  0 87  0  0  0  0  0]
 [ 0  0  0  0  0 90  0  0  0  0]
 [ 1  0  0  0  0  2 91  0  0  0]
 [ 0  0  0  0  0  0  0 87  0  0]
 [ 0  0  0 46  0  0  0  0 41  0]
 [ 0  0  0  0  0  0  0  0  0 87]]
Accuracy: 0.9211111111111111
Precision: 0.9245619328997374
Recall: 0.9211111111111111
ROC-AUC Score: 0.9929868744732551


In [7]:
# Fit the model on the whole seen partition (90% of the data); this fitted
# model is the one evaluated against the unseen rows.
# The trailing ';' keeps the estimator repr out of the cell output.
model.fit(X=x_seen, y=y_seen);

In [9]:
# Evaluate the fitted model on the held-out data (the unseen 10%).
y_unseen_pred = model.predict(X=x_unseen)

# NOTE(review): these names are the same ones used for the cross-validation
# metrics, so the earlier values are overwritten once this cell runs.
conf_matrix = confusion_matrix(y_true=y_unseen, y_pred=y_unseen_pred)
accuracy = accuracy_score(y_true=y_unseen, y_pred=y_unseen_pred)

# 'weighted' averaging is used because this is a multi-class problem.
precision = precision_score(y_true=y_unseen, y_pred=y_unseen_pred, average='weighted')
recall = recall_score(y_true=y_unseen, y_pred=y_unseen_pred, average='weighted')

# Report every metric, one per line.
for _caption, _value in [
    ("Confusion Matrix:\n", conf_matrix),
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
]:
    print(_caption, _value)

Confusion Matrix:
 [[ 9  0  0  0  0  0  0  0  0  0]
 [ 0  8  0  0  0  0  0  0  0  0]
 [ 0  0  6  0  0  0  0  0  0  0]
 [ 0  0  0  8  0  0  0  0  1  0]
 [ 0  0  0  0 13  0  0  0  0  0]
 [ 0  0  0  0  0 10  0  0  0  0]
 [ 0  0  0  0  0  0  6  0  0  0]
 [ 0  0  0  0  0  0  0 13  0  0]
 [ 0  0  0  6  0  0  0  0  7  0]
 [ 0  0  0  0  0  0  0  0  0 13]]
Accuracy: 0.93
Precision: 0.9451785714285714
Recall: 0.93
