In [89]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
# Apply metrics
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    precision_score, recall_score, roc_auc_score
)

In [90]:
# Read the processed dataset and expand the 'ph' and 'rainfall' columns into dummy/indicator columns
df = pd.read_csv('data/processed-data.csv').pipe(pd.get_dummies, columns=['ph', 'rainfall'])

# labels (y) are the 'label' column; features (x) are every remaining column
y = df['label']
x = df.drop(columns=['label'])

In [91]:
# Split the data 80-20 (test_size = 0.20): 80% (x_seen, y_seen) is kept for cross-validation and
# training, while the remaining 20% (x_unseen, y_unseen) is held out as unseen data.
# random_state is fixed so the same rows land in each split on every run.
x_seen, x_unseen, y_seen, y_unseen = train_test_split(x, y, test_size = 0.20, random_state = 42)

In [92]:
# Import the SVM module
from sklearn import svm

# Polynomial-kernel (degree 3) support vector classifier.
# probability=True enables predict_proba; those estimates are fitted with an internal
# cross-validation that shuffles the data, so random_state is pinned to keep the
# probabilities (and any score derived from them, e.g. ROC-AUC) repeatable across runs.
model = svm.SVC(
    kernel='poly', degree=3, C=1.0, gamma='scale',
    decision_function_shape='ovr', probability=True, random_state=42,
)

# instantiate a 10-fold cross-validation; shuffled with a fixed seed so the folds are repeatable
kf = KFold(n_splits = 10, shuffle = True, random_state = 42)

In [99]:
from sklearn.model_selection import cross_val_score, cross_val_predict

# Cross-validated predictions on the seen split (x_seen, y_seen) using the kf splitter.
# Both calls use the same estimator, data and splitter; only the prediction method differs.

# predicted class label for every row of the seen split
y_pred_cv = cross_val_predict(
    estimator=model, X=x_seen, y=y_seen, cv=kf, method='predict'
)

# per-class probability estimates for every row of the seen split
y_proba_cv = cross_val_predict(
    estimator=model, X=x_seen, y=y_seen, cv=kf, method='predict_proba'
)

InvalidParameterError: The 'scoring' parameter of cross_val_score must be a str among {'neg_root_mean_squared_error', 'neg_root_mean_squared_log_error', 'roc_auc_ovr', 'precision_macro', 'explained_variance', 'homogeneity_score', 'neg_negative_likelihood_ratio', 'normalized_mutual_info_score', 'jaccard', 'fowlkes_mallows_score', 'recall_macro', 'completeness_score', 'f1_samples', 'average_precision', 'precision_weighted', 'matthews_corrcoef', 'neg_brier_score', 'neg_mean_squared_log_error', 'f1', 'neg_mean_gamma_deviance', 'recall_weighted', 'neg_log_loss', 'neg_max_error', 'neg_mean_absolute_percentage_error', 'precision_micro', 'recall_samples', 'roc_auc_ovo_weighted', 'neg_mean_poisson_deviance', 'top_k_accuracy', 'f1_macro', 'rand_score', 'precision', 'v_measure_score', 'positive_likelihood_ratio', 'adjusted_rand_score', 'accuracy', 'neg_mean_squared_error', 'jaccard_macro', 'roc_auc_ovr_weighted', 'd2_absolute_error_score', 'jaccard_micro', 'neg_mean_absolute_error', 'r2', 'recall_micro', 'balanced_accuracy', 'jaccard_samples', 'jaccard_weighted', 'f1_micro', 'adjusted_mutual_info_score', 'roc_auc', 'mutual_info_score', 'recall', 'f1_weighted', 'neg_median_absolute_error', 'roc_auc_ovo', 'precision_samples'}, a callable or None. Got ['accuracy', 'precision', 'recall'] instead.

In [94]:
# Score the cross-validated predictions against the true labels of the seen split
conf_matrix = confusion_matrix(y_true=y_seen, y_pred=y_pred_cv)
accuracy = accuracy_score(y_true=y_seen, y_pred=y_pred_cv)

# precision and recall use 'weighted' averaging because the target is multi-class
precision = precision_score(y_true=y_seen, y_pred=y_pred_cv, average='weighted')
recall = recall_score(y_true=y_seen, y_pred=y_pred_cv, average='weighted')

# ROC-AUC is computed from the class probabilities, one-vs-rest
roc_auc = roc_auc_score(y_true=y_seen, y_score=y_proba_cv, multi_class='ovr')

# Report: confusion matrix first, then one line per scalar metric
print("Confusion Matrix:\n", conf_matrix)
for metric_name, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("ROC-AUC Score:", roc_auc),
):
    print(metric_name, metric_value)



Confusion Matrix:
 [[74  0  0  1  0  0  4  0  0  0]
 [ 0 87  0  0  0  0  0  0  0  0]
 [ 0  0 85  0  0  0  0  0  0  0]
 [ 1  2  0 64  0  0  1  0  9  0]
 [ 0  0  0  0 73  0  0  0  0  0]
 [ 0  0  0  0  0 79  0  0  0  0]
 [ 0  4  0  0  0  1 82  0  0  0]
 [ 0  0  0  0  0  0  0 75  0  0]
 [ 0  0  0 42  0  0  0  0 38  0]
 [ 0  0  0  0  0  0  0  0  0 78]]
Accuracy: 0.91875
Precision: 0.9263539865888063
Recall: 0.91875
ROC-AUC Score: 0.9935852260756348


In [96]:
# Fit the model on the whole seen split (80% of the data, per test_size = 0.20 in the split cell);
# this fitted model is the one evaluated on the unseen data afterwards.
# The trailing ';' suppresses the estimator repr in the notebook output.
model.fit(x_seen, y_seen);

In [100]:
# Evaluate the fitted model on the held-out unseen split (the 20% set aside by the split cell)
y_unseen_pred = model.predict(x_unseen)

# NOTE: these names are the same ones assigned in the cross-validation metrics cell,
# so running this cell overwrites those values.
conf_matrix = confusion_matrix(y_true=y_unseen, y_pred=y_unseen_pred)
accuracy = accuracy_score(y_true=y_unseen, y_pred=y_unseen_pred)

# precision and recall use 'weighted' averaging because the target is multi-class
precision = precision_score(y_true=y_unseen, y_pred=y_unseen_pred, average='weighted')
recall = recall_score(y_true=y_unseen, y_pred=y_unseen_pred, average='weighted')

# Report: confusion matrix first, then one line per scalar metric
print("Confusion Matrix:\n", conf_matrix)
for metric_name, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
):
    print(metric_name, metric_value)

Confusion Matrix:
 [[21  0  0  0  0  0  0  0  0  0]
 [ 0 13  0  0  0  0  0  0  0  0]
 [ 0  0 15  0  0  0  0  0  0  0]
 [ 0  0  0 17  0  0  1  0  5  0]
 [ 0  0  0  0 27  0  0  0  0  0]
 [ 0  0  0  0  0 21  0  0  0  0]
 [ 0  0  0  0  0  0 13  0  0  0]
 [ 0  0  0  0  0  0  0 25  0  0]
 [ 0  0  0 10  0  0  0  0 10  0]
 [ 0  0  0  0  0  0  0  0  0 22]]
Accuracy: 0.92
Precision: 0.919431216931217
Recall: 0.92
