In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Apply seaborn's default visual theme to every figure created below.
sns.set_theme()


In [None]:
# Load dataset
# "risk.csv" is a relative path, so it is resolved against the kernel's
# current working directory.
df = pd.read_csv("risk.csv")
# Preview the first rows as the cell output.
df.head()


In [None]:
# Encode the textual RiskLevel column as integer class ids, stored in a new
# "Target" column. The fitted encoder stays in `le` because later cells use it
# to turn predicted ids back into risk-level names.
le = LabelEncoder().fit(df["RiskLevel"])
df["Target"] = le.transform(df["RiskLevel"])
df.head()


In [None]:
# Build the model inputs: X holds the six measurement columns listed below,
# y holds the integer-encoded risk level.
preserve_columns = [
    "Age",
    "SystolicBP",
    "DiastolicBP",
    "BS",
    "BodyTemp",
    "HeartRate",
]
X = df.loc[:, preserve_columns]
y = df.loc[:, "Target"]


In [None]:
# Split the dataset into training and testing sets
# 20% of the rows are held out for testing; random_state pins the shuffle so
# the same split is produced on every run.
# NOTE(review): the split is not stratified on y. If the risk classes are
# imbalanced (not visible from here — confirm), consider passing stratify=y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21)


In [None]:
# Fit a linear-kernel support vector classifier on the training split.
# probability=True enables predict_proba(), which the ensemble step relies on.
# gamma is passed explicitly but only applies to the rbf/poly/sigmoid kernels,
# so it has no effect with kernel="linear".
svm_classifier = SVC(
    C=1.0,
    kernel="linear",
    gamma="scale",
    probability=True,
    random_state=21,
).fit(X_train, y_train)
# Show the fitted estimator as the cell output.
svm_classifier


In [None]:
# Fit a multinomial naive Bayes classifier with default hyper-parameters.
# NOTE(review): MultinomialNB models count-like, non-negative features. If the
# selected columns are continuous measurements, GaussianNB may be the more
# appropriate variant — confirm before relying on these probabilities.
naive_bayes_classifier = MultinomialNB().fit(X_train, y_train)
# Show the fitted estimator as the cell output.
naive_bayes_classifier


In [None]:
# Fit a decision tree limited to depth 5; random_state makes the fitted tree
# reproducible between runs.
decision_tree_classifier = DecisionTreeClassifier(
    max_depth=5,
    random_state=21,
).fit(X_train, y_train)
# Show the fitted estimator as the cell output.
decision_tree_classifier


In [None]:
# Class-probability estimates for the held-out rows, one array per model.
# Rows follow X_test; columns follow the corresponding model's classes_.
svm_probs, nb_probs, dt_probs = (
    model.predict_proba(X_test)
    for model in (svm_classifier, naive_bayes_classifier, decision_tree_classifier)
)


In [None]:
# Hard class predictions (still integer-encoded) for the held-out rows,
# one array per model.
svm_predictions, nb_predictions, dt_predictions = (
    model.predict(X_test)
    for model in (svm_classifier, naive_bayes_classifier, decision_tree_classifier)
)


In [None]:
# Decode integer class ids back to their RiskLevel names.
# LabelEncoder numbers the classes in sorted order of the names (see
# le.classes_), so the id -> name mapping is whatever that ordering yields.
#
# Every value is derived from objects this cell never overwrites (the fitted
# models, X_test and y). Decoding the existing variables in place would make
# the cell fail on a second run, because inverse_transform would then receive
# names instead of ids.
svm_predictions = le.inverse_transform(svm_classifier.predict(X_test))
nb_predictions = le.inverse_transform(naive_bayes_classifier.predict(X_test))
dt_predictions = le.inverse_transform(decision_tree_classifier.predict(X_test))
# y still holds the encoded targets for all rows; X_test.index selects the
# held-out rows in the same order the split produced them.
y_test = le.inverse_transform(y.loc[X_test.index])


In [None]:
# SVM report: per-class precision/recall/F1 followed by a confusion-matrix heatmap.
svm_report = classification_report(y_test, svm_predictions)
print(svm_report)

# Class names in the model's class order. Passing them to confusion_matrix
# pins the row/column order, so the tick labels always line up with the
# matrix even if a class is absent from the test split.
labels = le.inverse_transform(svm_classifier.classes_)
svm_cm = confusion_matrix(y_test, svm_predictions, labels=labels)

fig, ax = plt.subplots(dpi=300)
# fmt="d" prints raw integer counts; the default format switches to
# scientific notation once a cell reaches three digits.
sns.heatmap(svm_cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels, ax=ax)
# confusion_matrix puts true classes on rows and predicted classes on columns.
ax.set(title="Support Vector Machine", xlabel="Predicted", ylabel="Actual");


In [None]:
# Naive Bayes report: per-class precision/recall/F1 followed by a confusion-matrix heatmap.
nb_report = classification_report(y_test, nb_predictions)
print(nb_report)

# Class names in this model's own class order (previously taken from the SVM).
# Passing them to confusion_matrix pins the row/column order, so the tick
# labels always line up with the matrix even if a class is absent from the
# test split.
labels = le.inverse_transform(naive_bayes_classifier.classes_)
nb_cm = confusion_matrix(y_test, nb_predictions, labels=labels)

fig, ax = plt.subplots(dpi=300)
# fmt="d" prints raw integer counts; the default format switches to
# scientific notation once a cell reaches three digits.
sns.heatmap(nb_cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels, ax=ax)
# confusion_matrix puts true classes on rows and predicted classes on columns.
ax.set(title="Naive Bayes", xlabel="Predicted", ylabel="Actual");


In [None]:
# Decision tree report: per-class precision/recall/F1 followed by a confusion-matrix heatmap.
dt_report = classification_report(y_test, dt_predictions)
print(dt_report)

# Class names in this model's own class order (previously taken from the SVM).
# Passing them to confusion_matrix pins the row/column order, so the tick
# labels always line up with the matrix even if a class is absent from the
# test split.
labels = le.inverse_transform(decision_tree_classifier.classes_)
dt_cm = confusion_matrix(y_test, dt_predictions, labels=labels)

fig, ax = plt.subplots(dpi=300)
# fmt="d" prints raw integer counts; the default format switches to
# scientific notation once a cell reaches three digits.
sns.heatmap(dt_cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels, ax=ax)
# confusion_matrix puts true classes on rows and predicted classes on columns.
ax.set(title="Decision Tree", xlabel="Predicted", ylabel="Actual");


In [None]:
# 1x3 figure: the class probabilities each model assigns to one example test row.
# The row is picked by position within X_test and named once here instead of
# being repeated as a bare index in every plot call.
sample_position = 1

panels = (
    ("Support Vector Machine", svm_probs, svm_classifier),
    ("Naive Bayes", nb_probs, naive_bayes_classifier),
    ("Decision Tree", dt_probs, decision_tree_classifier),
)

fig, axis = plt.subplots(1, 3, figsize=(10, 4), dpi=300)
fig.suptitle(f"Predicted class probabilities for test row {sample_position}")
for panel_ax, (title, probs, model) in zip(axis, panels):
    # x: class names in this model's class order; y: its probabilities for the row.
    sns.barplot(y=probs[sample_position], x=le.inverse_transform(model.classes_), ax=panel_ax)
    panel_ax.set_title(title)
    panel_ax.set_ylabel("Probability")
    # Same fixed 0-1 range on every panel so bar heights are comparable.
    panel_ax.set_ylim(0, 1)
    panel_ax.tick_params(axis="x", rotation=90)
# Keep the rotated tick labels and the suptitle from overlapping; the trailing
# semicolon suppresses the cell's text output.
fig.tight_layout();


In [None]:
# Ensemble classifier => for every test row, take the prediction of whichever
# model reports the highest class probability.
# Ties are resolved in the order SVM, Naive Bayes, Decision Tree, because
# argmax returns the first maximal entry along the model axis.
# NOTE(review): the three models' probabilities are compared directly; nothing
# here calibrates them against each other — confirm that is intended.
candidates = (
    (svm_probs, svm_classifier.classes_),
    (nb_probs, naive_bayes_classifier.classes_),
    (dt_probs, decision_tree_classifier.classes_),
)

# confidence[m, i]: highest class probability model m assigns to test row i.
confidence = np.stack([probs.max(axis=1) for probs, _ in candidates])
# votes[m, i]: encoded class that model m's probabilities favour for row i.
votes = np.stack([classes[probs.argmax(axis=1)] for probs, classes in candidates])

# Index of the most confident model for each row, then that model's vote.
winner = confidence.argmax(axis=0)
ensemble_predictions = list(votes[winner, np.arange(len(X_test))])


In [None]:
# Ensemble learning report: per-class precision/recall/F1 followed by a confusion-matrix heatmap.
# Decode only while the predictions are still integer class ids, so running
# this cell a second time is a no-op instead of an inverse_transform error.
ensemble_predictions = np.asarray(ensemble_predictions)
if ensemble_predictions.dtype.kind in "iu":
    ensemble_predictions = le.inverse_transform(ensemble_predictions)
ensemble_report = classification_report(y_test, ensemble_predictions)
print(ensemble_report)

# Take the class names straight from the encoder rather than from a `labels`
# variable left behind by another cell (the accuracy chart rebinds that name
# to model names). Passing them to confusion_matrix pins the row/column order
# to the tick labels.
labels = le.classes_
ensemble_cm = confusion_matrix(y_test, ensemble_predictions, labels=labels)

fig, ax = plt.subplots(dpi=300)
# fmt="d" prints raw integer counts; the default format switches to
# scientific notation once a cell reaches three digits.
sns.heatmap(ensemble_cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels, ax=ax)
# confusion_matrix puts true classes on rows and predicted classes on columns.
ax.set(title="Ensemble", xlabel="Predicted", ylabel="Actual");


In [None]:
# Fraction of held-out rows each model (and the ensemble) labels correctly.
svm_accuracy, nb_accuracy, dt_accuracy, ensemble_accuracy = (
    accuracy_score(y_test, predicted)
    for predicted in (svm_predictions, nb_predictions, dt_predictions, ensemble_predictions)
)


In [None]:
# Bar chart comparing the accuracy of every model, in percent.
# The model names get their own variable: `labels` is used by the report
# cells for class names, and rebinding it here would mislabel their heatmaps
# if one of them were re-run afterwards.
model_names = ["SVM", "Naive Bayes", "Decision Tree", "Ensemble"]
accuracies = [
    100 * score
    for score in (svm_accuracy, nb_accuracy, dt_accuracy, ensemble_accuracy)
]

fig, ax = plt.subplots(1, 1, figsize=(10, 4), dpi=300)
# Draw on the axes created above instead of relying on the implicit current axes.
sns.barplot(y=accuracies, x=model_names, ax=ax)
ax.set_title("Accuracy of Models")
ax.set_ylabel("Accuracy (%)")
# One decimal place keeps the value printed above each bar readable.
ax.bar_label(ax.containers[0], fmt="%.1f", fontsize=10);


In [None]:
# Print the fitted linear SVM's parameters so they can be copied into the
# Flutter application. coef_ is available because the model uses kernel="linear".
# NOTE(review): for more than two classes SVC trains one-vs-one classifiers, so
# each coef_ row / intercept_ entry belongs to a pair of classes rather than to
# a single class. These weights drive decision_function, not predict_proba —
# confirm the Flutter side reproduces that scheme.
intercept, coefficients = svm_classifier.intercept_, svm_classifier.coef_

print(intercept, coefficients, sep="\n")
