<a href="https://colab.research.google.com/github/AbdullahiOlapojoye/Machine-Learning-based-Infants-Sucking-Patterns-Classification/blob/main/SVM_%26_GB_Classifiers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Libraries
import os
import joblib
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

# For the Visuals
import pandas as pd
import seaborn as sns
import matplotlib as mpl
from matplotlib import cm
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.ticker import MaxNLocator
from matplotlib.offsetbox import AnnotationBbox, OffsetImage
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib.patches import Rectangle
from IPython.display import display_html
plt.rcParams.update({'font.size': 16})
import plotly.graph_objects as go
#performance
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, confusion_matrix, precision_recall_fscore_support
from sklearn.metrics import roc_auc_score,auc,f1_score
from sklearn.metrics import precision_recall_curve,roc_curve

# **DATA PREPARATION**

In [None]:
# Function to read and process CSV files
def read_and_process_csv(file_path):
    """Load one CSV file and return its ``F1`` and ``F2`` columns as an array.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file. It must contain columns named ``F1`` and
        ``F2``; any other columns are ignored.

    Returns
    -------
    numpy.ndarray
        One row per CSV row and two columns, ordered ``F1`` then ``F2``.
    """
    feature_columns = ['F1', 'F2']
    frame = pd.read_csv(file_path)
    return frame[feature_columns].values

# Function to read labels from the "targets.csv" file
def read_labels_from_targets(targets_file_path):
    """Load the targets CSV and return its entire contents as an array.

    Parameters
    ----------
    targets_file_path : str or path-like
        Location of the targets CSV file.

    Returns
    -------
    numpy.ndarray
        2-D array holding every column of the file, one row per CSV row.
        NOTE(review): callers appear to expect a single label column —
        confirm against the actual Target.csv layout.
    """
    return pd.read_csv(targets_file_path).values

# Function to generate time series data and labels from a list of CSV files
def generate_data_from_csv(file_list, targets_file_path):
    """Assemble the feature array and the label array for a set of CSV files.

    Parameters
    ----------
    file_list : iterable of str or path-like
        Feature CSV files, each read with ``read_and_process_csv``.
    targets_file_path : str or path-like
        Targets CSV file, read with ``read_labels_from_targets``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(data, labels)``: the per-file arrays stacked in the order of
        ``file_list``, and the contents of the targets file.
    """
    # Feature files are read first, in order, then the single targets file
    per_file_arrays = [read_and_process_csv(csv_path) for csv_path in file_list]
    targets = read_labels_from_targets(targets_file_path)
    return np.array(per_file_arrays), np.array(targets)

# Locations of the per-subject feature files (common prefix) and of the targets file
path1 = "/content/drive/MyDrive/BMEN 6367/SENSOR_DATA_DL_m/Features/Subject_"
targets_file_path = "/content/drive/MyDrive/BMEN 6367/SENSOR_DATA_DL_V2/Target/Target.csv"

# Build the file names Subject_1.csv ... Subject_40.csv and echo each one
csv_files = [path1 + str(subject_id) + '.csv' for subject_id in range(1, 41)]
for file_path in csv_files:
    print(file_path)

# Load every feature file plus the targets into arrays
X, y = generate_data_from_csv(csv_files, targets_file_path)


# **MODELLING WITH SVC**

In [None]:
# Flatten each subject's time series into a single feature row
X = X.reshape(X.shape[0], -1)
# scikit-learn estimators expect a 1-D label vector; passing a column vector
# of shape (n, 1) raises a DataConversionWarning on every fit
y = np.ravel(y)
assert y.shape[0] == X.shape[0], "expected exactly one label per subject file"

# Hold out 25% of the subjects for the final test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Standardize the features (important for SVM): statistics are learned on the
# training split only and then applied unchanged to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the SVM model
svm_model = SVC(kernel='rbf', C=1.0)

# Define K-fold cross-validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validate the same preprocessing + model combination that is trained
# below. Wrapping the scaler and the SVC in a pipeline makes every fold fit its
# own scaler on that fold's training part, so the scores describe the scaled
# SVM (scoring the bare SVC on raw features would describe a different model).
cv_model = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1.0))
cv_scores = cross_val_score(cv_model, X, y, cv=kfold, scoring='accuracy')

# Print cross-validation scores
print("Cross-Validation Scores:", cv_scores)
print("Mean Accuracy:", np.mean(cv_scores))

# Train the SVM model on the (scaled) training split
svm_model.fit(X_train_scaled, y_train)

# Make predictions on the held-out test split
y_pred = svm_model.predict(X_test_scaled)

# Evaluate the model on the test set
accuracy = accuracy_score(y_test, y_pred)
print(f'Test Accuracy: {accuracy}')

# Display classification report
print(classification_report(y_test, y_pred))


**Performance Evaluation**

In [None]:
# Accuracy on the training split (compare with the test accuracy to spot overfitting)
Train_SVM = svm_model.predict(X_train_scaled)
accuracy = accuracy_score(y_train, Train_SVM)
print(f'Training Accuracy: {accuracy}')

# Test-set metrics.
# Recall and precision use scikit-learn's default binary averaging (positive
# class only), whereas the F score below is the class-weighted average.
print(classification_report(y_test, y_pred))
print("Recall score  =",recall_score(y_test, y_pred))
print("Precision score =",precision_score(y_test, y_pred))
print("Accuracy score  =",accuracy_score(y_test, y_pred))
print("F score  =", f1_score(y_test, y_pred, average='weighted'))

# Confusion matrix. Stored under its own name so the `cm` module imported
# from matplotlib is not overwritten.
cm_svm = confusion_matrix(y_test, y_pred)
# Tick labels assume the sorted class labels map to Healthy, Unhealthy
# (i.e. 0 = Healthy, 1 = Unhealthy) — TODO confirm against Target.csv
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_svm, annot=True, fmt='d', cmap='Set1', xticklabels=['Healthy', 'Unhealthy'], yticklabels=['Healthy', 'Unhealthy'], ax=ax)
ax.set_title('Confusion Matrix (SVC)')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()

# ROC curve from the continuous decision scores (the SVC was created without
# probability=True, so decision_function is the score source)
y_pred_auc = svm_model.decision_function(X_test_scaled)
fpr, tpr, _ = roc_curve(y_test, y_pred_auc)
roc_auc = auc(fpr, tpr)

# Plot ROC curve with the chance diagonal for reference
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc='lower right')
plt.show()

**Save Model**

In [None]:
# Persist the trained SVM model (joblib is imported in the libraries cell)
model_svm = 'svm_model.joblib'
joblib.dump(svm_model, model_svm)
print(f"SVM model saved to {model_svm}")

# The SVC was trained on standardized features, so the fitted scaler has to be
# saved as well; without it the stored model cannot be applied to new raw data.
scaler_path = 'feature_scaler.joblib'
joblib.dump(scaler, scaler_path)
print(f"Feature scaler saved to {scaler_path}")

# To restore later:
#   loaded_svm_model = joblib.load('svm_model.joblib')
#   loaded_scaler = joblib.load('feature_scaler.joblib')

------------------------------------------
------------------------------------------

# **GRADIENT BOOSTING CLASSIFIER**

In [None]:
# Base estimator for the search; the fixed seed keeps the boosting runs repeatable
gb_model = GradientBoostingClassifier(random_state=42)

# Hyperparameter grid: 3 values for each of 5 parameters = 243 combinations
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Exhaustive grid search with 5-fold cross-validation on the training split.
# np.ravel hands the estimator a 1-D label vector, which avoids a
# DataConversionWarning on each of the many fits if y_train is a column vector.
grid_search = GridSearchCV(gb_model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train_scaled, np.ravel(y_train))

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# GridSearchCV (refit=True by default) has already refit the best configuration
# on the entire training set, so reuse that estimator instead of training an
# identical model a second time.
best_gb_model = grid_search.best_estimator_

# Make predictions on the test set
y_pred_GB = best_gb_model.predict(X_test_scaled)

# Evaluate the model on the test set
accuracy = accuracy_score(y_test, y_pred_GB)
print(f'Test Accuracy: {accuracy}')

# Display classification report
print(classification_report(y_test, y_pred_GB))

**Performance Evaluation**

In [None]:
# Test-set metrics for the tuned Gradient Boosting model.
# Recall and precision use scikit-learn's default binary averaging (positive
# class only), whereas the F score below is the class-weighted average.
y_pred_GB = best_gb_model.predict(X_test_scaled)
print(classification_report(y_test, y_pred_GB))
print("Recall score  =",recall_score(y_test, y_pred_GB))
print("Precision score =",precision_score(y_test, y_pred_GB))
print("Accuracy score  =",accuracy_score(y_test, y_pred_GB))
print("F score  =", f1_score(y_test, y_pred_GB, average='weighted'))

# Confusion matrix. Stored under its own name so the `cm` module imported
# from matplotlib is not overwritten.
cm_gb = confusion_matrix(y_test, y_pred_GB)
# Tick labels assume the sorted class labels map to Healthy, Unhealthy
# (i.e. 0 = Healthy, 1 = Unhealthy) — TODO confirm against Target.csv
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_gb, annot=True, fmt='d', cmap='Set1', xticklabels=['Healthy', 'Unhealthy'], yticklabels=['Healthy', 'Unhealthy'], ax=ax)
ax.set_title('Confusion Matrix(GB)')
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()

# ROC curve: it must be built from the continuous decision scores. Feeding the
# hard 0/1 predictions to roc_curve yields a single operating point and a
# misleading AUC.
y_pred_GB_auc = best_gb_model.decision_function(X_test_scaled)
fpr, tpr, _ = roc_curve(y_test, y_pred_GB_auc)
roc_auc = auc(fpr, tpr)

# Plot ROC curve with the chance diagonal for reference
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc='lower right')
plt.show()

In [None]:
# Accuracy of the tuned Gradient Boosting model on the training split
# (compare with the test accuracy to gauge overfitting)
Train_GB = best_gb_model.predict(X_train_scaled)
accuracy = accuracy_score(y_true=y_train, y_pred=Train_GB)
print(f'Train Accuracy for GB: {accuracy}')

In [None]:
# Persist the tuned Gradient Boosting model (joblib is imported in the libraries cell)
model_GB = 'GB_model.joblib'
joblib.dump(best_gb_model, model_GB)
print(f"GB model saved to {model_GB}")

# The model was trained on standardized features, so the fitted scaler has to
# be saved as well; without it the stored model cannot be applied to new raw data.
scaler_path = 'feature_scaler.joblib'
joblib.dump(scaler, scaler_path)
print(f"Feature scaler saved to {scaler_path}")

# To restore later:
#   loaded_gb_model = joblib.load('GB_model.joblib')
#   loaded_scaler = joblib.load('feature_scaler.joblib')