In [None]:
# Mount Google Drive at /content/drive; the experiment cells further down read
# their CSV inputs from paths beneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install xgboost



In [None]:
!pip install imblearn



In [None]:
# Basic data handling libraries
import numpy as np
import pandas as pd
np.random.seed(42)

# Cross validation and hyperparameter tuning libraries
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import  accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# Machine learning classifiers
from xgboost import XGBClassifier

import warnings
warnings.filterwarnings('ignore')

In [None]:
class color:
    """ANSI escape sequences for emphasising text written with print()."""
    END = '\033[0m'        # reset: turns every attribute back off
    BOLD = '\033[1m'       # switch bold on
    UNDERLINE = '\033[4m'  # switch underline on

In [None]:
from imblearn.over_sampling import SMOTE
# Shared oversampler used by train_model_for_a_class when use_smote is true.
# A fixed random_state makes every fit_resample call generate the same
# synthetic samples; without it SMOTE draws from the global NumPy RNG, so the
# reported scores change whenever a cell is re-run or run out of order.
smote = SMOTE(random_state=42)

In [None]:
def train_model_for_a_class(x,Label,K,use_smote,average='binary'):
  """Cross-validate an XGBoost classifier on one label set and print mean scores.

  For each of ``K`` stratified, shuffled folds a fresh ``XGBClassifier`` is
  fitted on the training part (optionally oversampled with the notebook-level
  ``smote`` object) and scored on the held-out part. The mean accuracy, F1,
  recall and precision over the folds are printed; nothing is returned.

  Parameters
  ----------
  x : numpy.ndarray
      Feature matrix with one row per sample.
  Label : array-like
      Integer class labels, either 1-D or a single ``(n, 1)`` column; it is
      flattened to 1-D before use.
  K : int
      Number of cross-validation folds.
  use_smote : bool
      When true, each training fold is resampled with ``smote.fit_resample``
      before fitting. The held-out fold is never resampled.
  average : str, default 'binary'
      Forwarded to ``f1_score``, ``recall_score`` and ``precision_score``.

  Raises
  ------
  ValueError
      If ``average='binary'`` is requested for labels with more than two
      classes. The metric functions reject that combination, so it is
      reported here before any model is trained.
  """
  # Labels may arrive as an (n, 1) column; the splitter, the sampler, the
  # classifier and the metric functions all expect a flat vector.
  Label = np.ravel(Label)

  n_classes = len(np.unique(Label))
  if n_classes > 2 and average == 'binary':
    raise ValueError(
        "average='binary' is only valid for two-class labels but {} classes "
        "were found; pass average='weighted', 'macro' or 'micro'.".format(n_classes))

  # The objective depends only on the label set, so pick it once, not per fold.
  objective = "binary:logistic" if n_classes == 2 else "multi:softmax"

  # Cross validation and model training
  accuracy_scores = []
  f1_scores = []
  recall_scores = []
  precision_scores = []
  kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

  for train, test in kfold.split(x, Label):
    # random_state is the documented seeding argument of the sklearn wrapper.
    model = XGBClassifier(objective=objective, random_state=42)

    X_fit, y_fit = x[train], Label[train]
    if use_smote:
      # Oversample the training fold only, so the evaluation fold keeps its
      # original samples and class balance.
      X_fit, y_fit = smote.fit_resample(X_fit, y_fit)
    model.fit(X_fit, y_fit)

    y_true = Label[test]
    y_pred = model.predict(x[test])
    accuracy_scores.append(accuracy_score(y_true, y_pred))
    f1_scores.append(f1_score(y_true, y_pred, average=average))
    recall_scores.append(recall_score(y_true, y_pred, average=average))
    precision_scores.append(precision_score(y_true, y_pred, average=average))

  print("Accuracy: {}".format(np.mean(accuracy_scores)))
  print("f1_score: {}".format(np.mean(f1_scores)))
  print("recall_score: {}".format(np.mean(recall_scores)))
  print("precision_score: {}".format(np.mean(precision_scores)))
  print("\n-------------------------------------------------------\n")

In [None]:
def train_model(data_file_path,use_smote=False,average='binary',n_splits=5):
  """Load a fused-feature CSV and cross-validate one classifier per label row.

  The file is read as a comma-separated numeric matrix in which every column
  is one sample: all rows except the last three are features, and the last
  three rows hold the labels for the tasks printed as "2 class", "4 class"
  and "10 class" respectively.

  Parameters
  ----------
  data_file_path : str
      Path of the CSV file to load.
  use_smote : bool, default False
      Forwarded to ``train_model_for_a_class``.
  average : str, default 'binary'
      Metric averaging mode for the second and third label rows. The first
      label row is always scored with ``'binary'``. Because ``'binary'``
      cannot be applied to more than two classes, it is replaced by
      ``'weighted'`` (with a printed notice) for any label row that has more
      than two distinct values.
  n_splits : int, default 5
      Number of cross-validation folds used for every task.

  Raises
  ------
  ValueError
      If the file does not contain at least one feature row followed by
      three label rows.
  """
  # ndmin=2 keeps the matrix two-dimensional even for a one-row/one-column file.
  data = np.loadtxt(data_file_path, delimiter=",", ndmin=2)

  num_samples = data.shape[1]
  num_features = data.shape[0]-3
  if num_features < 1:
    raise ValueError(
        "{} has {} row(s); expected at least one feature row followed by "
        "three label rows".format(data_file_path, data.shape[0]))

  x = np.transpose(data[:num_features, :])
  # Indexing a single row yields flat 1-D label vectors rather than (n, 1)
  # columns, which is the shape the splitter and metrics expect.
  label_rows = [data[num_features+offset].astype(int) for offset in range(3)]

  print("Number of points in the dataset: {}".format(num_samples))
  print("Number of features in each datapoint: {}\n".format(num_features))


  # Preprocessing
  # NOTE(review): the scaler is fitted on every sample before the folds are
  # split, so held-out rows contribute to the mean/variance used to scale the
  # training rows. Consider fitting it on each training fold instead.
  scl = StandardScaler()
  x = scl.fit_transform(x)

  tasks = (
      ("Training model for 2 class:", label_rows[0], 'binary'),
      ("Training model for 4 class:", label_rows[1], average),
      ("Training model for 10 class:", label_rows[2], average),
  )
  for heading, labels, task_average in tasks:
    print(color.BOLD+heading+color.END)
    n_classes = np.unique(labels).size
    if task_average == 'binary' and n_classes > 2:
      # 'binary' averaging is rejected by the metric functions for multiclass
      # targets; fall back instead of failing part-way through the run.
      print("average='binary' is undefined for {} classes; using 'weighted'.".format(n_classes))
      task_average = 'weighted'
    train_model_for_a_class(x,labels,n_splits,use_smote,average=task_average)

In [None]:
# Parameter setting 1: feature_fusion_2048.csv, SMOTE oversampling enabled,
# weighted-average metrics for the multi-class tasks.
train_model(
    data_file_path="/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/feature_fusion_2048.csv",
    use_smote=True,
    average='weighted',
)

Number of points in the dataset: 227
Number of features in each datapoint: 6198

[1mTraining model for 2 class:[0m
Accuracy: 0.9913043478260869
f1_score: 0.9945945945945945
recall_score: 0.9945945945945945
precision_score: 0.9945945945945945

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9911111111111112
f1_score: 0.9911475869809203
recall_score: 0.9911111111111112
precision_score: 0.9922614379084967

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9384541062801933
f1_score: 0.9375781252626947
recall_score: 0.9384541062801933
precision_score: 0.9516403650026838

-------------------------------------------------------



In [None]:
# Parameter setting 2: feature_fusion_4096.csv, SMOTE oversampling enabled,
# weighted-average metrics for the multi-class tasks.
train_model(
    data_file_path="/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/feature_fusion_4096.csv",
    use_smote=True,
    average='weighted',
)

Number of points in the dataset: 227
Number of features in each datapoint: 12422

[1mTraining model for 2 class:[0m
Accuracy: 0.9913043478260869
f1_score: 0.9946666666666666
recall_score: 0.9947368421052631
precision_score: 0.9947368421052631

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9913043478260869
f1_score: 0.9914361001317523
recall_score: 0.9913043478260869
precision_score: 0.9922847399829496

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9120772946859905
f1_score: 0.9103650793650793
recall_score: 0.9120772946859905
precision_score: 0.9285764895330113

-------------------------------------------------------



In [None]:
# Fused MFCC + FFT features, SMOTE oversampling enabled, weighted-average
# metrics for the multi-class tasks.
train_model(
    data_file_path="/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/2 features/feature_fusion_mfcc_fft.csv",
    use_smote=True,
    average='weighted',
)

Number of points in the dataset: 227
Number of features in each datapoint: 4148

[1mTraining model for 2 class:[0m
Accuracy: 0.9866666666666667
f1_score: 0.991549295774648
recall_score: 0.9837837837837838
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9823188405797101
f1_score: 0.9825582587377818
recall_score: 0.9823188405797101
precision_score: 0.9857844715986234

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9255072463768116
f1_score: 0.9214967767226933
recall_score: 0.9255072463768116
precision_score: 0.9323349436392915

-------------------------------------------------------



In [None]:
# Fused MFCC + PSD features, SMOTE oversampling enabled, weighted-average
# metrics for the multi-class tasks.
train_model(
    data_file_path="/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/2 features/feature_fusion_mfcc_psd.csv",
    use_smote=True,
    average='weighted',
)

Number of points in the dataset: 227
Number of features in each datapoint: 4150

[1mTraining model for 2 class:[0m
Accuracy: 0.9913043478260869
f1_score: 0.9945945945945945
recall_score: 0.9945945945945945
precision_score: 0.9945945945945945

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.982512077294686
f1_score: 0.9831878891213929
recall_score: 0.982512077294686
precision_score: 0.9856720659278204

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.8902415458937197
f1_score: 0.8827063990286497
recall_score: 0.8902415458937197
precision_score: 0.9154728931830383

-------------------------------------------------------



In [None]:
# Fused FFT + PSD features, SMOTE oversampling enabled, weighted-average
# metrics for the multi-class tasks.
train_model(
    data_file_path="/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/2 features/feature_fusion_fft_psd.csv",
    use_smote=True,
    average='weighted',
)

Number of points in the dataset: 227
Number of features in each datapoint: 4098

[1mTraining model for 2 class:[0m
Accuracy: 0.9913043478260869
f1_score: 0.9945945945945945
recall_score: 0.9945945945945945
precision_score: 0.9945945945945945

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9734299516908212
f1_score: 0.9734586381187598
recall_score: 0.9734299516908212
precision_score: 0.9747909601843959

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9339130434782609
f1_score: 0.929054643376894
recall_score: 0.9339130434782609
precision_score: 0.9474358561460011

-------------------------------------------------------

