In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install xgboost



In [3]:
!pip install imblearn



In [4]:
# Basic data handling libraries
import numpy as np
import pandas as pd
np.random.seed(1)

# Cross validation and hyperparameter tuning libraries
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
from sklearn import decomposition
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

# Machine learning classifiers
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

import warnings
warnings.filterwarnings('ignore')

In [5]:
from imblearn.over_sampling import SMOTE
smote = SMOTE()

In [6]:
def train_model_for_a_class(x,Label,K,use_smote):
  # Cross validation and model training
  cvscores = []
  avg_score = 0.0
  kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

  for train, test in kfold.split(x, Label):
      if len(np.unique(Label)) == 2:
        model = XGBClassifier(objective = "binary:logistic",seed=42)
      else:
        model = XGBClassifier(objective = "multi:softmax",seed=42)
      if use_smote:
        X_train_smote,y_train_smote = smote.fit_resample(x[train],Label[train])
        model.fit(X_train_smote, y_train_smote)
      else:
        model.fit(x[train], Label[train])
      score = model.score(x[test], Label[test])
      cvscores.append(score)
      avg_score = avg_score + score

  avg_score = avg_score/K
  return avg_score, cvscores

In [7]:
def train_model(data_file_path,use_smote=False): 

  data = np.loadtxt(data_file_path, delimiter=",")

  num_samples = data.shape[1]
  num_features = data.shape[0]-3

  x = np.transpose(data[0:num_features:])
  Label_1 = np.transpose(data[num_features:num_features+1,:]); Label_1 = Label_1.astype(int);
  Label_2 = np.transpose(data[num_features+1:num_features+2,:]); Label_2 = Label_2.astype(int);
  Label_3 = np.transpose(data[num_features+2:num_features+3,:]); Label_3 = Label_3.astype(int);

  print("Number of points in the dataset: {}".format(num_samples))
  print("Number of features in each datapoint: {}\n".format(num_features))


  # Preprocessing
  scl = StandardScaler()
  x = scl.fit_transform(x)

  print("Training model for 2 class")
  avg_score_2_class, cvscores_2_class = train_model_for_a_class(x,Label_1,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_2_class, avg_score_2_class))

  print("Training model for 4 class")
  avg_score_4_class, cvscores_4_class = train_model_for_a_class(x,Label_2,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_4_class, avg_score_4_class))

  print("Training model for 10 class")
  avg_score_10_class, cvscores_10_class = train_model_for_a_class(x,Label_3,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_10_class, avg_score_10_class))

In [8]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/feature_fusion_2048.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 6198

Training model for 2 class
Results:
 Cross validation scores: [0.9565217391304348, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 0.9913043478260869

Training model for 4 class
Results:
 Cross validation scores: [1.0, 1.0, 0.9777777777777777, 1.0, 1.0] 
 Average accuracy: 0.9955555555555555

Training model for 10 class
Results:
 Cross validation scores: [0.9130434782608695, 0.9347826086956522, 0.9777777777777777, 0.9333333333333333, 0.9111111111111111] 
 Average accuracy: 0.9340096618357489



In [9]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/feature_fusion_4096.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 12422

Training model for 2 class
Results:
 Cross validation scores: [0.9782608695652174, 0.9782608695652174, 1.0, 1.0, 1.0] 
 Average accuracy: 0.9913043478260869

Training model for 4 class
Results:
 Cross validation scores: [0.9782608695652174, 0.9782608695652174, 1.0, 1.0, 1.0] 
 Average accuracy: 0.9913043478260869

Training model for 10 class
Results:
 Cross validation scores: [0.8913043478260869, 0.8478260869565217, 0.9555555555555556, 0.9111111111111111, 0.8888888888888888] 
 Average accuracy: 0.8989371980676328



In [10]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/2 features/feature_fusion_mfcc_fft.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 4148

Training model for 2 class
Results:
 Cross validation scores: [0.9565217391304348, 1.0, 0.9555555555555556, 1.0, 1.0] 
 Average accuracy: 0.9824154589371981

Training model for 4 class
Results:
 Cross validation scores: [0.9782608695652174, 1.0, 0.9777777777777777, 1.0, 0.9777777777777777] 
 Average accuracy: 0.9867632850241546

Training model for 10 class
Results:
 Cross validation scores: [0.9565217391304348, 0.8913043478260869, 0.9111111111111111, 0.9777777777777777, 0.8888888888888888] 
 Average accuracy: 0.92512077294686



In [11]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/2 features/feature_fusion_mfcc_psd.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 4150

Training model for 2 class
Results:
 Cross validation scores: [0.9565217391304348, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 0.9913043478260869

Training model for 4 class
Results:
 Cross validation scores: [0.9347826086956522, 1.0, 1.0, 1.0, 0.9777777777777777] 
 Average accuracy: 0.982512077294686

Training model for 10 class
Results:
 Cross validation scores: [0.8260869565217391, 0.8260869565217391, 0.9555555555555556, 0.8666666666666667, 0.9555555555555556] 
 Average accuracy: 0.8859903381642512



In [12]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/feature fusion data/2 features/feature_fusion_fft_psd.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 4098

Training model for 2 class
Results:
 Cross validation scores: [0.9565217391304348, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 0.9913043478260869

Training model for 4 class
Results:
 Cross validation scores: [1.0, 0.9782608695652174, 0.9333333333333333, 0.9777777777777777, 1.0] 
 Average accuracy: 0.9778743961352656

Training model for 10 class
Results:
 Cross validation scores: [0.9347826086956522, 0.9565217391304348, 0.9777777777777777, 0.9111111111111111, 0.9333333333333333] 
 Average accuracy: 0.9427053140096617

