In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
!pip install xgboost



In [7]:
!pip install imblearn



In [8]:
# Basic data handling libraries
import numpy as np
import pandas as pd
np.random.seed(1)

# Cross validation and hyperparameter tuning libraries
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
from sklearn import decomposition
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

# Machine learning classifiers
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

import warnings
warnings.filterwarnings('ignore')

In [9]:
from imblearn.over_sampling import SMOTE
smote = SMOTE()

In [10]:
def train_model_for_a_class(x,Label,K,use_smote):
  # Cross validation and model training
  cvscores = []
  avg_score = 0.0
  kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

  for train, test in kfold.split(x, Label):
      if len(np.unique(Label)) == 2:
        model = XGBClassifier(objective = "binary:logistic",seed=42)
      else:
        model = XGBClassifier(objective = "multi:softmax",seed=42)
      if use_smote:
        X_train_smote,y_train_smote = smote.fit_resample(x[train],Label[train])
        model.fit(X_train_smote, y_train_smote)
      else:
        model.fit(x[train], Label[train])
      score = model.score(x[test], Label[test])
      cvscores.append(score)
      avg_score = avg_score + score

  avg_score = avg_score/K
  return avg_score, cvscores

In [11]:
def train_model(data_file_path,use_smote=False): 

  data = np.loadtxt(data_file_path, delimiter=",")

  num_samples = data.shape[1]
  num_features = data.shape[0]-3

  x = np.transpose(data[0:num_features:])
  Label_1 = np.transpose(data[num_features:num_features+1,:]); Label_1 = Label_1.astype(int);
  Label_2 = np.transpose(data[num_features+1:num_features+2,:]); Label_2 = Label_2.astype(int);
  Label_3 = np.transpose(data[num_features+2:num_features+3,:]); Label_3 = Label_3.astype(int);

  print("Number of points in the dataset: {}".format(num_samples))
  print("Number of features in each datapoint: {}\n".format(num_features))


  # Preprocessing
  scl = StandardScaler()
  x = scl.fit_transform(x)

  print("Training model for 2 class")
  avg_score_2_class, cvscores_2_class = train_model_for_a_class(x,Label_1,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_2_class, avg_score_2_class))

  print("Training model for 4 class")
  avg_score_4_class, cvscores_4_class = train_model_for_a_class(x,Label_2,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_4_class, avg_score_4_class))

  print("Training model for 10 class")
  avg_score_10_class, cvscores_10_class = train_model_for_a_class(x,Label_3,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_10_class, avg_score_10_class))

# Root mean square energy 

Window length = 1e6

In [42]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/rms_energy_M=1e6.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 2

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [0.920704845814978, 0.9162995594713657, 0.8986784140969163, 0.8964757709251101, 0.9229074889867841] 
 Average accuracy: 0.911013215859031

Training model for 10 class
Results:
 Cross validation scores: [0.7797356828193832, 0.7533039647577092, 0.7577092511013216, 0.7665198237885462, 0.7687224669603524] 
 Average accuracy: 0.7651982378854626



Window length = 1e7

In [43]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/rms_energy_M=1e7.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 2

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [0.9347826086956522, 1.0, 0.9555555555555556, 0.9555555555555556, 0.9777777777777777] 
 Average accuracy: 0.9647342995169084

Training model for 10 class
Results:
 Cross validation scores: [0.717391304347826, 0.7391304347826086, 0.7333333333333333, 0.6888888888888889, 0.6222222222222222] 
 Average accuracy: 0.7001932367149759



# Zero crossing rate

Window length = 1e6

In [51]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/zero_crossing_rate_1e6.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 4

Training model for 2 class
Results:
 Cross validation scores: [0.6894273127753304, 0.7334801762114538, 0.7158590308370044, 0.711453744493392, 0.7224669603524229] 
 Average accuracy: 0.7145374449339207

Training model for 4 class
Results:
 Cross validation scores: [0.5110132158590308, 0.5110132158590308, 0.5374449339207048, 0.4955947136563877, 0.4889867841409692] 
 Average accuracy: 0.5088105726872246

Training model for 10 class
Results:
 Cross validation scores: [0.42731277533039647, 0.41409691629955947, 0.4052863436123348, 0.4251101321585903, 0.43612334801762115] 
 Average accuracy: 0.4215859030837004



Window length = 1e7

In [12]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/zero_crossing_rate_1e7.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 4

Training model for 2 class
Results:
 Cross validation scores: [0.717391304347826, 0.6086956521739131, 0.6222222222222222, 0.6444444444444445, 0.6666666666666666] 
 Average accuracy: 0.6518840579710145

Training model for 4 class
Results:
 Cross validation scores: [0.45652173913043476, 0.43478260869565216, 0.5111111111111111, 0.4666666666666667, 0.3333333333333333] 
 Average accuracy: 0.44048309178743966

Training model for 10 class
Results:
 Cross validation scores: [0.30434782608695654, 0.34782608695652173, 0.26666666666666666, 0.3111111111111111, 0.4444444444444444] 
 Average accuracy: 0.3348792270531401



Window length = 512

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/zero crossing rate/zero_crossing_rate_wl=512.csv",use_smote=True)

Number of points in the dataset: 4433537
Number of features in each datapoint: 4

Training model for 2 class


In [None]:
x = np.transpose(Data[0:4,:])
Label_1 = np.transpose(Data[4:5,:]); Label_1 = Label_1.astype(int);
Label_2 = np.transpose(Data[5:6,:]); Label_2 = Label_2.astype(int);
Label_3 = np.transpose(Data[6:7,:]); Label_3 = Label_3.astype(int);

In [None]:
cvscores = []
cnt = 0
K = 5
kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=1)
for train, test in kfold.split(x, Label_1):
    cnt = cnt + 1
    model = XGBClassifier()
    model.fit(x[train], Label_1[train])
    score = model.score(x[test], Label_1[test])
    print("Cross validation {} : {}".format(cnt, score))
    cvscores.append(score)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 1 : 0.9309084839654092


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 2 : 0.9378386120346269


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 3 : 0.9336466273526656


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 4 : 0.9329699664037839


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 5 : 0.9344597482595717


In [None]:
acc = (0.9309084839654092+0.9378386120346269+0.9336466273526656+0.9329699664037839+0.9344597482595717)/5
acc

0.9339646876032115

In [None]:
cnt= 0
K = 5
kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=1)
for train, test in kfold.split(x, Label_2):
    cnt = cnt + 1
    model = XGBClassifier()
    model.fit(x[train], Label_2[train])
    score = model.score(x[test], Label_2[test])
    print("Cross validation {} : {}".format(cnt, score))
    cvscores.append(score)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 1 : 0.7150448625703163


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 2 : 0.7095391041921354


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 3 : 0.7120525720446551


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 4 : 0.717024902250687


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 5 : 0.7128160711486432


In [None]:
acc = (0.7150448625703163+0.7095391041921354+0.7120525720446551+0.717024902250687+ 0.7128160711486432)/5
acc

0.7132955024412874

In [None]:
cnt = 0
K = 5
kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=1)
for train, test in kfold.split(x, Label_3):
    cnt = cnt + 1
    model = XGBClassifier()
    model.fit(x[train], Label_3[train])
    score = model.score(x[test], Label_3[test])
    print("Cross validation {} : {}".format(cnt, score))
    cvscores.append(score)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 1 : 0.64223284328099


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 2 : 0.6417411368793334


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 3 : 0.6317351729488997


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [None]:
acc = (0.64223284328099+0.6417411368793334+0.6317351729488997)/3
acc

0.6385697177030744