In [None]:
# Mount Google Drive at /content/drive; the CSV paths used by the train_model
# calls below live under this mount point. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install xgboost



In [None]:
!pip install imblearn



In [None]:
# Basic data handling libraries
import numpy as np
import pandas as pd
np.random.seed(42)

# Cross validation and hyperparameter tuning libraries
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import  accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# Machine learning classifiers
from xgboost import XGBClassifier

import warnings
warnings.filterwarnings('ignore')

In [None]:
class color:
    """ANSI escape sequences used to style text printed to the cell output.

    Wrap a string as ``color.BOLD + text + color.END`` to switch a style on
    and then reset the terminal attributes again.
    """

    # Reset all attributes back to the default rendering.
    END = '\033[0m'
    # Bold / increased intensity.
    BOLD = '\033[1m'
    # Underlined text.
    UNDERLINE = '\033[4m'

In [None]:
from imblearn.over_sampling import SMOTE

# Shared oversampler used by train_model_for_a_class when use_smote=True.
# A fixed random_state makes the synthetic samples reproducible on their own,
# instead of depending on how much of NumPy's global random state has already
# been consumed when a fold is resampled.
smote = SMOTE(random_state=42)

In [None]:
def train_model_for_a_class(x,Label,K,use_smote,average='binary'):
  """Cross-validate an XGBoost classifier and print the mean fold metrics.

  Parameters
  ----------
  x : array
      Feature matrix with one row per sample; it is indexed with the integer
      index arrays produced by StratifiedKFold.
  Label : array-like
      Class label of every sample. A column vector of shape (n, 1) is
      accepted and flattened to 1-D.
  K : int
      Number of stratified, shuffled folds.
  use_smote : bool
      When true, the training part of every fold is resampled with the
      module-level ``smote`` object before fitting. The held-out part is
      never resampled.
  average : str
      Averaging mode passed to f1_score, recall_score and precision_score.
      'binary' is only defined for two-class targets, so when ``Label`` has
      more than two classes it is replaced by 'weighted' instead of letting
      the metric functions raise.

  Returns
  -------
  dict
      Mean over the folds of each metric, keyed by "accuracy", "f1",
      "recall" and "precision". The same four means are printed.
  """
  # Work on a flat label vector: an (n, 1) column only triggers conversion
  # warnings in the libraries called below.
  Label = np.ravel(Label)

  # The label set does not change between folds, so decide these once.
  n_classes = len(np.unique(Label))
  objective = "binary:logistic" if n_classes == 2 else "multi:softmax"
  if n_classes > 2 and average == 'binary':
    average = 'weighted'

  fold_scores = {"accuracy": [], "f1": [], "recall": [], "precision": []}
  kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

  for train, test in kfold.split(x, Label):
    # A new model per fold, so every fold is fitted from scratch.
    model = XGBClassifier(objective=objective, seed=42)

    x_train, y_train = x[train], Label[train]
    if use_smote:
      # Oversample the training portion only; the test portion keeps its
      # original class balance.
      x_train, y_train = smote.fit_resample(x_train, y_train)
    model.fit(x_train, y_train)

    y_true = Label[test]
    y_pred = model.predict(x[test])
    fold_scores["accuracy"].append(accuracy_score(y_true, y_pred))
    fold_scores["f1"].append(f1_score(y_true, y_pred, average=average))
    fold_scores["recall"].append(recall_score(y_true, y_pred, average=average))
    fold_scores["precision"].append(precision_score(y_true, y_pred, average=average))

  mean_scores = {name: np.mean(values) for name, values in fold_scores.items()}

  print("Accuracy: {}".format(mean_scores["accuracy"]))
  print("f1_score: {}".format(mean_scores["f1"]))
  print("recall_score: {}".format(mean_scores["recall"]))
  print("precision_score: {}".format(mean_scores["precision"]))
  print("\n-------------------------------------------------------\n")

  return mean_scores

In [None]:
def train_model(data_file_path,use_smote=False,average='binary'):
  """Load a feature CSV and cross-validate the 2-, 4- and 10-class problems.

  The file is read as a comma-separated numeric matrix with one column per
  sample. Its last three rows are used, in order, as the labels of the
  "2 class", "4 class" and "10 class" runs; every row before them is a
  feature. Features are standardised and each label set is evaluated with
  5-fold cross-validation via train_model_for_a_class.

  Parameters
  ----------
  data_file_path : str
      Path of the CSV file to load.
  use_smote : bool
      Forwarded to train_model_for_a_class for all three runs.
  average : str
      Averaging mode forwarded to the 4- and 10-class runs; the 2-class run
      uses train_model_for_a_class's own default. 'binary' is only meaningful
      for two-class targets, so a multi-class mode such as 'weighted' should
      be passed explicitly.

  Raises
  ------
  ValueError
      If the file does not contain at least one feature row in addition to
      the three label rows.
  """
  num_label_rows = 3

  # ndmin=2 keeps the matrix two-dimensional even when the file holds a
  # single row or a single column.
  data = np.loadtxt(data_file_path, delimiter=",", ndmin=2)
  if data.shape[0] <= num_label_rows:
    raise ValueError(
        "Expected at least one feature row followed by {} label rows, "
        "but {!r} has only {} row(s)".format(
            num_label_rows, data_file_path, data.shape[0]))

  num_samples = data.shape[1]
  num_features = data.shape[0] - num_label_rows

  # Rows are features / labels and columns are samples, so transpose the
  # feature rows into the usual (samples, features) layout.
  x = np.transpose(data[:num_features])
  # Each label row becomes a flat integer vector with one entry per sample.
  Label_1, Label_2, Label_3 = data[num_features:].astype(int)

  print("Number of points in the dataset: {}".format(num_samples))
  print("Number of features in each datapoint: {}\n".format(num_features))

  # Preprocessing
  # NOTE(review): the scaler is fitted on all samples before the folds are
  # split, so statistics of the held-out folds influence the scaling.
  # Consider fitting it on the training part of each fold instead.
  scl = StandardScaler()
  x = scl.fit_transform(x)

  print(color.BOLD+"Training model for 2 class:"+color.END)
  train_model_for_a_class(x,Label_1,5,use_smote)

  print(color.BOLD+"Training model for 4 class:"+color.END)
  train_model_for_a_class(x,Label_2,5,use_smote, average=average)

  print(color.BOLD+"Training model for 10 class:"+color.END)
  train_model_for_a_class(x,Label_3,5,use_smote, average=average)

# Root mean square energy 

Window length = 1e6

In [None]:
# RMS-energy features (file suffix M=1e6): SMOTE oversampling enabled,
# weighted averaging for the 4- and 10-class metrics.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/rms_energy_M=1e6.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 2

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9127753303964757
f1_score: 0.9135136887773495
recall_score: 0.9127753303964757
precision_score: 0.9169064279910379

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.7607929515418503
f1_score: 0.7568677040816272
recall_score: 0.7607929515418503
precision_score: 0.7679468055573022

-------------------------------------------------------



Window length = 1e7

In [None]:
# RMS-energy features (file suffix M=1e7): SMOTE oversampling enabled,
# weighted averaging for the 4- and 10-class metrics.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/rms_energy_M=1e7.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 2

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9647342995169084
f1_score: 0.9646127869671581
recall_score: 0.9647342995169084
precision_score: 0.9679909065075305

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.7135265700483091
f1_score: 0.7019010206111654
recall_score: 0.7135265700483091
precision_score: 0.7398777317690362

-------------------------------------------------------



# Zero crossing rate

Window length = 1e6

In [None]:
# Zero-crossing-rate features (file suffix 1e6): SMOTE oversampling enabled,
# weighted averaging for the 4- and 10-class metrics.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/zero_crossing_rate_1e6.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 4

[1mTraining model for 2 class:[0m
Accuracy: 0.7176211453744493
f1_score: 0.7961496061193762
recall_score: 0.6736559139784946
precision_score: 0.9737662694634895

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.5083700440528635
f1_score: 0.4548529037102256
recall_score: 0.5083700440528635
precision_score: 0.5803844033410339

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.43480176211453736
f1_score: 0.4251350835226245
recall_score: 0.43480176211453736
precision_score: 0.4400816251176353

-------------------------------------------------------



Window length = 1e7

In [None]:
# Zero-crossing-rate features (file suffix 1e7): SMOTE oversampling enabled,
# weighted averaging for the 4- and 10-class metrics.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/zero_crossing_rate_1e7.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 4

[1mTraining model for 2 class:[0m
Accuracy: 0.6300483091787439
f1_score: 0.7361255292093927
recall_score: 0.6291607396870555
precision_score: 0.8912724193458825

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.41391304347826086
f1_score: 0.40845929984710594
recall_score: 0.41391304347826086
precision_score: 0.45044683121494716

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.3084057971014493
f1_score: 0.30145874200946665
recall_score: 0.3084057971014493
precision_score: 0.3212921555095468

-------------------------------------------------------



# Some testing

Window length = 512

In [None]:
# Zero-crossing-rate features (file suffix wl=512) with SMOTE oversampling.
# 'weighted' averaging is passed explicitly, as in the runs above: the default
# average='binary' is only defined for two-class targets, and this call also
# scores the 4- and 10-class labels.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/zero crossing rate/zero_crossing_rate_wl=512.csv",use_smote=True, average='weighted')

Number of points in the dataset: 4433537
Number of features in each datapoint: 4

Training model for 2 class


In [None]:
# NOTE(review): no visible cell defines `Data`, so this cell fails on a fresh
# kernel. It is loaded here from the window-length-512 file used by the cell
# above, which is presumably the matrix these slices were written for
# (4 feature rows followed by 3 label rows) — confirm the path.
Data = np.loadtxt("/content/drive/MyDrive/CEERI Project - Kalit/data/time-domain/zero crossing rate/zero_crossing_rate_wl=512.csv", delimiter=",")

# Rows 0-3 are the features; transpose to one row per sample.
x = np.transpose(Data[0:4,:])
# Rows 4, 5 and 6 are the three label sets. They are kept as flat 1-D integer
# vectors: (n, 1) column vectors make every fit/score call emit the
# column_or_1d conversion warning.
Label_1 = Data[4].astype(int)
Label_2 = Data[5].astype(int)
Label_3 = Data[6].astype(int)

In [None]:
# Stratified 5-fold cross-validation of a default XGBClassifier on Label_1;
# the accuracy of every fold is printed and collected in cvscores.
K = 5
cvscores = []
cnt = 0
kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=1)
for cnt, (train, test) in enumerate(kfold.split(x, Label_1), start=1):
    # A new classifier per fold so each fold is fitted from scratch.
    model = XGBClassifier()
    model.fit(x[train], Label_1[train])
    # score() is the mean accuracy on the held-out fold.
    score = model.score(x[test], Label_1[test])
    print("Cross validation {} : {}".format(cnt, score))
    cvscores.append(score)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 1 : 0.9309084839654092


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 2 : 0.9378386120346269


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 3 : 0.9336466273526656


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 4 : 0.9329699664037839


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 5 : 0.9344597482595717


In [None]:
# Mean accuracy over the five Label_1 folds. The values are transcribed from
# the "Cross validation" lines printed by the preceding cell.
acc = sum((
    0.9309084839654092,
    0.9378386120346269,
    0.9336466273526656,
    0.9329699664037839,
    0.9344597482595717,
)) / 5
acc

0.9339646876032115

In [None]:
# Stratified 5-fold cross-validation of a default XGBClassifier on Label_2.
# cvscores is reset here so the cell runs on its own and its scores are not
# appended to those of a different label set from an earlier cell.
K = 5
cvscores = []
cnt = 0
kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=1)
for cnt, (train, test) in enumerate(kfold.split(x, Label_2), start=1):
    # A new classifier per fold so each fold is fitted from scratch.
    model = XGBClassifier()
    model.fit(x[train], Label_2[train])
    # score() is the mean accuracy on the held-out fold.
    score = model.score(x[test], Label_2[test])
    print("Cross validation {} : {}".format(cnt, score))
    cvscores.append(score)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 1 : 0.7150448625703163


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 2 : 0.7095391041921354


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 3 : 0.7120525720446551


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 4 : 0.717024902250687


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 5 : 0.7128160711486432


In [None]:
# Mean accuracy over the five Label_2 folds. The values are transcribed from
# the "Cross validation" lines printed by the preceding cell.
acc = sum((
    0.7150448625703163,
    0.7095391041921354,
    0.7120525720446551,
    0.717024902250687,
    0.7128160711486432,
)) / 5
acc

0.7132955024412874

In [None]:
# Stratified 5-fold cross-validation of a default XGBClassifier on Label_3.
# cvscores is reset here so the cell runs on its own and its scores are not
# appended to those of a different label set from an earlier cell.
K = 5
cvscores = []
cnt = 0
kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=1)
for cnt, (train, test) in enumerate(kfold.split(x, Label_3), start=1):
    # A new classifier per fold so each fold is fitted from scratch.
    model = XGBClassifier()
    model.fit(x[train], Label_3[train])
    # score() is the mean accuracy on the held-out fold.
    score = model.score(x[test], Label_3[test])
    print("Cross validation {} : {}".format(cnt, score))
    cvscores.append(score)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 1 : 0.64223284328099


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 2 : 0.6417411368793334


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Cross validation 3 : 0.6317351729488997


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [None]:
# Mean accuracy over the Label_3 folds. Only three fold scores were printed
# by the preceding cell, so the mean is taken over those three values.
acc = sum((
    0.64223284328099,
    0.6417411368793334,
    0.6317351729488997,
)) / 3
acc

0.6385697177030744