In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that the dataset
# paths under /content/drive/MyDrive used by the train_model(...) cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install xgboost



In [None]:
!pip install imblearn



In [None]:
# Basic data handling libraries
import numpy as np
import pandas as pd
np.random.seed(42)

# Cross validation and hyperparameter tuning libraries
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import  accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# Machine learning classifiers
from xgboost import XGBClassifier

import warnings
warnings.filterwarnings('ignore')

In [None]:
class color:
    """ANSI escape sequences used to style text printed by this notebook."""
    # Start bold / start underline / reset all styling, in that order.
    BOLD, UNDERLINE, END = '\x1b[1m', '\x1b[4m', '\x1b[0m'

In [None]:
from imblearn.over_sampling import SMOTE
# A fixed random_state makes the synthetic minority samples (and therefore the
# reported scores) identical on every run, independent of cell execution order.
smote = SMOTE(random_state=42)

In [None]:
def train_model_for_a_class(x,Label,K,use_smote,average='binary'):
    """Cross-validate an XGBoost classifier and report the mean fold metrics.

    A fresh ``XGBClassifier`` is fitted on every stratified fold; accuracy,
    F1, recall and precision are measured on the held-out part, averaged over
    the folds, printed and returned.

    Parameters
    ----------
    x : array indexable by integer index arrays
        Feature matrix, one row per sample.
    Label : array-like
        One class label per sample. A column vector of shape (n, 1) is
        flattened to 1-D.
    K : int
        Number of stratified folds.
    use_smote : bool
        If true, every training fold is resampled with the module-level
        ``smote`` object before fitting; held-out folds are left untouched.
    average : str, default 'binary'
        Passed as ``average`` to ``f1_score``, ``recall_score`` and
        ``precision_score``.

    Returns
    -------
    dict
        Keys ``'accuracy'``, ``'f1'``, ``'recall'`` and ``'precision'`` mapped
        to the mean of the per-fold scores (Python floats).
    """
    # scikit-learn accepts (n, 1) targets only via a conversion warning (which
    # this notebook silences); hand it proper 1-D labels instead.
    Label = np.asarray(Label)
    if Label.ndim == 2 and Label.shape[1] == 1:
        Label = Label.ravel()

    # The objective depends only on the number of distinct labels, so decide
    # it once instead of once per fold.
    if len(np.unique(Label)) == 2:
        objective = "binary:logistic"
    else:
        objective = "multi:softmax"

    kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)
    fold_scores = {"accuracy": [], "f1": [], "recall": [], "precision": []}

    for train, test in kfold.split(x, Label):
        # `random_state` is the scikit-learn-API spelling of the seed argument.
        model = XGBClassifier(objective=objective, random_state=42)

        X_fit, y_fit = x[train], Label[train]
        if use_smote:
            # Oversample the training part only, so no synthetic sample ends
            # up in the evaluation data.
            X_fit, y_fit = smote.fit_resample(X_fit, y_fit)
        model.fit(X_fit, y_fit)

        y_true = Label[test]
        y_pred = model.predict(x[test])
        fold_scores["accuracy"].append(accuracy_score(y_true, y_pred))
        fold_scores["f1"].append(f1_score(y_true, y_pred, average=average))
        fold_scores["recall"].append(recall_score(y_true, y_pred, average=average))
        fold_scores["precision"].append(precision_score(y_true, y_pred, average=average))

    mean_scores = {name: np.mean(values) for name, values in fold_scores.items()}

    print("Accuracy: {}".format(mean_scores["accuracy"]))
    print("f1_score: {}".format(mean_scores["f1"]))
    print("recall_score: {}".format(mean_scores["recall"]))
    print("precision_score: {}".format(mean_scores["precision"]))
    print("\n-------------------------------------------------------\n")

    return {name: float(value) for name, value in mean_scores.items()}

In [None]:
def train_model(data_file_path,use_smote=False,average='binary'):
    """Load a dataset and cross-validate the 2-, 4- and 10-class models.

    The CSV is read as a matrix with one column per sample. Every row except
    the last three is a feature; the last three rows are the label rows used
    for the 2-class, 4-class and 10-class runs, in that order.

    Parameters
    ----------
    data_file_path : str
        Path of the comma-separated data file.
    use_smote : bool, default False
        Forwarded to ``train_model_for_a_class`` for all three runs.
    average : str, default 'binary'
        Metric averaging mode for the 4-class and 10-class runs. The 2-class
        run always uses ``'binary'``.

    Raises
    ------
    ValueError
        If the file has fewer than four rows (one feature plus three labels).
    """
    # ndmin=2 keeps a single-sample file two-dimensional.
    data = np.loadtxt(data_file_path, delimiter=",", ndmin=2)

    num_label_rows = 3
    if data.shape[0] <= num_label_rows:
        raise ValueError(
            "expected at least {} rows (features + {} label rows), got {}".format(
                num_label_rows + 1, num_label_rows, data.shape[0]))

    num_samples = data.shape[1]
    num_features = data.shape[0] - num_label_rows

    # Samples are stored column-wise; transpose to one row per sample.
    x = data[:num_features, :].T
    # Each label row becomes a 1-D integer vector with one entry per sample.
    Label_1, Label_2, Label_3 = (
        label_row.astype(int) for label_row in data[num_features:, :]
    )

    print("Number of points in the dataset: {}".format(num_samples))
    print("Number of features in each datapoint: {}\n".format(num_features))

    # Preprocessing
    # NOTE(review): the scaler is fitted on all samples before the folds are
    # split, so held-out samples contribute to the scaling statistics. Fit it
    # per training fold if a strictly leak-free evaluation is required.
    scl = StandardScaler()
    x = scl.fit_transform(x)

    runs = (
        ("Training model for 2 class:", Label_1, 'binary'),
        ("Training model for 4 class:", Label_2, average),
        ("Training model for 10 class:", Label_3, average),
    )
    for heading, labels, run_average in runs:
        print(color.BOLD+heading+color.END)
        train_model_for_a_class(x,labels,5,use_smote, average=run_average)

# L+H bands

## Entire signal as input

### M = 1024

In [None]:
# L+H bands, L=1e7 dataset, M=1024. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L+H/M=1024/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 1026

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9955555555555555
f1_score: 0.9955555555555555
recall_score: 0.9955555555555555
precision_score: 0.9958169934640523

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9605797101449275
f1_score: 0.9601313340443776
recall_score: 0.9605797101449275
precision_score: 0.9732215320910973

-------------------------------------------------------



### M = 2048

In [None]:
# L+H bands, L=1e7 dataset, M=2048. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L+H/M=2048/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 2050

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.956135265700483
f1_score: 0.955686889599933
recall_score: 0.956135265700483
precision_score: 0.9696659765355419

-------------------------------------------------------



### M = 4096

In [None]:
# L+H bands, L=1e7 dataset, M=4096. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L+H/M=4096/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 4098

[1mTraining model for 2 class:[0m
Accuracy: 0.9956521739130434
f1_score: 0.9973333333333333
recall_score: 0.9947368421052631
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9956521739130434
f1_score: 0.9957007142361833
recall_score: 0.9956521739130434
precision_score: 0.9960869565217392

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9735265700483092
f1_score: 0.9725541945661297
recall_score: 0.9735265700483092
precision_score: 0.9794374664519593

-------------------------------------------------------



## Signal segmented by a factor of 10 as input

### M = 1024

In [None]:
# L+H bands, L=1e6 dataset, M=1024. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L+H/M=1024/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 1026

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9973568281938325
f1_score: 0.997350985006633
recall_score: 0.9973568281938325
precision_score: 0.9973783833133053

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9973568281938325
f1_score: 0.9973568795160622
recall_score: 0.9973568281938325
precision_score: 0.9974199586329491

-------------------------------------------------------



### M = 2048

In [None]:
# L+H bands, L=1e6 dataset, M=2048. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L+H/M=2048/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 2050

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9973568281938325
f1_score: 0.9973489887251035
recall_score: 0.9973568281938325
precision_score: 0.9973783833133053

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9964757709251101
f1_score: 0.9964757484729689
recall_score: 0.9964757709251101
precision_score: 0.996580610749241

-------------------------------------------------------



### M = 4096

In [None]:
# L+H bands, L=1e6 dataset, M=4096. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L+H/M=4096/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 4098

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9964757709251101
f1_score: 0.9964698833739289
recall_score: 0.9964757709251101
precision_score: 0.9964945274635755

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9973568281938325
f1_score: 0.9973560786767723
recall_score: 0.9973568281938325
precision_score: 0.9974378556593493

-------------------------------------------------------



# L band

## Entire signal as input

### M = 1024

In [None]:
# L band, L=1e7 dataset, M=1024. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L/M=1024/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 513

[1mTraining model for 2 class:[0m
Accuracy: 0.9955555555555555
f1_score: 0.9972602739726029
recall_score: 0.9945945945945945
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9472463768115942
f1_score: 0.9470523857977039
recall_score: 0.9472463768115942
precision_score: 0.9502646369167211

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.8945893719806763
f1_score: 0.8871309489979566
recall_score: 0.8945893719806763
precision_score: 0.8994406870638754

-------------------------------------------------------



### M = 2048

In [None]:
# L band, L=1e7 dataset, M=2048. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L/M=2048/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 1025

[1mTraining model for 2 class:[0m
Accuracy: 0.9955555555555555
f1_score: 0.9972602739726029
recall_score: 0.9945945945945945
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9603864734299516
f1_score: 0.9603378090366581
recall_score: 0.9603864734299516
precision_score: 0.9625029838022165

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.881159420289855
f1_score: 0.8738194757154689
recall_score: 0.881159420289855
precision_score: 0.8855968867418144

-------------------------------------------------------



### M = 4096

In [None]:
# L band, L=1e7 dataset, M=4096. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L/M=4096/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 2049

[1mTraining model for 2 class:[0m
Accuracy: 0.9955555555555555
f1_score: 0.9972602739726029
recall_score: 0.9945945945945945
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9602898550724639
f1_score: 0.9600546841712587
recall_score: 0.9602898550724639
precision_score: 0.9624205740267122

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.8767149758454107
f1_score: 0.8756934622007086
recall_score: 0.8767149758454107
precision_score: 0.895599033816425

-------------------------------------------------------



## Signal segmented by a factor of 10 as input

### M = 1024

In [None]:
# L band, L=1e6 dataset, M=1024. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L/M=1024/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 513

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9894273127753304
f1_score: 0.9894344336649835
recall_score: 0.9894273127753304
precision_score: 0.9895082630660582

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9273127753303966
f1_score: 0.9270156460722154
recall_score: 0.9273127753303966
precision_score: 0.9289053339309703

-------------------------------------------------------



### M = 2048

In [None]:
# L band, L=1e6 dataset, M=2048. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L/M=2048/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 1025

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9872246696035243
f1_score: 0.9872318543597745
recall_score: 0.9872246696035243
precision_score: 0.9874059230697323

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9365638766519824
f1_score: 0.9367043134759772
recall_score: 0.9365638766519824
precision_score: 0.9385719233841217

-------------------------------------------------------



### M = 4096

In [None]:
# L band, L=1e6 dataset, M=4096. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L/M=4096/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 2049

[1mTraining model for 2 class:[0m
Accuracy: 1.0
f1_score: 1.0
recall_score: 1.0
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9881057268722466
f1_score: 0.9880811650162753
recall_score: 0.9881057268722466
precision_score: 0.9882803061037805

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9550660792951542
f1_score: 0.9552509379887184
recall_score: 0.9550660792951542
precision_score: 0.95632605949367

-------------------------------------------------------



# H band

## Entire signal as input

### M = 1024

In [None]:
# H band, L=1e7 dataset, M=1024. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/H/M=1024/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 513

[1mTraining model for 2 class:[0m
Accuracy: 0.9867632850241546
f1_score: 0.9919971160778658
recall_score: 0.9945945945945945
precision_score: 0.9897435897435898

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9734299516908212
f1_score: 0.9729913596239257
recall_score: 0.9734299516908212
precision_score: 0.9755622493764011

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9469565217391306
f1_score: 0.9460162145959247
recall_score: 0.9469565217391306
precision_score: 0.9615081665516447

-------------------------------------------------------



### M = 2048

In [None]:
# H band, L=1e7 dataset, M=2048. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/H/M=2048/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 1025

[1mTraining model for 2 class:[0m
Accuracy: 0.9912077294685989
f1_score: 0.994593607305936
recall_score: 0.9945945945945945
precision_score: 0.9947368421052631

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9556521739130435
f1_score: 0.9552999757389935
recall_score: 0.9556521739130435
precision_score: 0.9585048782798143

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9646376811594204
f1_score: 0.9637628458498023
recall_score: 0.9646376811594204
precision_score: 0.9727504025764894

-------------------------------------------------------



### M = 4096

In [None]:
# H band, L=1e7 dataset, M=4096. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/H/M=4096/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 227
Number of features in each datapoint: 2049

[1mTraining model for 2 class:[0m
Accuracy: 0.9867632850241546
f1_score: 0.9918538812785387
recall_score: 0.9891891891891891
precision_score: 0.9947368421052631

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9867632850241546
f1_score: 0.9866135195990269
recall_score: 0.9867632850241546
precision_score: 0.9881548419689938

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9384541062801933
f1_score: 0.9373278331973985
recall_score: 0.9384541062801933
precision_score: 0.9494331723027376

-------------------------------------------------------



## Signal segmented by a factor of 10 as input

### M = 1024

In [None]:
# H band, L=1e6 dataset, M=1024. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/H/M=1024/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 513

[1mTraining model for 2 class:[0m
Accuracy: 0.9982378854625551
f1_score: 0.998918918918919
recall_score: 0.9978494623655914
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.992511013215859
f1_score: 0.9925078039992277
recall_score: 0.992511013215859
precision_score: 0.9925633164965675

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9894273127753305
f1_score: 0.9894049227707352
recall_score: 0.9894273127753305
precision_score: 0.9896870397033511

-------------------------------------------------------



### M = 2048

In [None]:
# H band, L=1e6 dataset, M=2048. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
# NOTE(review): this is the only cell that reads "psd_2048_H.csv" instead of
# "data.csv" - confirm the file name is intentional.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/H/M=2048/psd_2048_H.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 1025

[1mTraining model for 2 class:[0m
Accuracy: 0.9982378854625551
f1_score: 0.998918918918919
recall_score: 0.9978494623655914
precision_score: 1.0

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9837004405286344
f1_score: 0.9836952487773774
recall_score: 0.9837004405286344
precision_score: 0.9839610178572077

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9859030837004406
f1_score: 0.9859274499948709
recall_score: 0.9859030837004406
precision_score: 0.9861907561904688

-------------------------------------------------------



### M = 4096

In [None]:
# H band, L=1e6 dataset, M=4096. SMOTE on; 'weighted' averaging applies to
# the 4- and 10-class metrics (the 2-class run always uses 'binary').
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/H/M=4096/data.csv",use_smote=True, average='weighted')

Number of points in the dataset: 2270
Number of features in each datapoint: 2049

[1mTraining model for 2 class:[0m
Accuracy: 0.9960352422907489
f1_score: 0.9975708413662654
recall_score: 0.996236559139785
precision_score: 0.998917438181179

-------------------------------------------------------

[1mTraining model for 4 class:[0m
Accuracy: 0.9929515418502202
f1_score: 0.9929504291976416
recall_score: 0.9929515418502202
precision_score: 0.9930323855348802

-------------------------------------------------------

[1mTraining model for 10 class:[0m
Accuracy: 0.9740088105726873
f1_score: 0.9740418768732306
recall_score: 0.9740088105726873
precision_score: 0.9747999505169407

-------------------------------------------------------

