In [96]:
# Colab-only: mount Google Drive at /content/drive so that the
# /content/drive/MyDrive/... dataset paths passed to train_model resolve.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [97]:
!pip install xgboost



In [98]:
!pip install imblearn



In [99]:
# Basic data handling libraries
import numpy as np
import pandas as pd
np.random.seed(1)

# Cross validation and hyperparameter tuning libraries
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
from sklearn import decomposition
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

# Machine learning classifiers
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

import warnings
# NOTE(review): with no category given, this ignores every warning raised from
# this point on, including deprecation and data-conversion warnings emitted by
# the libraries imported above. Consider narrowing the filter.
warnings.filterwarnings('ignore')

In [100]:
from imblearn.over_sampling import SMOTE
# Shared oversampler used by train_model_for_a_class. A fixed random_state makes
# every fit_resample call reproducible on its own; an unseeded sampler draws from
# NumPy's global random state, so results would depend on what ran before.
smote = SMOTE(random_state=42)

In [101]:
def train_model_for_a_class(x,Label,K,use_smote):
  """Estimate XGBoost classification accuracy with stratified K-fold cross validation.

  Parameters
  ----------
  x : array-like of shape (n_samples, n_features)
      Feature matrix; rows are selected with the fold indices.
  Label : array-like of shape (n_samples,) or (n_samples, 1)
      Class label of every row of ``x``. Column vectors are flattened.
  K : int
      Number of stratified folds.
  use_smote : bool
      When true, the training part of each fold is resampled with the
      module-level ``smote`` sampler before fitting. The held-out part of the
      fold is never resampled.

  Returns
  -------
  avg_score : float
      Mean of the per-fold scores.
  cvscores : list
      ``model.score`` (accuracy) on the held-out part of each fold, in fold order.
  """
  x = np.asarray(x)

  # Flatten (n, 1) label columns and re-encode the classes as 0..n_classes-1.
  # Newer XGBoost releases expect labels in that form; the accuracy is
  # unaffected because training and scoring use the same encoding.
  classes, y = np.unique(np.ravel(Label), return_inverse=True)
  y = np.ravel(y)

  # The objective only depends on the number of classes, so pick it once.
  if len(classes) == 2:
    objective = "binary:logistic"
  else:
    objective = "multi:softmax"

  kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)
  cvscores = []

  for train, test in kfold.split(x, y):
    # A fresh model per fold so nothing learned on one split carries over.
    model = XGBClassifier(objective=objective, random_state=42)
    if use_smote:
      # Oversample the training rows only; the test rows stay untouched.
      X_train_smote, y_train_smote = smote.fit_resample(x[train], y[train])
      model.fit(X_train_smote, y_train_smote)
    else:
      model.fit(x[train], y[train])
    cvscores.append(model.score(x[test], y[test]))

  avg_score = sum(cvscores) / len(cvscores)
  return avg_score, cvscores

In [102]:
def train_model(data_file_path,use_smote=False,n_splits=5):
  """Load a dataset from CSV and cross-validate one classifier per label row.

  The CSV is read as a 2-D array in which every column is one sample. The last
  three rows hold three integer label vectors (reported below as the
  "2 class", "4 class" and "10 class" tasks); all rows above them are features.

  Parameters
  ----------
  data_file_path : str
      Path of the comma-separated data file.
  use_smote : bool, default False
      Forwarded to ``train_model_for_a_class``.
  n_splits : int, default 5
      Number of cross-validation folds used for every task.

  Raises
  ------
  ValueError
      If the file does not contain at least one feature row above the three
      label rows.

  Results are printed; nothing is returned.
  """
  num_label_rows = 3

  # ndmin=2 keeps the (rows, columns) layout even for a one-row or one-column file.
  data = np.loadtxt(data_file_path, delimiter=",", ndmin=2)
  if data.shape[0] <= num_label_rows:
    raise ValueError(
        "Expected at least {} rows (features followed by {} label rows), got {}".format(
            num_label_rows + 1, num_label_rows, data.shape[0]))

  num_samples = data.shape[1]
  num_features = data.shape[0] - num_label_rows

  # Rows are features in the file; transpose to (n_samples, n_features).
  x = np.transpose(data[:num_features])

  print("Number of points in the dataset: {}".format(num_samples))
  print("Number of features in each datapoint: {}\n".format(num_features))

  # Preprocessing
  # NOTE(review): the scaler is fitted on all samples before the CV split, so the
  # held-out folds contribute to the mean/variance used to scale the training data.
  scl = StandardScaler()
  x = scl.fit_transform(x)

  # (task name used in the printed report, offset of its label row below the features)
  tasks = (("2 class", 0), ("4 class", 1), ("10 class", 2))
  for task_name, row_offset in tasks:
    # A 1-D integer label vector, one entry per sample.
    labels = data[num_features + row_offset].astype(int)

    print("Training model for {}".format(task_name))
    avg_score, cvscores = train_model_for_a_class(x, labels, n_splits, use_smote)
    print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores, avg_score))

# L+H bands

## Entire signal as input

### M = 1024

In [103]:
# Cross-validate on the L=1e7 / L+H / M=1024 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L+H/M=1024/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 1026

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 0.9777777777777777] 
 Average accuracy: 0.9955555555555555

Training model for 10 class
Results:
 Cross validation scores: [0.9565217391304348, 0.9565217391304348, 0.9777777777777777, 0.9777777777777777, 1.0] 
 Average accuracy: 0.973719806763285



### M = 2048

In [104]:
# Cross-validate on the L=1e7 / L+H / M=2048 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L+H/M=2048/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 2050

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 10 class
Results:
 Cross validation scores: [0.9347826086956522, 0.9347826086956522, 0.9777777777777777, 0.9777777777777777, 1.0] 
 Average accuracy: 0.9650241545893721



### M = 4096

In [105]:
# Cross-validate on the L=1e7 / L+H / M=4096 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L+H/M=4096/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 4098

Training model for 2 class
Results:
 Cross validation scores: [1.0, 0.9782608695652174, 1.0, 1.0, 1.0] 
 Average accuracy: 0.9956521739130434

Training model for 4 class
Results:
 Cross validation scores: [0.9782608695652174, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 0.9956521739130434

Training model for 10 class
Results:
 Cross validation scores: [0.9565217391304348, 0.9565217391304348, 0.9555555555555556, 0.9555555555555556, 1.0] 
 Average accuracy: 0.9648309178743961



## Signal segmented by a factor of 10

### M = 1024

In [106]:
# Cross-validate on the L=1e6 / L+H / M=1024 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L+H/M=1024/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 1026

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [0.9977973568281938, 0.9955947136563876, 0.9977973568281938, 0.9977973568281938, 0.9977973568281938] 
 Average accuracy: 0.9973568281938325

Training model for 10 class
Results:
 Cross validation scores: [0.9955947136563876, 0.9977973568281938, 0.9889867841409692, 0.9977973568281938, 0.9955947136563876] 
 Average accuracy: 0.9951541850220265



### M = 2048

In [107]:
# Cross-validate on the L=1e6 / L+H / M=2048 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L+H/M=2048/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 2050

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [0.9977973568281938, 0.9955947136563876, 0.9977973568281938, 0.9977973568281938, 0.9977973568281938] 
 Average accuracy: 0.9973568281938325

Training model for 10 class
Results:
 Cross validation scores: [0.9977973568281938, 1.0, 0.9955947136563876, 0.9955947136563876, 1.0] 
 Average accuracy: 0.9977973568281939



### M = 4096

In [108]:
# Cross-validate on the L=1e6 / L+H / M=4096 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L+H/M=4096/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 4098

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [0.9977973568281938, 0.9955947136563876, 0.9977973568281938, 0.9955947136563876, 0.9955947136563876] 
 Average accuracy: 0.9964757709251101

Training model for 10 class
Results:
 Cross validation scores: [0.9933920704845814, 0.9933920704845814, 1.0, 0.9911894273127754, 0.9977973568281938] 
 Average accuracy: 0.9951541850220262



# L band

## Entire signal as input

### M = 1024

In [109]:
# Cross-validate on the L=1e7 / L / M=1024 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L/M=1024/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 513

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 0.9777777777777777, 1.0, 1.0] 
 Average accuracy: 0.9955555555555555

Training model for 4 class
Results:
 Cross validation scores: [0.9347826086956522, 0.9347826086956522, 1.0, 0.8888888888888888, 1.0] 
 Average accuracy: 0.9516908212560388

Training model for 10 class
Results:
 Cross validation scores: [0.9130434782608695, 0.8478260869565217, 0.9333333333333333, 0.8666666666666667, 0.9555555555555556] 
 Average accuracy: 0.9032850241545892



### M = 2048

In [110]:
# Cross-validate on the L=1e7 / L / M=2048 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L/M=2048/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 1025

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 0.9777777777777777, 1.0, 1.0] 
 Average accuracy: 0.9955555555555555

Training model for 4 class
Results:
 Cross validation scores: [0.9782608695652174, 0.9347826086956522, 0.9777777777777777, 0.9111111111111111, 1.0] 
 Average accuracy: 0.9603864734299516

Training model for 10 class
Results:
 Cross validation scores: [0.9565217391304348, 0.8478260869565217, 0.9111111111111111, 0.8444444444444444, 0.8888888888888888] 
 Average accuracy: 0.8897584541062802



### M = 4096

In [111]:
# Cross-validate on the L=1e7 / L / M=4096 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/L/M=4096/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 2049

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 0.9777777777777777, 1.0, 1.0] 
 Average accuracy: 0.9955555555555555

Training model for 4 class
Results:
 Cross validation scores: [1.0, 0.9565217391304348, 0.9555555555555556, 0.9111111111111111, 1.0] 
 Average accuracy: 0.9646376811594202

Training model for 10 class
Results:
 Cross validation scores: [0.9565217391304348, 0.782608695652174, 0.9111111111111111, 0.8666666666666667, 0.8888888888888888] 
 Average accuracy: 0.881159420289855



## Signal segmented by a factor of 10

### M = 1024

In [112]:
# Cross-validate on the L=1e6 / L / M=1024 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L/M=1024/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 513

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [0.9933920704845814, 0.9911894273127754, 0.9801762114537445, 0.9889867841409692, 0.9911894273127754] 
 Average accuracy: 0.9889867841409691

Training model for 10 class
Results:
 Cross validation scores: [0.9317180616740088, 0.9383259911894273, 0.9273127753303965, 0.9096916299559471, 0.9317180616740088] 
 Average accuracy: 0.9277533039647576



### M = 2048

In [113]:
# Cross-validate on the L=1e6 / L / M=2048 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L/M=2048/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 1025

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [0.9955947136563876, 0.9933920704845814, 0.9823788546255506, 0.986784140969163, 0.9801762114537445] 
 Average accuracy: 0.9876651982378855

Training model for 10 class
Results:
 Cross validation scores: [0.9383259911894273, 0.9559471365638766, 0.9317180616740088, 0.9383259911894273, 0.9229074889867841] 
 Average accuracy: 0.9374449339207048



### M = 4096

In [114]:
# Cross-validate on the L=1e6 / L / M=4096 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/L/M=4096/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 2049

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 1.0, 1.0] 
 Average accuracy: 1.0

Training model for 4 class
Results:
 Cross validation scores: [0.986784140969163, 0.9911894273127754, 0.9823788546255506, 0.9933920704845814, 0.9911894273127754] 
 Average accuracy: 0.9889867841409691

Training model for 10 class
Results:
 Cross validation scores: [0.9493392070484582, 0.9493392070484582, 0.9559471365638766, 0.9493392070484582, 0.9625550660792952] 
 Average accuracy: 0.9533039647577093



# H band

## Entire signal as input

### M = 1024

In [115]:
# Cross-validate on the L=1e7 / H / M=1024 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/H/M=1024/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 513

Training model for 2 class
Results:
 Cross validation scores: [0.9782608695652174, 1.0, 1.0, 1.0, 0.9555555555555556] 
 Average accuracy: 0.9867632850241546

Training model for 4 class
Results:
 Cross validation scores: [0.9782608695652174, 1.0, 1.0, 0.9777777777777777, 0.9333333333333333] 
 Average accuracy: 0.9778743961352656

Training model for 10 class
Results:
 Cross validation scores: [0.9565217391304348, 0.9565217391304348, 0.9555555555555556, 0.9333333333333333, 0.8666666666666667] 
 Average accuracy: 0.9337198067632851



### M = 2048

In [116]:
# Cross-validate on the L=1e7 / H / M=2048 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/H/M=2048/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 1025

Training model for 2 class
Results:
 Cross validation scores: [0.9782608695652174, 1.0, 1.0, 1.0, 0.9777777777777777] 
 Average accuracy: 0.9912077294685989

Training model for 4 class
Results:
 Cross validation scores: [0.9782608695652174, 1.0, 0.9333333333333333, 0.9333333333333333, 0.9333333333333333] 
 Average accuracy: 0.9556521739130435

Training model for 10 class
Results:
 Cross validation scores: [1.0, 0.9347826086956522, 1.0, 0.9333333333333333, 0.9777777777777777] 
 Average accuracy: 0.9691787439613527



### M = 4096

In [117]:
# Cross-validate on the L=1e7 / H / M=4096 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e7/H/M=4096/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 2049

Training model for 2 class
Results:
 Cross validation scores: [0.9782608695652174, 1.0, 1.0, 0.9777777777777777, 0.9777777777777777] 
 Average accuracy: 0.9867632850241546

Training model for 4 class
Results:
 Cross validation scores: [0.9782608695652174, 1.0, 1.0, 0.9777777777777777, 0.9777777777777777] 
 Average accuracy: 0.9867632850241546

Training model for 10 class
Results:
 Cross validation scores: [0.9782608695652174, 0.9347826086956522, 0.9777777777777777, 0.9333333333333333, 0.9333333333333333] 
 Average accuracy: 0.9514975845410628



## Signal segmented by a factor of 10

### M = 1024

In [118]:
# Cross-validate on the L=1e6 / H / M=1024 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/H/M=1024/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 513

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 0.9911894273127754, 1.0] 
 Average accuracy: 0.9982378854625551

Training model for 4 class
Results:
 Cross validation scores: [0.9801762114537445, 0.9933920704845814, 0.9911894273127754, 0.9977973568281938, 0.9911894273127754] 
 Average accuracy: 0.9907488986784141

Training model for 10 class
Results:
 Cross validation scores: [0.9933920704845814, 0.9911894273127754, 0.9889867841409692, 0.9911894273127754, 0.986784140969163] 
 Average accuracy: 0.9903083700440529



### M = 2048

In [119]:
# Cross-validate on the L=1e6 / H / M=2048 dataset with SMOTE enabled.
# NOTE(review): this is the only cell that reads psd_2048_H.csv instead of
# data.csv — confirm it is the intended file for this configuration.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/H/M=2048/psd_2048_H.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 1025

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 0.9911894273127754, 1.0] 
 Average accuracy: 0.9982378854625551

Training model for 4 class
Results:
 Cross validation scores: [0.9845814977973568, 0.9845814977973568, 0.9801762114537445, 0.9889867841409692, 0.9823788546255506] 
 Average accuracy: 0.9841409691629955

Training model for 10 class
Results:
 Cross validation scores: [0.9889867841409692, 0.9955947136563876, 0.9977973568281938, 0.9647577092511013, 0.9779735682819384] 
 Average accuracy: 0.985022026431718



### M = 4096

In [120]:
# Cross-validate on the L=1e6 / H / M=4096 dataset with SMOTE enabled.
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/L=1e6/H/M=4096/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 2049

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 0.9933920704845814, 0.9955947136563876, 1.0] 
 Average accuracy: 0.9977973568281937

Training model for 4 class
Results:
 Cross validation scores: [0.9977973568281938, 0.9933920704845814, 0.9889867841409692, 0.9977973568281938, 0.9889867841409692] 
 Average accuracy: 0.9933920704845814

Training model for 10 class
Results:
 Cross validation scores: [0.9647577092511013, 0.9845814977973568, 0.9581497797356828, 0.9713656387665198, 0.9823788546255506] 
 Average accuracy: 0.9722466960352423

