In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install xgboost



In [3]:
!pip install imblearn



In [4]:
# Basic data handling libraries
import numpy as np
import pandas as pd
np.random.seed(1)

# Cross validation and hyperparameter tuning libraries
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics
from sklearn import decomposition
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Machine learning classifiers
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

import warnings
warnings.filterwarnings('ignore')

In [5]:
from imblearn.over_sampling import SMOTE
smote = SMOTE()

In [6]:
def train_model_for_a_class(x,Label,K,use_smote):
  # Cross validation and model training
  cvscores = []
  avg_score = 0.0
  kfold = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

  for train, test in kfold.split(x, Label):
      if len(np.unique(Label)) == 2:
        model = XGBClassifier(objective = "binary:logistic",seed=42)
      else:
        model = XGBClassifier(objective = "multi:softmax",seed=42)
      if use_smote:
        X_train_smote,y_train_smote = smote.fit_resample(x[train],Label[train])
        model.fit(X_train_smote, y_train_smote)
      else:
        model.fit(x[train], Label[train])
      score = model.score(x[test], Label[test])
      cvscores.append(score)
      avg_score = avg_score + score

  avg_score = avg_score/K
  return avg_score, cvscores

In [7]:
def train_model(data_file_path,use_smote=False): 

  data = np.loadtxt(data_file_path, delimiter=",")

  num_samples = data.shape[1]
  num_features = data.shape[0]-3

  x = np.transpose(data[0:num_features:])
  Label_1 = np.transpose(data[num_features:num_features+1,:]); Label_1 = Label_1.astype(int);
  Label_2 = np.transpose(data[num_features+1:num_features+2,:]); Label_2 = Label_2.astype(int);
  Label_3 = np.transpose(data[num_features+2:num_features+3,:]); Label_3 = Label_3.astype(int);

  print("Number of points in the dataset: {}".format(num_samples))
  print("Number of features in each datapoint: {}\n".format(num_features))


  # Preprocessing
  scl = StandardScaler()
  x = scl.fit_transform(x)

  print("Training model for 2 class")
  avg_score_2_class, cvscores_2_class = train_model_for_a_class(x,Label_1,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_2_class, avg_score_2_class))

  print("Training model for 4 class")
  avg_score_4_class, cvscores_4_class = train_model_for_a_class(x,Label_2,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_4_class, avg_score_4_class))

  print("Training model for 10 class")
  avg_score_10_class, cvscores_10_class = train_model_for_a_class(x,Label_3,5,use_smote)
  print("Results:\n Cross validation scores: {} \n Average accuracy: {}\n".format(cvscores_10_class, avg_score_10_class))

In [8]:
# M stands for the number of frequency bins
# L, H are the low and high bands
# Segment factor is the factor by which a single signal of (1e7) points is segmented into before performing the FFT transformation

# M = 1024

## (L+H)

### Segment factor = 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Both bands/L=1e5/data.csv",use_smote=True)

Number of points in the dataset: 22700
Number of features in each datapoint: 1024

Training model for 2 class
Results:
 Cross validation scores: [0.9991189427312775, 0.9984581497797357, 0.9995594713656387, 0.9993392070484581, 0.9991189427312775] 
 Average accuracy: 0.9991189427312774

Training model for 4 class
Results:
 Cross validation scores: [0.8700440528634361, 0.8634361233480177, 0.8643171806167401, 0.8612334801762115, 0.864977973568282] 
 Average accuracy: 0.8648017621145374

Training model for 10 class
Results:
 Cross validation scores: [0.6083700440528634, 0.6121145374449339, 0.6107929515418502, 0.6231277533039647, 0.6044052863436123] 
 Average accuracy: 0.6117621145374449



### Segment factor = 10

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Both bands/L=1e6/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 1024

Training model for 2 class
Results:
 Cross validation scores: [1.0, 0.9977973568281938, 0.9933920704845814, 0.9955947136563876, 0.9955947136563876] 
 Average accuracy: 0.9964757709251101

Training model for 4 class
Results:
 Cross validation scores: [0.9229074889867841, 0.9427312775330396, 0.947136563876652, 0.9427312775330396, 0.9405286343612335] 
 Average accuracy: 0.9392070484581497

Training model for 10 class
Results:
 Cross validation scores: [0.5925110132158591, 0.6607929515418502, 0.6651982378854625, 0.6475770925110133, 0.6541850220264317] 
 Average accuracy: 0.6440528634361233



### Take entire input as signal

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Both bands/entire signal/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 1024

Training model for 2 class
Results:
 Cross validation scores: [0.9782608695652174, 0.9782608695652174, 0.9333333333333333, 0.8888888888888888, 0.9555555555555556] 
 Average accuracy: 0.9468599033816425

Training model for 4 class
Results:
 Cross validation scores: [0.8695652173913043, 0.8260869565217391, 0.9333333333333333, 0.8888888888888888, 0.9333333333333333] 
 Average accuracy: 0.8902415458937197

Training model for 10 class
Results:
 Cross validation scores: [0.5, 0.5, 0.6666666666666666, 0.5777777777777777, 0.5555555555555556] 
 Average accuracy: 0.5599999999999999



## (H)

### Segment by factor of 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Upper band/L=1e5/data.csv",use_smote=True)

### Segment by factor of 10

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Upper band/L=1e6/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 512

Training model for 2 class
Results:
 Cross validation scores: [0.8986784140969163, 0.8942731277533039, 0.8788546255506607, 0.9118942731277533, 0.8942731277533039] 
 Average accuracy: 0.8955947136563877

Training model for 4 class
Results:
 Cross validation scores: [0.6321585903083701, 0.579295154185022, 0.6541850220264317, 0.6255506607929515, 0.6321585903083701] 
 Average accuracy: 0.6246696035242291

Training model for 10 class
Results:
 Cross validation scores: [0.6079295154185022, 0.6035242290748899, 0.5704845814977973, 0.5770925110132159, 0.6277533039647577] 
 Average accuracy: 0.5973568281938325



### Take entire input as signal

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Upper band/entire signal/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 512

Training model for 2 class
Results:
 Cross validation scores: [0.9565217391304348, 1.0, 0.9555555555555556, 0.9777777777777777, 0.9333333333333333] 
 Average accuracy: 0.9646376811594204

Training model for 4 class
Results:
 Cross validation scores: [0.717391304347826, 0.6521739130434783, 0.6666666666666666, 0.5555555555555556, 0.4666666666666667] 
 Average accuracy: 0.6116908212560386

Training model for 10 class
Results:
 Cross validation scores: [0.43478260869565216, 0.43478260869565216, 0.4222222222222222, 0.4888888888888889, 0.4888888888888889] 
 Average accuracy: 0.45391304347826084



## (L)

### Segment by factor of 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Lower band/L=1e5/data.csv",use_smote=True)

### Segment by factor of 10

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Lower band/L=1e6/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 512

Training model for 2 class
Results:
 Cross validation scores: [1.0, 0.9977973568281938, 0.9911894273127754, 0.9933920704845814, 0.9977973568281938] 
 Average accuracy: 0.9960352422907489

Training model for 4 class
Results:
 Cross validation scores: [0.9295154185022027, 0.9449339207048458, 0.9427312775330396, 0.9405286343612335, 0.9383259911894273] 
 Average accuracy: 0.9392070484581497

Training model for 10 class
Results:
 Cross validation scores: [0.6123348017621145, 0.6696035242290749, 0.6651982378854625, 0.6563876651982379, 0.6563876651982379] 
 Average accuracy: 0.6519823788546255



### Take entire input as signal

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=1024/Lower band/entire signal/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 512

Training model for 2 class
Results:
 Cross validation scores: [1.0, 0.9782608695652174, 0.9777777777777777, 0.8666666666666667, 0.9555555555555556] 
 Average accuracy: 0.9556521739130435

Training model for 4 class
Results:
 Cross validation scores: [0.9130434782608695, 0.8913043478260869, 0.9777777777777777, 0.9333333333333333, 0.9111111111111111] 
 Average accuracy: 0.9253140096618357

Training model for 10 class
Results:
 Cross validation scores: [0.5869565217391305, 0.6086956521739131, 0.7111111111111111, 0.6222222222222222, 0.5333333333333333] 
 Average accuracy: 0.612463768115942



# M = 2048



## (H+L)

### Segment factor = 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Both bands/L=1e5/data.csv",use_smote=True)

### Segment by factor of 10

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Both bands/L=1e6/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 2048

Training model for 2 class
Results:
 Cross validation scores: [0.9977973568281938, 0.9955947136563876, 1.0, 0.9977973568281938, 0.9977973568281938] 
 Average accuracy: 0.9977973568281937

Training model for 4 class
Results:
 Cross validation scores: [0.9779735682819384, 0.9669603524229075, 0.973568281938326, 0.9493392070484582, 0.986784140969163] 
 Average accuracy: 0.9709251101321585

Training model for 10 class
Results:
 Cross validation scores: [0.6828193832599119, 0.7158590308370044, 0.698237885462555, 0.7290748898678414, 0.724669603524229] 
 Average accuracy: 0.7101321585903083



### Take entire input as signal

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Both bands/entire signal/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 2048

Training model for 2 class
Results:
 Cross validation scores: [1.0, 0.9565217391304348, 0.9555555555555556, 0.9111111111111111, 0.9777777777777777] 
 Average accuracy: 0.9601932367149757

Training model for 4 class
Results:
 Cross validation scores: [0.8260869565217391, 0.8478260869565217, 0.9555555555555556, 0.7777777777777778, 0.7777777777777778] 
 Average accuracy: 0.8370048309178744

Training model for 10 class
Results:
 Cross validation scores: [0.5869565217391305, 0.5, 0.5333333333333333, 0.6, 0.6] 
 Average accuracy: 0.5640579710144927



## (H)

### Segment by factor of 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Upper band/L=1e5/data.csv",use_smote=True)

### Segment by factor of 10

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Upper band/L=1e6/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 1024

Training model for 2 class
Results:
 Cross validation scores: [0.9118942731277533, 0.9295154185022027, 0.9317180616740088, 0.9140969162995595, 0.9361233480176211] 
 Average accuracy: 0.9246696035242291

Training model for 4 class
Results:
 Cross validation scores: [0.6233480176211453, 0.7048458149779736, 0.6563876651982379, 0.6894273127753304, 0.6497797356828194] 
 Average accuracy: 0.6647577092511014

Training model for 10 class
Results:
 Cross validation scores: [0.5616740088105727, 0.5572687224669604, 0.5198237885462555, 0.4933920704845815, 0.5352422907488987] 
 Average accuracy: 0.5334801762114537



### Take entire input as signal

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Upper band/entire signal/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 1024

Training model for 2 class
Results:
 Cross validation scores: [1.0, 0.9565217391304348, 0.9777777777777777, 0.9555555555555556, 0.9777777777777777] 
 Average accuracy: 0.9735265700483092

Training model for 4 class
Results:
 Cross validation scores: [0.6739130434782609, 0.5869565217391305, 0.6, 0.6888888888888889, 0.6] 
 Average accuracy: 0.6299516908212561

Training model for 10 class
Results:
 Cross validation scores: [0.43478260869565216, 0.4782608695652174, 0.4222222222222222, 0.4444444444444444, 0.4666666666666667] 
 Average accuracy: 0.44927536231884063



## (L)

### Segment factor = 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Lower band/L=1e5/data.csv",use_smote=True)

### Segment by factor of 10

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Lower band/L=1e6/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 1024

Training model for 2 class
Results:
 Cross validation scores: [1.0, 1.0, 1.0, 0.9977973568281938, 0.9955947136563876] 
 Average accuracy: 0.9986784140969164

Training model for 4 class
Results:
 Cross validation scores: [0.9361233480176211, 0.9229074889867841, 0.9515418502202643, 0.9185022026431718, 0.9559471365638766] 
 Average accuracy: 0.9370044052863434

Training model for 10 class
Results:
 Cross validation scores: [0.6497797356828194, 0.6828193832599119, 0.6585903083700441, 0.6784140969162996, 0.6894273127753304] 
 Average accuracy: 0.6718061674008811



### Take entire input as signal

In [None]:
rf_data = np.loadtxt("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=2048/Lower band/entire signal/data.csv", delimiter=",")

# M = 4096

## (H+L) 

### Segment by factor of 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Both bands/L=1e5/data.csv",use_smote=True)

### Segment by factor of 10

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Both bands/L=1e6/data.csv",use_smote=True)

### Take entire input as signal

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Both bands/entire signal/data.csv",use_smote=True)

## (H)

### Segment by factor of 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Upper band/L=1e5/data.csv",use_smote=True)

### Segment by factor of 10

In [10]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Upper band/L=1e6/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 2048

Training model for 2 class
Results:
 Cross validation scores: [0.9757709251101322, 0.973568281938326, 0.9713656387665198, 0.9713656387665198, 0.9801762114537445] 
 Average accuracy: 0.9744493392070485

Training model for 4 class
Results:
 Cross validation scores: [0.7599118942731278, 0.7797356828193832, 0.7841409691629956, 0.748898678414097, 0.7775330396475771] 
 Average accuracy: 0.7700440528634361

Training model for 10 class
Results:
 Cross validation scores: [0.698237885462555, 0.6607929515418502, 0.6651982378854625, 0.6497797356828194, 0.6696035242290749] 
 Average accuracy: 0.6687224669603523



### Take entire input as signal

In [11]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Upper band/entire signal/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 2048

Training model for 2 class
Results:
 Cross validation scores: [0.9782608695652174, 0.9782608695652174, 0.9777777777777777, 1.0, 0.9777777777777777] 
 Average accuracy: 0.9824154589371981

Training model for 4 class
Results:
 Cross validation scores: [0.5869565217391305, 0.6956521739130435, 0.6, 0.6888888888888889, 0.6444444444444445] 
 Average accuracy: 0.6431884057971013

Training model for 10 class
Results:
 Cross validation scores: [0.45652173913043476, 0.43478260869565216, 0.4666666666666667, 0.4444444444444444, 0.5111111111111111] 
 Average accuracy: 0.46270531400966186



## (L)

### Segment by factor of 100

In [None]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Upper band/L=1e5/data.csv",use_smote=True)

### Segment by factor of 10

In [12]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Lower band/L=1e6/data.csv",use_smote=True)

Number of points in the dataset: 2270
Number of features in each datapoint: 2048

Training model for 2 class
Results:
 Cross validation scores: [0.9955947136563876, 0.9977973568281938, 1.0, 1.0, 1.0] 
 Average accuracy: 0.9986784140969164

Training model for 4 class
Results:
 Cross validation scores: [0.9691629955947136, 0.9691629955947136, 0.973568281938326, 0.9295154185022027, 0.9647577092511013] 
 Average accuracy: 0.9612334801762115

Training model for 10 class
Results:
 Cross validation scores: [0.8193832599118943, 0.8149779735682819, 0.8193832599118943, 0.7797356828193832, 0.8348017621145375] 
 Average accuracy: 0.8136563876651983



### Take entire input as signal

In [13]:
train_model("/content/drive/MyDrive/CEERI Project - Kalit/data/fft_extracted_features/M=4096/Lower band/entire signal/data.csv",use_smote=True)

Number of points in the dataset: 227
Number of features in each datapoint: 2048

Training model for 2 class
Results:
 Cross validation scores: [0.9130434782608695, 1.0, 0.9777777777777777, 0.9333333333333333, 0.9111111111111111] 
 Average accuracy: 0.9470531400966185

Training model for 4 class
Results:
 Cross validation scores: [0.8695652173913043, 0.9130434782608695, 0.9555555555555556, 0.9111111111111111, 0.9777777777777777] 
 Average accuracy: 0.9254106280193236

Training model for 10 class
Results:
 Cross validation scores: [0.7608695652173914, 0.6956521739130435, 0.7777777777777778, 0.7333333333333333, 0.6] 
 Average accuracy: 0.7135265700483092

