In [1]:
import pandas as pd
import glob
import re
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
from pywt import wavedec
import pywt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
from scipy.signal import butter, filtfilt
from scipy import signal
import scipy



In [2]:
def butter_bandpass_filter(Input_Signal, Low_Cutoff, High_Cutoff,Sampling_Rate, order):
    """Band-pass filter a signal with a Butterworth design applied via ``filtfilt``.

    Parameters
    ----------
    Input_Signal : array-like
        Data handed to ``scipy.signal.filtfilt`` (filtered along filtfilt's
        default axis).
    Low_Cutoff, High_Cutoff : float
        Band edges, in the same unit as ``Sampling_Rate``.
    Sampling_Rate : float
        Sampling frequency; half of it (the Nyquist frequency) is used to
        normalise both band edges.
    order : int
        Order passed to ``scipy.signal.butter``.

    Returns
    -------
    The forward-backward filtered signal returned by ``filtfilt``.
    """
    nyquist = 0.5 * Sampling_Rate
    # butter() receives edges normalised to the Nyquist frequency (fs=None).
    normalised_band = [Low_Cutoff / nyquist, High_Cutoff / nyquist]
    b_coeffs, a_coeffs = butter(
        order, normalised_band, btype='band', output='ba', analog=False, fs=None
    )
    return filtfilt(b_coeffs, a_coeffs, Input_Signal)

In [3]:
def get_resampled_signal(Signal,resNum=60):
    """Resample every item of ``Signal`` to ``resNum`` samples.

    Each item yielded by iterating ``Signal`` is passed to
    ``scipy.signal.resample``; the results are collected in a list in the
    same order.
    """
    return [signal.resample(single_signal, resNum) for single_signal in Signal]

In [4]:
def remove_dc(Signal):
    """Subtract from each entry of ``Signal`` its own mean value.

    ``Signal`` is accessed by integer position (``Signal[k]`` for
    ``k in range(len(Signal))``); the mean-centred entries are returned as a
    list in the same order.
    """
    return [
        Signal[position] - np.mean(Signal[position])
        for position in range(len(Signal))
    ]

    
    

In [5]:
def wavelet_feature_extraction(Signal):
    """Build features from a level-2 ``'db1'`` wavelet decomposition.

    ``Signal`` is decomposed with ``wavedec(..., 'db1', level=2)``; only the
    first two coefficient arrays of that decomposition are kept and fed to
    ``pywt.waverec``, whose output is returned as the feature set. The
    remaining coefficient array(s) are discarded.
    """
    # Keep only the first two coefficient arrays of the decomposition.
    leading_coeffs = wavedec(Signal, 'db1', level=2)[:2]
    return pywt.waverec(leading_coeffs, 'db1')
    


In [6]:
def psd(Signal, fs=176):
    """Compute a periodogram power spectral density for each signal.

    Parameters
    ----------
    Signal : iterable of array-like
        Each item is passed on its own to ``scipy.signal.periodogram``.
    fs : float, optional
        Sampling frequency handed to the periodogram. Defaults to 176, the
        value that used to be hard-coded here.

    Returns
    -------
    list
        One spectrum (``scaling='density'``) per input item, in input order.
        The frequency axis returned by the periodogram is discarded.
    """
    spectra = []
    for single_signal in Signal:
        _freqs, density = scipy.signal.periodogram(single_signal, fs, scaling='density')
        spectra.append(density)
    return spectra

In [7]:
def stat_features(x):
    """Summarise every element of ``x`` with five descriptive statistics.

    ``x.apply`` is called with a function that maps one element to
    ``[mean, std, median, min, max]`` (NumPy reductions); the collected rows
    are returned as a DataFrame with those five column names.

    NOTE(review): assumes ``x`` is a pandas Series whose elements are
    array-likes — confirm against the caller.
    """
    def _five_number_row(values):
        return [np.mean(values), np.std(values), np.median(values), np.min(values), np.max(values)]

    summary_rows = x.apply(_five_number_row).tolist()
    return pd.DataFrame(summary_rows, columns=['mean', 'std', 'median', 'min', 'max'])
    

In [8]:
# Load the three CSV splits from the Kaggle input directory.
# `data` is used as the training set by the cells below.
data=pd.read_csv("/kaggle/input/new-data-eog/newdata.csv")
# NOTE(review): the validation frame is read from a file named "train (1).csv" — confirm this is the intended split.
val_eog=pd.read_csv("/kaggle/input/train-eog/train (1).csv")
test_eog=pd.read_csv("/kaggle/input/test-eog/test (2).csv")

In [9]:
# Split every frame into its feature columns and its "label" target column.
x_train, y_train = data.drop(columns="label"), data["label"]
x_val, y_val = val_eog.drop(columns="label"), val_eog["label"]
x_test, y_test = test_eog.drop(columns="label"), test_eog["label"]


# Preprocessing

In [12]:
# Preprocessing pipeline, applied identically to every split:
#   band-pass filter -> resample -> remove DC offset -> wavelet features.
#
# The filter input is read from the source DataFrames (data / val_eog /
# test_eog) rather than from x_train / x_val / x_test. Those three names are
# overwritten at the end of this cell, so reading them here would feed the
# extracted features back into the filter whenever the cell is re-run.
BANDPASS_ARGS = dict(Low_Cutoff=0.5, High_Cutoff=20.0, Sampling_Rate=176, order=2)

filtered_signal_train = butter_bandpass_filter(data.drop("label", axis=1), **BANDPASS_ARGS)
filtered_signal_val = butter_bandpass_filter(val_eog.drop("label", axis=1), **BANDPASS_ARGS)
filtered_signal_test = butter_bandpass_filter(test_eog.drop("label", axis=1), **BANDPASS_ARGS)

resampled_Signal_train = get_resampled_signal(filtered_signal_train)
resampled_Signal_val = get_resampled_signal(filtered_signal_val)
resampled_Signal_test = get_resampled_signal(filtered_signal_test)

RemovedDC_signal_train = remove_dc(resampled_Signal_train)
RemovedDC_signal_val = remove_dc(resampled_Signal_val)
RemovedDC_signal_test = remove_dc(resampled_Signal_test)

# From here on x_train / x_val / x_test hold the wavelet features used by the
# classical machine-learning models.
x_train = wavelet_feature_extraction(RemovedDC_signal_train)
x_val = wavelet_feature_extraction(RemovedDC_signal_val)
x_test = wavelet_feature_extraction(RemovedDC_signal_test)


# x_train=psd(RemovedDC_signal_train)
# x_val=psd(RemovedDC_signal_val)
# x_test=psd(RemovedDC_signal_test)





# Machine Learning

KNN

In [10]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline: KNN (k=5) on the raw time-domain samples, i.e. without the
# filtering / resampling / wavelet steps. The features are taken straight
# from the source DataFrames so that this cell really evaluates the
# time-domain signal when the notebook is run top to bottom; at this point
# x_train / x_val / x_test already hold the wavelet features.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(data.drop("label", axis=1), data["label"])

# Despite its name, knn_train_score holds the validation score first and the
# test score afterwards.
knn_train_score = knn.score(val_eog.drop("label", axis=1), val_eog["label"])
print(f"val score with time domain: {knn_train_score}")
knn_train_score = knn.score(test_eog.drop("label", axis=1), test_eog["label"])
print(f"test score with time domain: {knn_train_score}")




val score with time domain: 0.8888888888888888
test score with time domain: 1.0


In [13]:
from sklearn.neighbors import KNeighborsClassifier

# KNN (k=5) trained on whatever x_train currently holds; fit() returns the
# estimator, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)

# Despite its name, knn_train_score holds the validation score here ...
knn_train_score = knn.score(x_val, y_val)
print(f"val score: {knn_train_score}")

# ... and is then overwritten with the test score.
knn_train_score = knn.score(x_test, y_test)
print(f"test score: {knn_train_score}")


val score: 0.7444444444444445
test score: 0.7


Random Forest

In [40]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 150 trees and a fixed seed, trained on the current x_train.
# The name `model` is reassigned to the PyTorch network in the deep-learning section.
model = RandomForestClassifier(
    n_estimators=150,
    random_state=32,
).fit(x_train, y_train)
print(f"val score: {model.score(x_val,y_val)}")
print(f"test score: {model.score(x_test,y_test)}")


val score: 0.7333333333333333
test score: 0.9


AdaBoost

In [41]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over 30 default-configured random forests (RandomForestClassifier
# comes from the Random Forest cell's import).
# NOTE(review): newer scikit-learn releases are understood to rename
# `base_estimator` to `estimator` — confirm against the installed version.
abc = AdaBoostClassifier(
    base_estimator=RandomForestClassifier(),
    n_estimators=30,
    learning_rate=0.04,
    random_state=32,
).fit(x_train, y_train)
print(f"val score: {abc.score(x_val,y_val)}")
print(f"test score: {abc.score(x_test,y_test)}")





val score: 0.6777777777777778
test score: 0.8


Logistic Regression

In [42]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with the penalty switched off.
# NOTE(review): with penalty='none' scikit-learn is expected to ignore C —
# confirm, then either drop C or choose a real penalty.
# NOTE(review): verbose=1 is presumably what emits the solver trace printed
# under this cell — consider removing it before sharing the notebook.
log = LogisticRegression(
    penalty='none',
    C=0.04,
    max_iter=6000,
    random_state=40,
    verbose=1,
).fit(x_train, y_train)
print(f"val score: {log.score(x_val,y_val)}")
print(f"test score: {log.score(x_test,y_test)}")



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =          155     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.44849D+05    |proj g|=  8.89303D+05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
  155     30     31      1     0     0   6.846D-05   2.110D-05
  F =   2.1104236495261830E-005

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
val score: 0.7444444444444445
test score: 0.6


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.1s finished


XGBoost

In [44]:
import xgboost as xgb

# NOTE(review): XGBRFClassifier is understood to be XGBoost's random-forest
# estimator rather than the boosted XGBClassifier — confirm this is the
# model intended for this section.
xgb_model = xgb.XGBRFClassifier(random_state=42).fit(x_train, y_train)
print(f"val score: {xgb_model.score(x_val,y_val)}")
print(f"test score: {xgb_model.score(x_test,y_test)}")



val score: 0.4666666666666667
test score: 0.5


SVM

In [46]:
from sklearn.svm import SVC

# RBF-kernel support vector classifier with C=16, trained on the current x_train.
svm = SVC(C=16, kernel='rbf').fit(x_train, y_train)
print(f"val score: {svm.score(x_val,y_val)}")
print(f"test score: {svm.score(x_test,y_test)}")


val score: 0.7222222222222222
test score: 0.4


Naive Bayes

In [48]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, trained on the current x_train.
nb = GaussianNB().fit(x_train, y_train)
print(f"val score: {nb.score(x_val,y_val)}")
print(f"test score: {nb.score(x_test,y_test)}")


val score: 0.4444444444444444
test score: 0.4


# Deep Learning

In [61]:
import numpy as np
import torch
from torch import nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader


In [62]:
# Rebuild the splits from the source DataFrames: the deep-learning section
# trains on the raw columns, not on the wavelet features that the
# preprocessing cell stored in x_train / x_val / x_test.
x_train, y_train = data.drop(columns="label"), data["label"]
x_val, y_val = val_eog.drop(columns="label"), val_eog["label"]
x_test, y_test = test_eog.drop(columns="label"), test_eog["label"]


In [63]:
# Hyper-parameters for the PyTorch training run.
LEARNING_RATE=1e-4  # passed to Adam as `lr`
WEIGH_DECAY=5e-4  # passed to Adam as `weight_decay`; the spelling (sic) is what the optimizer cell references
NUM_EPOCHS=10  # number of iterations of the training loop

In [64]:
# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    Device = "cuda"
else:
    Device = "cpu"

In [65]:
class mlp(nn.Module):
    """Fully connected classifier: a stack of hidden blocks plus a linear head.

    Each hidden block is ``Linear -> LeakyReLU(0.3) -> Dropout(p)``. The head
    ``clf`` maps the last hidden width (or ``n_features`` when ``hidden_dim``
    is empty) to ``n_classes`` raw scores; no softmax is applied.

    Parameters
    ----------
    n_features : int
        Width of the input vectors.
    hidden_dim : sequence of int
        Width of each hidden layer, in order. Any sequence is accepted
        (tuple, list, ...).
    p : float, optional
        Dropout probability used in every hidden block.
    n_classes : int, optional
        Number of output scores. Defaults to 5, the value previously
        hard-coded in the head.
    """

    def __init__(self,n_features,hidden_dim,p=0.5,n_classes=5):
        super().__init__()
        # Coerce to a tuple so a list of widths can be concatenated below.
        hidden_dim = tuple(hidden_dim)
        dims = (n_features,) + hidden_dim
        self.n_layers = len(hidden_dim)
        self.hidden_layers = nn.ModuleList([
            nn.Sequential(
                nn.Linear(dims[i], dims[i + 1]),
                # nn.BatchNorm1d(dims[i + 1]),  # disabled in the original cell
                nn.LeakyReLU(0.3),
                nn.Dropout(p),
            )
            for i in range(self.n_layers)
        ])
        self.clf = nn.Linear(dims[-1], n_classes)

    def forward(self,x):
        """Run ``x`` through every hidden block, then return the head's scores."""
        for block in self.hidden_layers:
            x = block(x)
        return self.clf(x)

In [66]:
# Size the input layer from the training features instead of hard-coding the
# column count, so the network always matches the data it is trained on.
model = mlp(x_train.shape[1], (400, 300, 100, 50))
model = model.to(Device)

In [67]:
# Cross-entropy loss on the network's raw scores, optimised with Adam using
# the learning rate and weight decay from the hyper-parameter cell.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGH_DECAY,
)

In [68]:
def save_checkpoint(state, filename="/kaggle/working/model_eog.pt"):
    """Serialise ``state`` to disk with ``torch.save``.

    Parameters
    ----------
    state : object
        Whatever should be stored (the training loop passes a ``state_dict``).
    filename : str or path-like, optional
        Destination file. Defaults to the Kaggle working-directory path that
        was previously hard-coded, so existing calls are unaffected. An
        existing file at that location is overwritten.
    """
    print("=> saving checkpoint")
    torch.save(state, filename)

In [69]:
def train_one_epoch(loader,model,optimizer,loss_fn,device):
    """Run one optimisation pass over ``loader`` and report the mean batch loss.

    Parameters
    ----------
    loader : iterable
        Yields ``(data, targets)`` batches.
    model : torch.nn.Module
        Network to train; it is called on ``data.float()``.
    optimizer : torch.optim.Optimizer
        Optimiser stepping the model parameters once per batch.
    loss_fn : callable
        Called as ``loss_fn(scores, targets)``; must return a scalar tensor.
    device : str or torch.device
        Device every batch is moved to.

    Returns
    -------
    float or None
        Mean of the per-batch losses, or ``None`` when the loader produced no
        batches (previously a ZeroDivisionError).
    """
    losses = []
    for data, targets in loader:
        data = data.to(device=device)
        # Cast on the tensor itself: the legacy torch.LongTensor(...) constructor
        # is type-strict and rejects e.g. int32 label tensors.
        targets = torch.as_tensor(targets).to(device=device, dtype=torch.long)

        scores = model(data.float())
        loss = loss_fn(scores, targets)
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if not losses:
        print("Loss average over epoch: n/a (loader produced no batches)")
        return None

    average_loss = sum(losses) / len(losses)
    print(f"Loss average over epoch: {average_loss}")
    return average_loss

In [70]:
def check_accuracy(loader,model,device="cuda"):
    """Evaluate classification accuracy of ``model`` over ``loader``.

    The model is put in eval mode for the pass and gradients are disabled;
    afterwards the mode it had on entry is restored. The predicted class of a
    sample is the index of its highest score.

    Parameters
    ----------
    loader : iterable
        Yields ``(x, y)`` batches of inputs and integer class labels.
    model : torch.nn.Module
        Network to evaluate; it is called on ``x.float()``.
    device : str or torch.device, optional
        Device each batch is moved to.

    Returns
    -------
    float
        Accuracy in percent (``0.0`` when the loader yields no samples, which
        previously raised ZeroDivisionError). The same summary line as before
        is also printed.
    """
    was_training = model.training
    model.eval()
    num_correct = 0
    num_samples = 0

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                scores = model(x.float())
                _, predictions = scores.max(1)
                # .item() keeps the running count a plain Python int.
                num_correct += int((predictions == y).sum().item())
                num_samples += predictions.shape[0]
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
    print(f"Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}")
    return accuracy

In [71]:
# Convert every split to tensors, reading from the source DataFrames rather
# than from x_train / x_val / x_test themselves. This keeps the cell
# re-runnable: once those names hold tensors they no longer have a usable
# `.values` array to convert.
# Features are float32; labels are cast to int64 explicitly so they are
# valid class indices for the cross-entropy loss.
x_train = torch.tensor(data.drop("label", axis=1).values, dtype=torch.float32)
y_train = torch.tensor(data["label"].values, dtype=torch.long)

x_val = torch.tensor(val_eog.drop("label", axis=1).values, dtype=torch.float32)
y_val = torch.tensor(val_eog["label"].values, dtype=torch.long)

x_test = torch.tensor(test_eog.drop("label", axis=1).values, dtype=torch.float32)
y_test = torch.tensor(test_eog["label"].values, dtype=torch.long)

In [72]:
# Pair each split's feature tensor with its label tensor so a DataLoader can
# batch them together.
train_data=TensorDataset(x_train,y_train)
val_data=TensorDataset(x_val,y_val)
test_data=TensorDataset(x_test,y_test)


In [73]:
# Only the training batches are shuffled. The evaluation loaders keep the
# dataset order: accuracy does not depend on batch order, and leaving them
# unshuffled makes evaluation deterministic.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=18, shuffle=False)
test_loader = DataLoader(test_data, batch_size=18, shuffle=False)

In [74]:
# Training loop: one optimisation pass per epoch, then accuracy on the
# validation and test loaders, then a checkpoint of the current weights.
for epoch in range(NUM_EPOCHS):
    train_one_epoch(train_loader,model,optimizer,loss_fn,Device)
    # First printed "Got ..." line is validation accuracy, the second is test accuracy.
    check_accuracy(val_loader,model,Device)
    # NOTE(review): the test split is evaluated after every epoch — confirm it is not used for model selection.
    check_accuracy(test_loader,model,Device)

    
    # NOTE(review): the checkpoint is written every epoch to the same file, so
    # only the last epoch's weights survive, regardless of validation accuracy.
    checkpoint=model.state_dict()
    save_checkpoint(checkpoint)

Loss average over epoch: 0.6031210943502783
Got 82/90 with accuracy 91.11
Got 10/10 with accuracy 100.00
=> saving checkpoint
Loss average over epoch: 0.08194474365678264
Got 80/90 with accuracy 88.89
Got 10/10 with accuracy 100.00
=> saving checkpoint
Loss average over epoch: 0.045178140717123255
Got 83/90 with accuracy 92.22
Got 10/10 with accuracy 100.00
=> saving checkpoint
Loss average over epoch: 0.040017815362146346
Got 78/90 with accuracy 86.67
Got 9/10 with accuracy 90.00
=> saving checkpoint
Loss average over epoch: 0.0302589459278893
Got 82/90 with accuracy 91.11
Got 10/10 with accuracy 100.00
=> saving checkpoint
Loss average over epoch: 0.02612330081048491
Got 81/90 with accuracy 90.00
Got 10/10 with accuracy 100.00
=> saving checkpoint
Loss average over epoch: 0.025720510863093324
Got 81/90 with accuracy 90.00
Got 10/10 with accuracy 100.00
=> saving checkpoint
Loss average over epoch: 0.020416138906192646
Got 81/90 with accuracy 90.00
Got 10/10 with accuracy 100.00
=> sa