In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import os
import seaborn as sns

import pywt
import scipy
from scipy import signal
from scipy.signal import butter, deconvolve

In [2]:
test_path = os.path.join('..','data','test.csv')
train_path = os.path.join('..','data','train.csv')

test_df = pd.read_csv(test_path)
train_df = pd.read_csv(train_path)

In [3]:
def maddest(d, axis=None):
    return np.mean(np.absolute(d - np.mean(d, axis)), axis)

def denoise_signal(x, wavelet='db4', level=1):
    coeff = pywt.wavedec(x, wavelet, mode="per")
    sigma = (1/0.6745) * maddest(coeff[-level])

    uthresh = sigma * np.sqrt(2*np.log(len(x)))
    coeff[1:] = (pywt.threshold(i, value=uthresh, mode='hard') for i in coeff[1:])

    return pywt.waverec(coeff, wavelet, mode='per')

In [4]:
time_train = train_df.time
signal_train_denoised = denoise_signal(train_df.signal)
open_channels_train = train_df.open_channels

data = {
    'time': time_train,
    'signal_denoised': signal_train_denoised,
    'open_channels_train': open_channels_train
       }

train_data = pd.DataFrame(data=data).dropna()
train_data

Unnamed: 0,time,signal_denoised,open_channels_train
0,0.0001,-2.497504,0
1,0.0002,-3.543952,0
2,0.0003,-0.952305,0
3,0.0004,-1.326738,0
4,0.0005,-3.603200,0
...,...,...,...
4999995,499.9996,3.296552,7
4999996,499.9997,3.586595,7
4999997,499.9998,3.957363,8
4999998,499.9999,4.474106,9


scikit-learn

In [5]:
from sklearn.model_selection import train_test_split

X = train_data.drop('open_channels_train',axis=1)
y = train_data.open_channels_train.values.reshape(-1,1).ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Descision Tree No HyperParameter Tuning

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

pipe = Pipeline([('scaler', StandardScaler()), ('clf', DecisionTreeClassifier())])
pipe.fit(X_train, y_train)
pipe.score(X_test,y_test)

0.650988

Descision Tree With HyperParameter Tuning

In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

params = dict(
    clf__criterion=['gini', 'entropy','log_loss'],
    # clf__splitter=['best', "random"],
    clf__max_depth=np.arange(1, 20, 1),
    clf__min_samples_split=np.arange(1, 10, 1),
    # clf__min_samples_leaf=np.arange(1, 10, 1)
)


pipe = Pipeline([('scaler', StandardScaler()), ('clf', DecisionTreeClassifier())])

search = GridSearchCV(pipe, params, n_jobs=3)

In [8]:
%%time
search.fit(X_train, y_train)
search.score(X_test, y_test)

285 fits failed out of a total of 2565.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
285 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/aymansulaiman/miniforge3/envs/tf/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/aymansulaiman/miniforge3/envs/tf/lib/python3.10/site-packages/sklearn/pipeline.py", line 382, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/Users/aymansulaiman/miniforge3/envs/tf/lib/python3.10/site-packages/sklearn/tree/_classes.py", line 969, in fit
    super().fit(
  File "/Users/aymansulaiman/miniforge3/envs/tf/lib/python3.10/site-pac

CPU times: user 26 s, sys: 1.13 s, total: 27.2 s
Wall time: 3h 9min 35s


0.668409

XGBoost

In [9]:
from sklearn.preprocessing import QuantileTransformer, StandardScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [10]:
import xgboost as xgb

In [11]:
model = xgb.XGBClassifier()
model.fit(X_train_scaled, y_train_categorical)
model.score(X_test_scaled, y_test_categorical)

0.521784

XGBoost HyperParameter Tuning

In [33]:
params = {
    "clf__objective": ["multi:softprob"],
    "clf__num_class": [11],
    "clf__learning_rate" : [0.5],
    "clf__max_leaves": [2**4],
    "clf__grow_policy": ["lossguide"],
    'clf__min_child_weight': [50],
    'clf__lambda': [1],
    'clf__eval_metric': ['mlogloss']
}

pipe = Pipeline([('scaler', StandardScaler()), ('clf', xgb.XGBClassifier())])
search = GridSearchCV(pipe, params, n_jobs=3)

In [34]:
%%time
search.fit(X_train, y_train)
search.score(X_test, y_test)

CPU times: user 1h 1min 15s, sys: 14min 14s, total: 1h 15min 30s
Wall time: 47min 5s


0.668436

In [35]:
search.best_params_

{'clf__eval_metric': 'mlogloss',
 'clf__grow_policy': 'lossguide',
 'clf__lambda': 1,
 'clf__learning_rate': 0.5,
 'clf__max_leaves': 16,
 'clf__min_child_weight': 50,
 'clf__num_class': 11,
 'clf__objective': 'multi:softprob'}

Tensorflow

Tensorflow with hyperparameter tuning

Tensorflow time series classification

Tensorflow ConV NN