<a href="https://colab.research.google.com/github/lblum95/AML/blob/master/task3/1DCNNClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1DCNNClassifier

## Connect to My Drive

In [None]:
# Mount Google Drive inside the Colab VM and make "My Drive" the working
# directory, so the relative "data/..." paths used further down resolve there.
import os
from google.colab import drive

drive.mount('/content/drive')
os.chdir('/content/drive/My Drive')

Mounted at /content/drive


## Import libraries

In [None]:
#general
import pandas as pd
import numpy as np
#sklearn
# IterativeImputer is experimental and can only be imported after this explicit opt-in
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.preprocessing import OneHotEncoder,StandardScaler
#Keras
import keras
import keras.backend as K
import tensorflow as tf
from tensorflow.keras.layers import Conv1D,MaxPooling1D,Flatten,BatchNormalization
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.metrics import Precision, Recall
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler, ReduceLROnPlateau

## Prepare data

In [None]:
# Load the labels and both feature tables; the first CSV column is the index
y_train = pd.read_csv("data/y_train.csv", index_col=0, header=0)
x_train = pd.read_csv("data/X_train_features.csv", index_col=0, header=0, low_memory=False)
x_test = pd.read_csv("data/X_test_features.csv", index_col=0, header=0, low_memory=False)

# Only take the time avg. segment: everything from column 57 onwards
all_features = x_train.iloc[:, 57:]
print(all_features.shape)

# Easier sub-problem: keep only the rows whose label is below 3
keep_rows = y_train['y'] < 3
normal_features = all_features[keep_rows]
print(normal_features.shape)
y_tr = y_train[keep_rows]

# Fill missing values with SimpleImputer's default strategy, then standardise.
# Both are fitted on the full filtered table, i.e. before any CV split.
imputer = SimpleImputer()
scaler = StandardScaler()
features = imputer.fit_transform(normal_features)
selected = scaler.fit_transform(features)

(5117, 1820)
(4947, 1820)


### Prepare data for CNN

In [None]:
# Rearrange the flat feature matrix into the (samples, time steps, signals)
# tensor fed to the Conv1D model. Starting at column `offset`, `selected` is read
# as `n_signals` consecutive runs of `length` values (one run per signal), so
#   stacked[row, time, feature] == selected[row, offset + feature*length + time]
# Done with one slice + reshape + transpose instead of a triple Python loop.
# NOTE(review): `selected` is already derived from x_train.iloc[:, 57:], so this
# skips a further 57 columns — confirm the second offset is intended.
length=180
n_signals=5
offset=57
signal_block = selected[:, offset:offset + n_signals*length]
stacked = np.ascontiguousarray(
    signal_block.reshape(len(selected), n_signals, length).transpose(0, 2, 1)
)

## Model

### Different losses

In [None]:
# Alternative objectives for Keras to optimize; performed worse than categorical_crossentropy
def _macro_f1(y_true, y_pred):
    """Return the mean over label columns of the per-column F1 score.

    True positives, false positives and false negatives are accumulated over
    axis 0 (the batch), giving one precision/recall/F1 value per column.
    K.epsilon() is added to every denominator so that a column without any
    positives yields 0 instead of a division by zero.
    """
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())

    per_column = 2*precision*recall / (precision + recall + K.epsilon())
    return K.mean(per_column)

def f1(y_true, y_pred):
    """F1 metric: predictions are rounded to 0/1 before the counts are taken.

    Parameters:
        y_true -- one-hot / multi-hot targets, same shape as y_pred
        y_pred -- predicted scores in [0, 1]
    Returns:
        scalar mean of the per-column F1 scores
    """
    return _macro_f1(y_true, K.round(y_pred))

def f1_loss(y_true, y_pred):
    """Soft F1 loss: 1 - mean F1 computed on the raw (unrounded) predictions.

    Parameters:
        y_true -- one-hot / multi-hot targets, same shape as y_pred
        y_pred -- predicted scores in [0, 1]
    Returns:
        scalar loss; lower is better
    """
    return 1 - _macro_f1(y_true, y_pred)
    
def categorical_focal_loss(gamma=2.0, alpha=0.25):
    """
    Build a focal-loss function for multiclass classification.
    Per-class term:
        loss = -alpha*((1-p)^gamma)*log(p)
    Parameters:
        alpha -- constant weighting factor (as in balanced cross entropy)
        gamma -- focusing exponent of the modulating factor (1-p)
    Default value:
        gamma -- 2.0 as mentioned in the paper
        alpha -- 0.25 as mentioned in the paper
    Returns:
        focal_loss(y_true, y_pred), which yields one loss value per row
        (the per-class terms are summed over axis 1)
    """
    def focal_loss(y_true, y_pred):
        # Keep the probabilities strictly inside (0, 1) so K.log never receives 0
        eps = K.epsilon()
        probs = K.clip(y_pred, eps, 1.0-eps)
        # alpha-scaled modulating factor; y_true zeroes it for non-target classes
        modulating = alpha * y_true * K.pow((1-probs), gamma)
        # Weighted cross entropy per class (y_true appears in both factors)
        per_class = modulating * (-y_true*K.log(probs))
        # Collapse the class axis
        return K.sum(per_class, axis=1)

    return focal_loss

### Create model

In [None]:
# adjusted those params
input_dim=180
dropout=0.2
def get_model(input_dim=input_dim, dropout=dropout):
  """Build and compile the fully connected 3-class classifier.

  The defaults are captured from the two module-level values above when this
  cell is executed, so rebinding the globals `input_dim` / `dropout` in a
  later cell no longer changes the network this function builds.

  Parameters:
    input_dim -- number of units in each of the three hidden Dense layers
    dropout   -- rate used by every Dropout layer
  Returns:
    a compiled Sequential model: BatchNormalization, Dropout, three
    (Dense selu + Dropout) blocks and a 3-way softmax output, trained with
    categorical_crossentropy / adam and tracking Precision and Recall.
  """
  model = Sequential()
  # Normalization on and off tried
  model.add(BatchNormalization())
  model.add(Dropout(dropout))
  # Three identical hidden blocks
  for _ in range(3):
    model.add(Dense(input_dim, activation='selu'))
    model.add(Dropout(dropout))
  model.add(Dense(3, activation='softmax'))
  # Compile model
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[Precision(), Recall()])
  return model

### Train model

In [None]:
# One-hot encode the labels for the softmax / categorical_crossentropy model
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(y_tr)
skf=StratifiedKFold(n_splits=5)
for train_index,test_index in skf.split(selected,y_tr):
  X_train = selected[train_index]
  X_test = selected[test_index]
  Y_train = enc.transform(y_tr.iloc[train_index]).toarray()
  # The held-out fold keeps its original (non-encoded) labels for scoring
  Y_test = y_tr.iloc[test_index]
  model = get_model()
  # Stop after 20 epochs without val_loss improvement; reduce the LR after 10
  early = EarlyStopping(monitor="val_loss", mode="auto", patience=20, verbose=1)
  redonplat = ReduceLROnPlateau(monitor="val_loss", mode="auto", patience=10, verbose=2)
  callbacks_list = [early, redonplat]
  model.fit(X_train, Y_train, epochs=1000, verbose=2, callbacks=callbacks_list, validation_split=0.1)
  model.summary()
  # Decode the softmax output explicitly: most probable column -> original label.
  # This avoids feeding probabilities to OneHotEncoder.inverse_transform and
  # gives a 1-D label vector for the metrics below.
  Y_pred = enc.categories_[0][np.argmax(model.predict(X_test), axis=1)]
  print(confusion_matrix(Y_test, Y_pred))
  print('Score')
  print(f1_score(Y_test, Y_pred, average='micro'))
  print()
  print()
  # Only the first of the five folds is evaluated
  break

Epoch 1/1000
112/112 - 1s - loss: 1.3844 - precision_95: 0.5851 - recall_95: 0.5636 - val_loss: 0.7921 - val_precision_95: 0.7049 - val_recall_95: 0.6515
Epoch 2/1000
112/112 - 1s - loss: 1.0713 - precision_95: 0.6219 - recall_95: 0.5922 - val_loss: 0.6842 - val_precision_95: 0.7127 - val_recall_95: 0.6515
Epoch 3/1000
112/112 - 0s - loss: 0.9307 - precision_95: 0.6580 - recall_95: 0.6170 - val_loss: 0.6810 - val_precision_95: 0.7352 - val_recall_95: 0.6591
Epoch 4/1000
112/112 - 1s - loss: 0.8225 - precision_95: 0.6756 - recall_95: 0.6347 - val_loss: 0.6701 - val_precision_95: 0.7345 - val_recall_95: 0.6566
Epoch 5/1000
112/112 - 0s - loss: 0.7360 - precision_95: 0.6965 - recall_95: 0.6549 - val_loss: 0.7071 - val_precision_95: 0.7095 - val_recall_95: 0.6414
Epoch 6/1000
112/112 - 0s - loss: 0.6971 - precision_95: 0.7151 - recall_95: 0.6723 - val_loss: 0.6742 - val_precision_95: 0.7258 - val_recall_95: 0.6818
Epoch 7/1000
112/112 - 0s - loss: 0.6642 - precision_95: 0.7174 - recall_95:

### Create model with bottleneck

In [None]:
#played with those params, filter, kernel size, and amount of dense layers at the end
input_dim=256
dropout=0.1
def get_bottle(input_dim=input_dim, dropout=dropout):
  """Build and compile the 1D-CNN 3-class classifier.

  The defaults are captured from the two module-level values above when this
  cell is executed, so the architecture does not depend on whichever cell
  last rebound the globals `input_dim` / `dropout`.

  Parameters:
    input_dim -- units of the first Dense layer; the second one gets
                 input_dim // 2 (integer division, since a layer width
                 must be an int)
    dropout   -- rate used by every Dropout layer
  Returns:
    a compiled Sequential model for inputs of shape (180, 5):
    BatchNormalization, two Conv1D(64, 3) + MaxPooling1D(3) stages, Flatten,
    two Dense selu layers and a 3-way softmax output, trained with
    categorical_crossentropy / adam and tracking Precision and Recall.
  """
  model = Sequential()
  # The input shape is declared on the first layer; on a later layer it is ignored
  model.add(BatchNormalization(input_shape=(180, 5)))
  model.add(Conv1D(64, 3, activation='selu'))
  model.add(MaxPooling1D(3))
  model.add(Dropout(dropout))
  model.add(Conv1D(64, 3, activation='selu'))
  model.add(MaxPooling1D(3))
  model.add(Flatten())
  model.add(Dropout(dropout))
  model.add(Dense(input_dim, activation='selu'))
  model.add(Dropout(dropout))
  model.add(Dense(input_dim // 2, activation='selu'))
  model.add(Dropout(dropout))
  model.add(Dense(3, activation='softmax'))
  # Compile model
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[Precision(), Recall()])
  return model

### Train model with bottleneck

In [None]:
# Same protocol as the dense model, but on the (samples, 180, 5) tensor `stacked`.
# `enc` is the OneHotEncoder fitted on y_tr in the previous training cell.
skf=StratifiedKFold(n_splits=5)
for train_index,test_index in skf.split(stacked,y_tr):
  X_train = stacked[train_index]
  X_test = stacked[test_index]
  Y_train = enc.transform(y_tr.iloc[train_index]).toarray()
  # The held-out fold keeps its original (non-encoded) labels for scoring
  Y_test = y_tr.iloc[test_index]
  model = get_bottle()
  # Stop after 20 epochs without val_loss improvement; reduce the LR after 10
  early = EarlyStopping(monitor="val_loss", mode="auto", patience=20, verbose=1)
  redonplat = ReduceLROnPlateau(monitor="val_loss", mode="auto", patience=10, verbose=2)
  callbacks_list = [early, redonplat]

  # batch_size=len(X_train): the whole training split is one batch per epoch
  model.fit(X_train, Y_train,batch_size=len(X_train), epochs=1000, verbose=2, callbacks=callbacks_list, validation_split=0.1)
  # Decode the softmax output explicitly: most probable column -> original label.
  # This avoids feeding probabilities to OneHotEncoder.inverse_transform and
  # gives a 1-D label vector for the metric below.
  Y_pred = enc.categories_[0][np.argmax(model.predict(X_test), axis=1)]
  print('Score')
  print(f1_score(Y_test, Y_pred, average='micro'))
  print()
  print()
  model.summary()
  # Only the first of the five folds is evaluated
  break

Epoch 1/1000
1/1 - 0s - loss: 1.6701 - precision_105: 0.2282 - recall_105: 0.1733 - val_loss: 1.2485 - val_precision_105: 0.5833 - val_recall_105: 0.5657
Epoch 2/1000
1/1 - 0s - loss: 1.2422 - precision_105: 0.5823 - recall_105: 0.5673 - val_loss: 1.1172 - val_precision_105: 0.6343 - val_recall_105: 0.6263
Epoch 3/1000
1/1 - 0s - loss: 1.2315 - precision_105: 0.6402 - recall_105: 0.6327 - val_loss: 0.9880 - val_precision_105: 0.6451 - val_recall_105: 0.6288
Epoch 4/1000
1/1 - 0s - loss: 1.1756 - precision_105: 0.6261 - recall_105: 0.6192 - val_loss: 0.8770 - val_precision_105: 0.6736 - val_recall_105: 0.6515
Epoch 5/1000
1/1 - 0s - loss: 1.0858 - precision_105: 0.6216 - recall_105: 0.6122 - val_loss: 0.8283 - val_precision_105: 0.6841 - val_recall_105: 0.6616
Epoch 6/1000
1/1 - 0s - loss: 0.9954 - precision_105: 0.6385 - recall_105: 0.6240 - val_loss: 0.8062 - val_precision_105: 0.6684 - val_recall_105: 0.6313
Epoch 7/1000
1/1 - 0s - loss: 0.9213 - precision_105: 0.6639 - recall_105: 0