In [1]:
# library preparations
import scipy.io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import joblib
import seaborn as sns
import time
import pickle

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA

from sklearn.pipeline import Pipeline

In [2]:
# Load the train/test archives and flatten every sample into a 1-D feature
# vector so the arrays can be fed to scikit-learn estimators.
#
# The archives are opened as context managers: np.load on an .npz file returns
# a lazy NpzFile that keeps the file open until it is closed, so the arrays are
# pulled into memory inside the `with` and the handle is released right after.
#
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code from an untrusted file. If these arrays are plain
# numeric data (not verifiable from this cell), switch it to False.
with np.load('./data/train.npz', allow_pickle=True) as load_train:
    X_train, y_train = load_train['X_train'], load_train['y_train']
with np.load('./data/test_large.npz', allow_pickle=True) as load_test:
    X_test, y_test = load_test['X_test_large'], load_test['y_test_large']

# Dimensions of the raw training array; the 3-way unpacking also fails fast
# if X_train is not 3-D.
samples, rows, cols = X_train.shape


def _print_shapes(title):
    """Print `title` followed by the current shapes of the four data arrays."""
    print(title)
    for arr in (X_train, y_train, X_test, y_test):
        print(arr.shape)


_print_shapes("Before flattening:")

# Labels: collapse to a 1-D vector of length n_samples. Reshaping to exactly
# (n_samples,) raises if the label array holds more than one value per sample.
y_train = y_train.reshape(y_train.shape[0])
y_test = y_test.reshape(y_test.shape[0])
# Features: keep the sample axis and merge all remaining axes into one.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

_print_shapes("After flattening:")

# Sanity checks: every sample has a label, and train/test share a feature width.
assert X_train.shape[0] == y_train.shape[0], "train features and labels differ in sample count"
assert X_test.shape[0] == y_test.shape[0], "test features and labels differ in sample count"
assert X_train.shape[1] == X_test.shape[1], "train and test differ in feature count"

Before flattening:
(6500, 460, 12)
(6500, 1)
(6499, 460, 12)
(6499, 1)
After flattening:
(6500, 5520)
(6500,)
(6499, 5520)
(6499,)


In [7]:
from sklearn.svm import SVC

# Standardise the features, then classify with a support-vector machine.
# Keeping the scaler inside the pipeline means it is re-fitted on whatever
# data the search hands to the pipeline, rather than once on all of X_train.
pipe = Pipeline(steps=[('scaler', StandardScaler()), ('svm', SVC())])

# Only the kernel is searched; C and degree are left at what SVC() uses when
# constructed without arguments.
param_grid = dict(svm__kernel=['linear', 'rbf', 'poly'])

# 5-fold grid search scored on accuracy, two parallel workers.
# ~30 min run time
clf = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=2,
).fit(X_train, y_train)

# Evaluate the fitted search object on the held-out test set.
# NOTE(review): the recorded run printed ~0.49 here. If the labels are a
# balanced binary target (not verifiable from this cell) that is chance level;
# compare against clf.best_score_ to see whether the gap is train/test related.
pred = clf.predict(X_test)
print('Test set Accuracy is: ', accuracy_score(y_test, pred))

Test set Accuracy is:  0.4942298815202339


In [8]:
from sklearn.neural_network import MLPClassifier

# Fixed seed for the MLP. Without it the network's random initialisation
# differs on every run, so the selected activation and the printed test
# accuracy could not be reproduced by re-running this cell.
MLP_RANDOM_STATE = 42

# Standardise the features, then classify with a multi-layer perceptron.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(random_state=MLP_RANDOM_STATE))
])

# Only the activation function is searched; the hidden layer sizes are left
# at what MLPClassifier uses when none are given.
param_grid = {
    'mlp__activation': ['logistic', 'tanh', 'relu']
}

# 5-fold grid search scored on accuracy, four parallel workers.
clf = GridSearchCV(pipe, param_grid, scoring="accuracy", cv=5, n_jobs=4)
clf.fit(X_train, y_train)

# Evaluate the fitted search object on the held-out test set.
# NOTE(review): the recorded (unseeded) run printed ~0.49 here. If the labels
# are a balanced binary target (not verifiable from this cell) that is chance
# level; compare against clf.best_score_ before trusting either number.
pred = clf.predict(X_test)
print('Test set Accuracy is: ', accuracy_score(y_test, pred))

Test set Accuracy is:  0.4940760116941068
