In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score # For cross validation
from sklearn.model_selection import GridSearchCV # For searching over possible values of hyperparameters
from sklearn.model_selection import train_test_split # For testing and training splits
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [2]:
#Read in Atlas data
data = np.loadtxt("ATLAS-data.csv",delimiter=",")
labels = np.loadtxt("ATLAS-labels.csv",delimiter=",")

#### Things to explore:
1. learning hyper parameters of classifiers
2. best number of layers/hidden units
3. applying dropout/regularisation
4. not marked on model performance

Covered in lectures:
1. K nearest neighbour


In [3]:
X_train, X_test, y_train, y_test = train_test_split(data,labels,test_size=0.1)

In [4]:
labels.shape

(10000,)

### Traditional Methods

#### KNN

In [5]:
#K Nearest neighbour
KNN = KNeighborsClassifier(n_neighbors=3)
KNN.fit(X_train, y_train)
KNN.score(X_test, y_test)

0.728

In [6]:
#KNN Cross Val
cross_val_score(KNN,X_train,y_train,cv=10)

array([0.73888889, 0.72      , 0.73888889, 0.74333333, 0.73777778,
       0.73888889, 0.73555556, 0.71888889, 0.73555556, 0.73111111])

In [None]:
#parameter search
grid = {"n_neighbors":range(1,30)}

model_cv = GridSearchCV(KNN,grid,cv=10,iid=False)
model_cv.fit(X_train,y_train)
model_cv.best_params_

In [None]:
KNN = KNeighborsClassifier(n_neighbors=18)
KNN.fit(X_train, y_train)
KNN.score(X_test, y_test)

In [9]:
#KNN Cross Val
cross_val_score(KNN,X_train,y_train,cv=10)

array([0.75777778, 0.75444444, 0.76222222, 0.76222222, 0.76777778,
       0.77666667, 0.75444444, 0.77      , 0.76555556, 0.77      ])

#### SVM

In [10]:
#SVM
SVM = SVC(gamma = 'auto')
SVM.fit(X_train, y_train)
SVM.score(X_test, y_test)

0.517

In [11]:
#SVM Cross Val
cross_val_score(SVM,X_train,y_train,cv=10)

array([0.53333333, 0.53333333, 0.53333333, 0.53333333, 0.53333333,
       0.53222222, 0.53222222, 0.53222222, 0.53222222, 0.53222222])

#### SVM Regression

In [12]:
modelSVR = SVR(gamma = 'auto')
modelSVR.fit(data,labels)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

### Deep Learning Methods

In [13]:
# Scaling the data
# Define the scaler 
scaler = StandardScaler().fit(X_train)

# Scale the train set
X_train = scaler.transform(X_train)

# Scale the test set
X_test = scaler.transform(X_test)

In [33]:
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=30))
model.add(Dense(units=10, activation='softmax'))

In [44]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

In [45]:
model.fit(X_train, y_train, epochs=5, batch_size=32)

Train on 9000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1a40bd1d68>

In [48]:
loss_and_metrics = model.evaluate(X_test, y_test)



In [49]:
classes = model.predict(X_test)