## Imports

In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegressionCV, RidgeClassifierCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

## Data Exploration

In [2]:
inputs = np.load('classification/inputs.npy')
inputs.shape

(1000, 20)

In [3]:
labels = np .load('classification/labels.npy')
labels.shape

(1000, 1)

## Data preparation

In [4]:
StandardScaler().fit_transform(inputs)
X_train, X_test, y_train, y_test = train_test_split(inputs, labels, test_size=0.2, random_state=42)

In [5]:
print("X_train : " + str(X_train.shape))
print("X_test : " + str(X_test.shape))
print("y_train : " + str(y_train.shape))
print("y_test : " + str(y_test.shape))

X_train : (800, 20)
X_test : (200, 20)
y_train : (800, 1)
y_test : (200, 1)


## Ridge Classifier

In [6]:
model = RidgeClassifierCV()
model.fit(X_train, y_train.reshape((y_train.shape[0])))

predictions = model.predict(X_train)
print("Train:", accuracy_score(y_train, predictions))

predictions = model.predict(X_test)
print("Test:", accuracy_score(y_test, predictions))

Train: 0.89875
Test: 0.9


## Logistic Regression Classifier

In [10]:
model = LogisticRegressionCV(cv=5, random_state=0)
model.fit(X_train, y_train.reshape((y_train.shape[0])))

predictions = model.predict(X_train)
print("Train:", accuracy_score(y_train, predictions))

predictions = model.predict(X_test)
print("Test:", accuracy_score(y_test, predictions))

Train: 0.89625
Test: 0.91


## Random Forest Classifier

In [8]:
model = RandomForestClassifier(max_depth=10, random_state=0)
model.fit(X_train, y_train.reshape((y_train.shape[0])))

predictions = model.predict(X_train)
print("Train:", accuracy_score(y_train, predictions))

predictions = model.predict(X_test)
print("Test:", accuracy_score(y_test, predictions))

Train: 0.99875
Test: 0.845
