In [None]:
import gzip
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, roc_curve, auc, make_scorer, accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score, KFold, cross_val_predict
from sklearn.preprocessing import LabelBinarizer
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.model_selection import GridSearchCV
import pandas as pd
import time

In [None]:
def load_fashion_mnist(data_dir=''):
    """
    Loads Fashion MNIST dataset.

    Reads the four gzip-compressed files (train/test images and labels) found
    in ``data_dir`` and returns their payloads as uint8 NumPy arrays.

    Adapted from: https://github.com/zalandoresearch/fashion-mnist/blob/master/utils/mnist_reader.py

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory that contains the ``*-ubyte.gz`` files. The default ``''``
        resolves the file names against the current working directory.

    Returns
    -------
    train_images : ndarray of uint8, shape (len(train_labels), 784)
    train_labels : ndarray of uint8, 1-D
    test_images : ndarray of uint8, shape (len(test_labels), 784)
    test_labels : ndarray of uint8, 1-D

    Raises
    ------
    OSError
        If one of the files cannot be opened or decompressed.
    ValueError
        If an image payload cannot be reshaped to (number of labels, 784).
    """
    import os  # local import: the notebook's import cell does not import os

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    LABEL_HEADER_BYTES = 8    # bytes skipped at the start of a label file
    IMAGE_HEADER_BYTES = 16   # bytes skipped at the start of an image file
    PIXELS_PER_IMAGE = 784    # each image is stored as one flat row of 784 bytes

    def _read_payload(filename, header_bytes):
        """Decompress one file and return everything after its header as uint8."""
        with gzip.open(os.path.join(data_dir, filename), 'rb') as handle:
            return np.frombuffer(handle.read(), dtype=np.uint8, offset=header_bytes)

    # Labels are read first because their length fixes the number of image rows.
    train_labels = _read_payload(TRAIN_LABELS, LABEL_HEADER_BYTES)
    test_labels = _read_payload(TEST_LABELS, LABEL_HEADER_BYTES)

    train_images = _read_payload(TRAIN_IMAGES, IMAGE_HEADER_BYTES).reshape(
        len(train_labels), PIXELS_PER_IMAGE)
    test_images = _read_payload(TEST_IMAGES, IMAGE_HEADER_BYTES).reshape(
        len(test_labels), PIXELS_PER_IMAGE)

    return train_images, train_labels, test_images, test_labels

# Read the four arrays from disk once; later cells reuse these names.
(train_images, train_labels,
 test_images, test_labels) = load_fashion_mnist()

In [None]:
# Alias the training labels as the target vector `y` and show its shape.
y = train_labels
np.shape(y)

In [None]:
# Rescale pixel features with min-max scaling.
# The scaler is fitted on the training images only. The test images are then
# transformed with those same training minima/maxima, so both sets share one
# feature scale and no test-set statistics enter the preprocessing.
scaler = MinMaxScaler()
X = scaler.fit_transform(train_images)  # X is the scaled training set
test_scaled = scaler.transform(test_images)  # reuse the fit; do not refit on test data
X.shape

In [None]:
# Take the first SMALL_N rows as a smaller working set (presumably to keep the
# cross-validation and grid-search cells below fast -- confirm with the author).
# One constant drives both slices so features and labels stay row-aligned.
SMALL_N = 10000
small_X = X[:SMALL_N, :]
small_y = y[:SMALL_N]

In [None]:
# Sanity-check the subset. small_X / small_y hold at most the first 10000 rows
# of X / y, so small_X keeps X's column count but not necessarily its row count.
print(small_X.shape)
print(small_y.shape)
# Features and labels must describe the same samples.
assert small_X.shape[0] == small_y.shape[0], "small_X and small_y are not row-aligned"

In [None]:
# Nearest-neighbour classifier (disabled): this cell is kept for reference only.
# Do not run it as-is -- KNeighborsClassifier is not imported in this notebook.
# clf = KNeighborsClassifier(n_neighbors=3, algorithm='kd_tree')
# clf

In [None]:
# y_pred = cross_val_predict(clf, small_X, small_y, cv=5)

In [None]:
# y_pred

In [None]:
# cm1 = confusion_matrix(small_y, y_pred, labels = [0, 1, 2, 3, 4,5,6,7,8,9])
# cm1

In [None]:
# accuracy = accuracy_score(small_y, y_pred)
# accuracy

In [None]:
# y_score = clf.predict_proba(small_X)
# y_score

In [None]:
# One-vs-rest indicator encoding of the subset labels.
# `lb` is kept as a fitted object so it stays available to other cells.
lb = LabelBinarizer()
lb.fit(small_y)
y_binarized = lb.transform(small_y)
y_binarized

In [None]:
# Baseline logistic regression: library defaults apart from the iteration cap.
logistic_base = LogisticRegression(
    max_iter=10000,
)

In [None]:
# Time the 5-fold cross-validated baseline and score its out-of-fold predictions.
start_time = time.time()
y_pred_log_base = cross_val_predict(
    logistic_base,
    small_X,
    small_y,
    cv=5,
)
accuracy_log_base = accuracy_score(small_y, y_pred_log_base)
elapsed_seconds = time.time() - start_time  # covers prediction and scoring

print(elapsed_seconds)
print(accuracy_log_base)

In [None]:
# Show the baseline cross-validated accuracy as this cell's output.
accuracy_log_base
# Disabled experiment, kept for reference: a single grid search over both C
# values and both penalties. The author's note at the end records that it
# failed to converge.
# NOTE(review): `logistic_tuning` is not defined in any cell up to this point --
# confirm where it is meant to come from before re-enabling this.
# params = {'C':[0.01, 0.1], 'penalty': ['l1', 'l2']}

# grid_logistic = GridSearchCV(logistic_tuning, params, cv=5, scoring = 'accuracy')
# grid_logistic.fit(small_X, small_y)

# print(grid_logistic.best_params_)
# print(grid_logistic.best_score_)

# # fails to converge :(

In [None]:
# Estimator shared by the grid-search cells. It is defined here because no
# earlier cell creates `logistic_tuning`, so a fresh-kernel run would otherwise
# stop with a NameError. The 'saga' solver is chosen because the default
# 'lbfgs' solver does not support the 'l1' penalty that the grid includes.
# NOTE(review): solver choice is an assumption -- confirm the intended setup.
logistic_tuning = LogisticRegression(solver='saga', max_iter=10000)

# Compare L1 vs. L2 regularisation at C = 0.01 using 5-fold CV accuracy.
params_0 = {'C': [0.01], 'penalty': ['l1', 'l2']}

grid_logistic_0 = GridSearchCV(logistic_tuning, params_0, cv=5, scoring='accuracy')
grid_logistic_0.fit(small_X, small_y)

print(grid_logistic_0.best_params_)
print(grid_logistic_0.best_score_)

In [None]:
# Compare L1 vs. L2 regularisation at C = 0.1 using 5-fold CV accuracy.
params_1 = dict(C=[0.1], penalty=['l1', 'l2'])

grid_logistic_1 = GridSearchCV(
    estimator=logistic_tuning,
    param_grid=params_1,
    scoring='accuracy',
    cv=5,
)
grid_logistic_1.fit(small_X, small_y)

# Best penalty for this C, followed by its mean cross-validated accuracy.
print(grid_logistic_1.best_params_)
print(grid_logistic_1.best_score_)

In [None]:
# Compare L1 vs. L2 regularisation at C = 1 using 5-fold CV accuracy.
params_2 = {'C': [1], 'penalty': ['l1', 'l2']}

# Search the C = 1 grid and keep the result under its own name, so the
# C = 0.1 search stored in grid_logistic_1 is not overwritten.
grid_logistic_2 = GridSearchCV(logistic_tuning, params_2, cv=5, scoring='accuracy')
grid_logistic_2.fit(small_X, small_y)

print(grid_logistic_2.best_params_)
print(grid_logistic_2.best_score_)