# Fashion-MNIST Test

In [36]:
import os
import urllib
import urllib.request
import cv2

import numpy as np
import matplotlib.pyplot as plt

from zipfile import ZipFile
import sys
sys.path.insert(1, r'../src')

from utils import one_hot_encode_index


## Data Preparation 

In [3]:
# Location of the dataset archive and the local names used for it.
URL = 'https://nnfs.io/datasets/fashion_mnist_images.zip'
FILE = 'fashion_mnist_images.zip'
FOLDER = 'fashion_mnist_images'

# Fetch the archive only when it is not already on disk.
if not os.path.isfile(FILE):
    print(f'Downloading {URL} and saving as {FILE}...')
    # Download under a temporary name and rename on success, so an
    # interrupted transfer never leaves a truncated FILE behind that later
    # runs would mistake for a complete archive and then fail to unzip.
    partial_file = FILE + '.part'
    urllib.request.urlretrieve(URL, partial_file)
    os.replace(partial_file, FILE)

# Extract the archive contents into FOLDER (runs on every execution).
print('Unzipping images...')
with ZipFile(FILE) as zip_images:
    zip_images.extractall(FOLDER)

print('Done!')


Downloading https://nnfs.io/datasets/fashion_mnist_images.zip and saving as fashion_mnist_images.zip...
Unzipping images...
Done!


## Loading Data 

In [13]:
# Loads a MNIST dataset
def load_mnist_dataset(dataset, path):
    """Load every image of one dataset split together with its label.

    The expected layout is ``path/dataset/<label>/<image files>``. Each
    ``<label>`` folder name is used as the label of the images inside it and
    must therefore be convertible to ``uint8``.

    Args:
        dataset: Name of the split sub-folder, e.g. ``'train'`` or ``'test'``.
        path: Root folder that contains the split sub-folders.

    Returns:
        A tuple ``(X, y)``. ``X`` is a numpy array built from the images as
        returned by ``cv2.imread(..., cv2.IMREAD_UNCHANGED)``; ``y`` is a
        ``uint8`` numpy array holding one label per image.

    Raises:
        ValueError: If a file inside a label folder cannot be read as an
            image (``cv2.imread`` returned ``None``).
    """
    split_dir = os.path.join(path, dataset)
    # Only sub-folders are labels: a stray file next to them would otherwise
    # crash the nested listdir below. Sorting makes the sample order
    # reproducible, since os.listdir returns entries in arbitrary order.
    labels = sorted(
        entry for entry in os.listdir(split_dir)
        if os.path.isdir(os.path.join(split_dir, entry))
    )
    # Create lists for samples and labels
    X = []
    y = []
    # For each label folder
    for label in labels:
        label_dir = os.path.join(split_dir, label)
        # And for each image in given folder
        for file in sorted(os.listdir(label_dir)):
            image_path = os.path.join(label_dir, file)
            # Read the image
            image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
            # cv2.imread signals failure by returning None instead of
            # raising; fail loudly rather than storing None as a sample.
            if image is None:
                raise ValueError(f'Could not read image: {image_path}')
            # And append it and a label to the lists
            X.append(image)
            y.append(label)
    # Convert the data to proper numpy arrays and return
    return np.array(X), np.array(y).astype('uint8')


def create_data_mnist(path):
    """Load the 'train' and 'test' splits found under ``path``.

    Labels of both splits are passed through ``one_hot_encode_index`` with a
    second argument of 10 (presumably the number of classes; confirm
    against ``utils.one_hot_encode_index``).

    Args:
        path: Root folder containing the ``train`` and ``test`` sub-folders.

    Returns:
        A tuple ``(X, y, X_test, y_test)``: training samples, encoded
        training labels, test samples, encoded test labels.
    """
    # Read the two splits independently
    train_samples, train_labels = load_mnist_dataset('train', path)
    test_samples, test_labels = load_mnist_dataset('test', path)

    # Encode the labels while assembling the result
    return (
        train_samples,
        one_hot_encode_index(train_labels, 10),
        test_samples,
        one_hot_encode_index(test_labels, 10),
    )


In [37]:
# Load the train and test splits from the folder the archive was extracted to
X, y, X_test, y_test = create_data_mnist('fashion_mnist_images')


## Preprocess data

### Scaling between -1 & 1

In [46]:
def scale_img(v):
    """Shift and scale ``v`` so that 0 maps to -1 and 255 maps to 1.

    Args:
        v: A number or numpy array of values to rescale.

    Returns:
        ``(v - 127.5) / 127.5``, computed element-wise for arrays.
    """
    midpoint = 127.5
    centered = v - midpoint
    return centered / midpoint

# Rescale the training and test samples with the same mapping
X, X_test = scale_img(X), scale_img(X_test)

### Reshaping data 

In [49]:
def vectorize(v):
    """Flatten every sample of ``v`` into a single row.

    The first axis is kept as-is and all remaining axes are collapsed into
    one, e.g. shape ``(n, 28, 28)`` becomes ``(n, 784)``.

    Args:
        v: Array whose first axis indexes the samples.

    Returns:
        A 2-D array of shape ``(v.shape[0], product of the other axes)``.
    """
    # Compute the row width explicitly instead of passing -1: numpy cannot
    # infer -1 when the array holds zero samples and raises ValueError.
    row_width = 1
    for dim in v.shape[1:]:
        row_width *= dim
    return v.reshape(v.shape[0], row_width)

In [50]:
# Flatten each sample of both splits into a row vector
X, X_test = vectorize(X), vectorize(X_test)

### Shuffle Training data 

In [51]:
# One index per training sample, shuffled in place
keys = np.arange(X.shape[0])
np.random.shuffle(keys)
# Apply the same permutation to samples and labels so pairs stay aligned
X, y = X[keys], y[keys]