# Fashion Detection

You are a senior data scientist at an LA-based online store. Your CEO has come up with an eccentric idea: to analyze upcoming fashion trends through neither marketplace analytics nor advertisement analysis.

Instead, they plan to release a swarm of drones above the streets of Los Angeles and utilize computer vision to recognize the kinds of clothes people are wearing. Based on this insight, your company plans to make reactive changes to marketplace desires & trends.

You are tasked with building a couple of machine learning models to test the viability of this approach. Namely, you will create:
* A kNN model with PCA pre-processing
* A feed-forward neural network in Keras

In [None]:
import numpy as np

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Flatten

from tensorflow.keras.datasets import fashion_mnist

In [None]:
# load the Fashion MNIST data, which arrives as a (train, test) pair of (x, y) tuples
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# TODO: view the shape of your training data
# (x_train and y_train were unpacked from fashion_mnist.load_data() above)
...

In [None]:
# TODO: view the content of your first training example (the first entry of x_train)
...

In [None]:
# TODO: count the number of unique classes in your target variable
# NOTE: np.unique_counts was added in NumPy 2.0; on older versions use
# np.unique(..., return_counts=True) instead.
np.unique_counts(...)

In [None]:
# TODO: view one training example
# The title below reads y_train[0], so pass the matching (first) image to
# imshow to keep the picture and its label in sync.
plt.imshow(..., cmap='Blues')
plt.title(f"Clothing: {y_train[0]}")
plt.axis('off')
plt.show()

In [None]:
# helper function to view images of clothes
def plot_gallery(title, images, n_col=4, n_row=5, image_shape=(28, 28)):
    """Draw the leading entries of ``images`` on an ``n_row`` x ``n_col`` grid.

    Parameters
    ----------
    title : str
        Figure-level title.
    images : iterable of array-like
        Each entry is reshaped to ``image_shape`` before drawing. Entries are
        paired with axes in order, so only the first ``n_col * n_row`` are
        shown and any extras are ignored.
    n_col, n_row : int
        Number of grid columns and rows.
    image_shape : tuple of int
        Target 2-D shape for every entry; defaults to 28x28.

    Notes
    -----
    The shared colorbar is tied to the last image drawn. When ``images`` is
    empty no colorbar is added and the (blank) figure is still shown.
    """
    fig, axs = plt.subplots(
        nrows=n_row,
        ncols=n_col,
        figsize=(2.0 * n_col, 2.3 * n_row),
        constrained_layout=True,
        squeeze=False,  # always a 2-D axes array, so .flat works for a 1x1 grid too
    )
    fig.set_constrained_layout_pads(w_pad=0.01, h_pad=0.02, hspace=0, wspace=0)
    fig.set_edgecolor("black")
    fig.suptitle(title, size=16)

    # Switch every axis off up front so grid cells without an image stay blank
    # instead of showing an empty frame with ticks.
    for ax in axs.flat:
        ax.axis("off")

    im = None
    for ax, vec in zip(axs.flat, images):
        im = ax.imshow(
            np.asarray(vec).reshape(image_shape),
            cmap='Blues',
            interpolation="nearest"
        )

    # Without at least one image there is no mappable to build a colorbar from.
    if im is not None:
        fig.colorbar(im, ax=axs, orientation="horizontal", shrink=0.99, aspect=40, pad=0.01)
    plt.show()

# plot_gallery pairs images with axes via zip, so only the first
# n_col * n_row entries of x_train (20 with the defaults) are drawn.
plot_gallery("Sample Training from Fashion MNIST", x_train)

## kNN + PCA Implementation 

Use PCA to reduce your high-dimensional dataset to a smaller number of components, then fit a kNN classifier on those components to check whether the reduced representation is sufficient for an accurate model.

In [None]:
from sklearn.decomposition import PCA

import seaborn as sns

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

from sklearn.model_selection import train_test_split, RandomizedSearchCV

In [None]:
# collapse each sample into one row vector, keeping one row per sample
x_train_flat = np.reshape(x_train, (len(x_train), -1))
x_test_flat = np.reshape(x_test, (len(x_test), -1))

print("new x_train shape", x_train_flat.shape)
print("new x_test shape", x_test_flat.shape)

In [None]:
# draw the first flattened sample as a single one-row strip of pixels
fig, ax = plt.subplots(figsize=(18, 1))
first_sample_strip = x_train_flat[0][np.newaxis, :]  # add a leading axis: (n,) -> (1, n)
ax.imshow(first_sample_strip, cmap='Blues', aspect='auto')
ax.set_title("Flattened Image (1x784)")
ax.set_xlabel("Pixel index")
ax.set_yticks([])  # a single row has no meaningful y axis
fig.tight_layout()
plt.show()

In [None]:
# TODO: initialize PCA to reduce your flattened data to 8 components for visualization
pca_estimator = ...

# TODO: fit the estimator on your flattened data
pca_estimator.fit(...)
# plot_gallery reshapes each entry of components_ to 28x28 by default;
# a 4-column x 2-row grid has exactly 8 cells, one per component.
plot_gallery("Eigenclothes on 8 Components", pca_estimator.components_, 4, 2)

In [None]:
# TODO: create 50 components using PCA
pca = ...

# TODO: convert our training & testing predictor variables to those 50 principal components
X_train_pca = ...
X_test_pca = ...

# display the shape of the reduced training data as the cell output
X_train_pca.shape

In [None]:
# TODO: implement random-search on the knn model to find best hyperparams
# Candidate values the search samples from; n_neighbors covers 5, 10, ..., 45.
# NOTE(review): with KNeighborsClassifier's default p=2, "minkowski" is the same
# distance as "euclidean", so those two entries look redundant — confirm intent.
params = {
    "n_neighbors": range(5, 50, 5),
    "weights": ["uniform", "distance"],
    "metric": ["cityblock", "cosine", "euclidean", "minkowski"]
}

knn = ...

# TODO: set up RandomizedSearchCV with 5-fold cross-validation
random_search = ...

# TODO: fit this model on your PCA training data
...

In [None]:
# TODO: extract the best estimator
best_knn = ...

# TODO: predict on testing data
# (the plotting cell below indexes yhat with test-set indices, so yhat needs
# one prediction per test sample)
yhat = ...

# TODO: evaluate its accuracy
confusion_mat = confusion_matrix(..., ...)
class_report = classification_report(..., ...)
accuracy = accuracy_score(..., ...)

print("Accuracy Score\n",accuracy)
print("Confusion Matrix\n", confusion_mat)
print("Classification Report\n", class_report)

In [None]:
# Plot a random sample of test images with their true and kNN-predicted labels.
# NOTE: run this cell before the Keras section, which reassigns `yhat` to the
# raw network output; after that, yhat[idx] is no longer a single label.
n_samples_to_plot = 16
n_plot_cols = 4
# Derive the row count (ceiling division) from the sample count so that
# changing n_samples_to_plot never requests a subplot outside the grid.
n_plot_rows = (n_samples_to_plot + n_plot_cols - 1) // n_plot_cols

# Randomly sample indices from the test set
sample_idxs = np.random.choice(len(y_test), n_samples_to_plot, replace=False)

# 2 inches of height per row keeps the original 12x8 figure for 16 samples
plt.figure(figsize=(12, 2 * n_plot_rows))
for i, idx in enumerate(sample_idxs):
    img = x_test[idx]                      # original image (28x28)
    true_label = y_test[idx]
    pred_label = yhat[idx]

    # Subplot setup
    plt.subplot(n_plot_rows, n_plot_cols, i + 1)
    # 'Blues' matches the colormap used by every other image plot in this notebook
    plt.imshow(img, cmap='Blues')
    # green title for a correct prediction, red for a miss
    title_color = 'green' if true_label == pred_label else 'red'
    plt.title(f"True: {true_label}\nPred: {pred_label}",
              color=title_color, fontsize=10)
    plt.axis('off')

plt.suptitle("Fashion MNIST – Random Predictions", fontsize=16)
plt.tight_layout()
plt.show()

## Keras Implementation

In [None]:
# rescale your data before running your model: every value is divided by 255
# (assuming 0-255 pixel intensities this maps them into [0, 1]; it is min-max
# scaling rather than mean/std standardization)
x_train_flat_norm, x_test_flat_norm = (
    pixels / 255.0 for pixels in (x_train_flat, x_test_flat)
)

In [None]:
# TODO: Create a sequential model with at least 3 layers.
# Input layer: An input object with as many nodes as predictors
#   (later cells evaluate on x_test_flat_norm, so the predictors are the
#   flattened pixel columns)
# Dense layer: a hidden layer with `relu` or `tanh` activation function
# Output layer: an output layer with as many nodes as classes and a `softmax` activation function
model_clothes = ...

In [None]:
# TODO: compile your model with the 'adam' optimizer, 'sparse_categorical_crossentropy' loss and 'accuracy' for metrics
model_clothes.compile(...)

# TODO: fit your model for 100 epochs, holding out 20% of the training data for validation
# (the evaluation cell below uses x_test_flat_norm, so train on the matching scaled data)
model_clothes.fit(...)

In [None]:
# measure the accuracy of your model on your test set
# (unpacking two values assumes the model was compiled with exactly one metric)
loss, accuracy = model_clothes.evaluate(
    x=x_test_flat_norm,
    y=y_test,
    verbose=0,
)
print(f"Loss: {loss}, Test Accuracy: {accuracy}")

In [None]:
# run the network on the scaled test set and keep the raw output for later evaluation
# (presumably one score per class in each row, given the softmax output layer)
yhat = model_clothes.predict(x_test_flat_norm)

# the predicted label is the index of the largest value along axis 1
predicted_classes = yhat.argmax(axis=1)
predicted_classes

In [None]:
# Plot a random sample of test images with their true and network-predicted labels.
n_samples_to_plot = 16
n_plot_cols = 4
# Derive the row count (ceiling division) from the sample count so that
# changing n_samples_to_plot never requests a subplot outside the grid.
n_plot_rows = (n_samples_to_plot + n_plot_cols - 1) // n_plot_cols

# Randomly sample indices from the test set
sample_idxs = np.random.choice(len(y_test), n_samples_to_plot, replace=False)

# 2 inches of height per row keeps the original 12x8 figure for 16 samples
plt.figure(figsize=(12, 2 * n_plot_rows))
for i, idx in enumerate(sample_idxs):
    img = x_test[idx]                      # original image (28x28)
    true_label = y_test[idx]
    pred_label = predicted_classes[idx]

    # Subplot setup
    plt.subplot(n_plot_rows, n_plot_cols, i + 1)
    plt.imshow(img, cmap='Blues')
    # green title for a correct prediction, red for a miss
    title_color = 'green' if true_label == pred_label else 'red'
    plt.title(f"True: {true_label}\nPred: {pred_label}",
              color=title_color, fontsize=10)
    plt.axis('off')

plt.suptitle("Fashion MNIST – Random Predictions", fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
# extract weights & biases from the first layer that actually has parameters
# (indexing layers[0] directly fails to unpack when the model starts with a
# weightless layer such as Flatten, whose get_weights() returns an empty list)
first_weighted_layer = next(
    layer for layer in model_clothes.layers if layer.weights
)
weights, biases = first_weighted_layer.get_weights()

# for a Dense layer: (n_inputs, n_units), e.g. (784, 128) with 128 hidden units
print("weights shape:", weights.shape)
# one bias per unit, e.g. (128,)
print("biases shape:", biases.shape)


In [None]:
# Visualize up to the first 20 neurons' weight vectors as 28x28 images
fig, axes = plt.subplots(4, 5, figsize=(15, 6))

# A layer with fewer units than grid cells would otherwise raise IndexError
# when indexing weights[:, i] past its last column.
n_neurons_to_plot = min(axes.size, weights.shape[1])

for i, ax in enumerate(axes.flat):
    if i < n_neurons_to_plot:
        weight_vector = weights[:, i]                   # incoming weights of neuron i
        weight_image = weight_vector.reshape((28, 28))  # reshape from 784 to 28x28

        ax.imshow(weight_image, cmap='Blues')
        ax.set_title(f'Neuron {i}')
    # surplus grid cells are simply left blank
    ax.axis('off')

plt.suptitle("Visualizations of First Layer Weights")
plt.tight_layout()
plt.show()

In [None]:
# serialize your trained model to "model.keras" (a relative path, so it lands
# in the notebook's current working directory)
# NOTE(review): presumably this is the file cloth-predict.py loads — verify against that script
model_clothes.save("model.keras")

To run your clothing prediction algorithm in streamlit, install the dependencies below in your `ds` environment and then run `streamlit run cloth-predict.py` in your terminal!

In [None]:
!pip install streamlit opencv-python pillow