# Fashion MNIST

## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
#...

from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_curve, auc

## Prepare Data

- convert the raw ubyte (IDX) image/label files to a DataFrame/CSV.


## Load Data

- load dataframe.
- check for missing values.
- check for duplicated rows.

In [None]:
# Read the train and test CSV files into DataFrames (paths are relative to the
# working directory).
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('fashion-mnist_train.csv', 'fashion-mnist_test.csv')
)

In [None]:
# Column 0 is used as the class label; every remaining column is a feature.
# Features are divided by 255.0 (presumably 8-bit pixel intensities, so this
# rescales them to [0, 1] -- confirm against the CSV contents).
train_labels, test_labels = (
    frame.iloc[:, 0].values for frame in (train_data, test_data)
)
train_images, test_images = (
    frame.iloc[:, 1:].values / 255.0 for frame in (train_data, test_data)
)

## Data Preprocessing

- feature extraction
- scaling

In [None]:
# Hold out 20% of the training rows as a validation set; the fixed seed keeps
# the split reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    train_images,
    train_labels,
    test_size=0.2,
    random_state=42,
)

## Model Training


### LogisticRegression

In [None]:
# Fit a logistic-regression classifier on the training split.
# `multi_class` is deliberately not passed: the parameter is deprecated since
# scikit-learn 1.5 (it emits a FutureWarning and is scheduled for removal).
# With the lbfgs solver and more than two classes, the default behaviour is
# already the multinomial formulation, so the fitted model is unchanged.
logistic_model = LogisticRegression(max_iter=1000, solver='lbfgs')
logistic_model.fit(X_train, y_train)

# Hard predictions for the validation and test sets; class probabilities for
# the test set are kept for the ROC analysis.
logistic_val_predictions = logistic_model.predict(X_val)
logistic_test_predictions = logistic_model.predict(test_images)
logistic_test_probabilities = logistic_model.predict_proba(test_images)

logistic_val_accuracy = accuracy_score(y_val, logistic_val_predictions)
logistic_test_accuracy = accuracy_score(test_labels, logistic_test_predictions)

print(f'Logistic Regression Validation Accuracy: {logistic_val_accuracy:.4f}')
print(f'Logistic Regression Test Accuracy: {logistic_test_accuracy:.4f}')

# Rows are true classes, columns are predicted classes.
logistic_conf_matrix = confusion_matrix(test_labels, logistic_test_predictions)
print('Logistic Regression Confusion Matrix:')
print(logistic_conf_matrix)

# Macro averaging: unweighted mean of the per-class scores.
logistic_precision = precision_score(test_labels, logistic_test_predictions, average='macro')
logistic_recall = recall_score(test_labels, logistic_test_predictions, average='macro')

print(f'Logistic Regression Precision: {logistic_precision:.4f}')
print(f'Logistic Regression Recall: {logistic_recall:.4f}')


### KNeighborsClassifier (KNN)

In [None]:
# k-nearest-neighbours classifier (k=5) fitted on the training split.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predictions on the validation and test sets; test-set class probabilities
# are kept for the ROC analysis.
knn_val_predictions = knn_model.predict(X_val)
knn_test_predictions = knn_model.predict(test_images)
knn_test_probabilities = knn_model.predict_proba(test_images)

# Compute every metric first, then report them in one place.
knn_val_accuracy = accuracy_score(y_val, knn_val_predictions)
knn_test_accuracy = accuracy_score(test_labels, knn_test_predictions)
knn_conf_matrix = confusion_matrix(test_labels, knn_test_predictions)
knn_precision = precision_score(
    test_labels, knn_test_predictions, average='macro'
)
knn_recall = recall_score(
    test_labels, knn_test_predictions, average='macro'
)

print(f'KNN Validation Accuracy: {knn_val_accuracy:.4f}')
print(f'KNN Test Accuracy: {knn_test_accuracy:.4f}')
print('KNN Confusion Matrix:')
print(knn_conf_matrix)
print(f'KNN Precision: {knn_precision:.4f}')
print(f'KNN Recall: {knn_recall:.4f}')

## Model Evaluation

- loss curve.
- accuracy curve.
- confusion_matrix (precision, recall).
- ROC curve (AUC).

In [None]:
# One-vs-rest ROC curves and AUC for each of the 10 classes, computed on the
# test set for both models. Each dict is keyed by the class index.
fpr_logistic, tpr_logistic, roc_auc_logistic = {}, {}, {}
fpr_knn, tpr_knn, roc_auc_knn = {}, {}, {}

for class_id in range(10):
    # Binary target: True where the sample belongs to the current class.
    is_class = test_labels == class_id

    fpr_logistic[class_id], tpr_logistic[class_id], _ = roc_curve(
        is_class, logistic_test_probabilities[:, class_id]
    )
    roc_auc_logistic[class_id] = auc(fpr_logistic[class_id], tpr_logistic[class_id])

    fpr_knn[class_id], tpr_knn[class_id], _ = roc_curve(
        is_class, knn_test_probabilities[:, class_id]
    )
    roc_auc_knn[class_id] = auc(fpr_knn[class_id], tpr_knn[class_id])

# Plot both models on one set of axes: solid lines for logistic regression,
# dashed lines for KNN, plus the diagonal chance line.
fig, ax = plt.subplots(figsize=(12, 8))
for class_id in range(10):
    ax.plot(
        fpr_logistic[class_id],
        tpr_logistic[class_id],
        label=f'Logistic Class {class_id} (AUC = {roc_auc_logistic[class_id]:.2f})',
    )
    ax.plot(
        fpr_knn[class_id],
        tpr_knn[class_id],
        label=f'KNN Class {class_id} (AUC = {roc_auc_knn[class_id]:.2f})',
        linestyle='--',
    )
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc='lower right')
plt.show()


## Conclusion