# Import Required Libraries
Import the necessary libraries, including NumPy, pandas, scikit-learn, and matplotlib.

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import matplotlib.pyplot as plt

# Load and Transform Data
Load the Fashion-MNIST dataset from the ubyte files and transform it into a format suitable for saving as CSV files.

In [None]:
# Load and Transform Data

# Function to load ubyte files
def load_mnist(path, kind='train'):
    """Load one split of an IDX-formatted (ubyte) MNIST-style dataset.

    Reads ``{kind}-labels-idx1-ubyte`` and ``{kind}-images-idx3-ubyte`` from
    ``path``. The number of items and the image dimensions are taken from the
    file headers instead of being assumed to be 28x28 (784 pixels), so the
    loader keeps working for 28x28 data and also accepts other image sizes.

    Args:
        path: Directory that contains the two ubyte files. An empty string
            resolves to the current working directory.
        kind: File-name prefix of the split, e.g. ``'train'`` or ``'t10k'``.

    Returns:
        A tuple ``(images, labels)`` of ``uint8`` arrays. ``images`` has shape
        ``(n_items, rows * cols)`` and ``labels`` has shape ``(n_items,)``.
        Both are read-only views over the bytes read from disk.

    Raises:
        ValueError: If a header is truncated, a magic number does not match
            the IDX label/image format, the payload is shorter than the header
            announces, or the two files disagree on the number of items.
        OSError: If either file cannot be opened.
    """
    import os
    import struct

    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte')

    with open(labels_path, 'rb') as lbpath:
        label_bytes = lbpath.read()

    with open(images_path, 'rb') as imgpath:
        image_bytes = imgpath.read()

    # IDX headers are big-endian 32-bit integers: a magic number followed by
    # one size per dimension (labels: count; images: count, rows, cols).
    if len(label_bytes) < 8:
        raise ValueError(f'{labels_path}: truncated IDX label header')
    if len(image_bytes) < 16:
        raise ValueError(f'{images_path}: truncated IDX image header')

    label_magic, label_count = struct.unpack_from('>II', label_bytes, 0)
    image_magic, image_count, rows, cols = struct.unpack_from('>IIII', image_bytes, 0)

    # 2049 (0x0801) marks a 1-D unsigned-byte file, 2051 (0x0803) a 3-D one.
    if label_magic != 2049:
        raise ValueError(f'{labels_path}: unexpected magic number {label_magic}')
    if image_magic != 2051:
        raise ValueError(f'{images_path}: unexpected magic number {image_magic}')
    if label_count != image_count:
        raise ValueError(
            f'label count ({label_count}) does not match image count ({image_count})'
        )

    # np.frombuffer raises ValueError when the payload is shorter than `count`.
    labels = np.frombuffer(label_bytes, dtype=np.uint8, count=label_count, offset=8)
    images = np.frombuffer(
        image_bytes, dtype=np.uint8, count=image_count * rows * cols, offset=16
    ).reshape(image_count, rows * cols)

    return images, labels

# Read the training and test splits from the ubyte files in the current
# working directory (an empty path joins to the bare file name).
(train_images, train_labels), (test_images, test_labels) = (
    load_mnist('', kind=split) for split in ('train', 't10k')
)

# Wrap each pixel array in a DataFrame and place the class label in the
# leading column, ahead of the pixel columns.
train_df, test_df = pd.DataFrame(train_images), pd.DataFrame(test_images)
train_df.insert(loc=0, column='label', value=train_labels)
test_df.insert(loc=0, column='label', value=test_labels)

# Save Data to CSV Files
Save the transformed training and test datasets to CSV files.

In [None]:
# Save Data to CSV Files

# Write each split to its own CSV file, leaving out the DataFrame index.
for _frame, _csv_name in (
    (train_df, 'fashion_mnist_train.csv'),
    (test_df, 'fashion_mnist_test.csv'),
):
    _frame.to_csv(_csv_name, index=False)

# Load Data from CSV Files
Load the training and test datasets from the CSV files.

In [None]:
# Load Data from CSV Files

# Read the training and test splits from their CSV files.
train_df, test_df = (
    pd.read_csv(csv_name)
    for csv_name in ('fashion_mnist_train.csv', 'fashion_mnist_test.csv')
)

# For each split, the 'label' column is the target and every remaining
# column is a feature.
y_train, X_train = train_df['label'], train_df.drop(columns=['label'])
y_test, X_test = test_df['label'], test_df.drop(columns=['label'])

# Train Logistic Regression Model
Train a logistic regression model on the training dataset.

In [None]:
# Train Logistic Regression Model


# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the logistic regression classifier on the scaled training data.
logistic_model = LogisticRegression(max_iter=1000)
logistic_model.fit(X_train_scaled, y_train)

# Predict once and derive every label-based metric from these predictions.
y_pred_logistic = logistic_model.predict(X_test_scaled)

# Fraction of correct predictions; same value as logistic_model.score() on
# this data, without running prediction a second time.
accuracy_logistic = float(np.mean(y_pred_logistic == np.asarray(y_test)))

conf_matrix_logistic = confusion_matrix(y_test, y_pred_logistic)
class_report_logistic = classification_report(y_test, y_pred_logistic)

# ROC curve and AUC. roc_curve is a binary metric, so this is the one-vs-rest
# curve for label 1 only (label 1 against every other label), not a summary
# over all classes. The probability column is looked up through classes_
# instead of assuming that column 1 belongs to label 1.
roc_positive_class = 1
y_prob_logistic = logistic_model.predict_proba(X_test_scaled)
roc_column = list(logistic_model.classes_).index(roc_positive_class)
fpr_logistic, tpr_logistic, _ = roc_curve(
    y_test, y_prob_logistic[:, roc_column], pos_label=roc_positive_class
)
roc_auc_logistic = auc(fpr_logistic, tpr_logistic)

# Print accuracy and classification report
print(f'Logistic Regression Accuracy: {accuracy_logistic}')
print('Classification Report:')
print(class_report_logistic)

# Plot confusion matrix. matshow is pointed at the figure created here by its
# actual number; a hard-coded fignum=1 draws on an older figure whenever
# figure 1 is still open (e.g. when plt.show() does not close figures).
conf_fig = plt.figure(figsize=(10, 7))
plt.matshow(conf_matrix_logistic, cmap='coolwarm', fignum=conf_fig.number)
plt.title('Confusion Matrix - Logistic Regression')
plt.colorbar()
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Plot ROC curve (dashed diagonal = chance level)
plt.figure()
plt.plot(fpr_logistic, tpr_logistic, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc_logistic:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic - Logistic Regression')
plt.legend(loc='lower right')
plt.show()

# Train K-Nearest Neighbors Model
Train a K-Nearest Neighbors (KNN) model on the training dataset.

In [None]:
# Train K-Nearest Neighbors Model

# Fit a 5-nearest-neighbours classifier. Note that it is trained on X_train
# as loaded (not on the standardised copy used for logistic regression).
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)

# Predict once and derive every label-based metric from these predictions.
y_pred_knn = knn_model.predict(X_test)

# Fraction of correct predictions; same value as knn_model.score() on this
# data, but without repeating the neighbour search that every KNN prediction
# call performs.
accuracy_knn = float(np.mean(y_pred_knn == np.asarray(y_test)))

conf_matrix_knn = confusion_matrix(y_test, y_pred_knn)
class_report_knn = classification_report(y_test, y_pred_knn)

# ROC curve and AUC. roc_curve is a binary metric, so this is the one-vs-rest
# curve for label 1 only (label 1 against every other label), not a summary
# over all classes. The probability column is looked up through classes_
# instead of assuming that column 1 belongs to label 1.
roc_positive_class = 1
y_prob_knn = knn_model.predict_proba(X_test)
roc_column = list(knn_model.classes_).index(roc_positive_class)
fpr_knn, tpr_knn, _ = roc_curve(
    y_test, y_prob_knn[:, roc_column], pos_label=roc_positive_class
)
roc_auc_knn = auc(fpr_knn, tpr_knn)

# Print accuracy and classification report
print(f'K-Nearest Neighbors Accuracy: {accuracy_knn}')
print('Classification Report:')
print(class_report_knn)

# Plot confusion matrix. matshow is pointed at the figure created here by its
# actual number; a hard-coded fignum=1 draws on an older figure whenever
# figure 1 is still open (e.g. when plt.show() does not close figures).
conf_fig = plt.figure(figsize=(10, 7))
plt.matshow(conf_matrix_knn, cmap='coolwarm', fignum=conf_fig.number)
plt.title('Confusion Matrix - K-Nearest Neighbors')
plt.colorbar()
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Plot ROC curve (dashed diagonal = chance level)
plt.figure()
plt.plot(fpr_knn, tpr_knn, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc_knn:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic - K-Nearest Neighbors')
plt.legend(loc='lower right')
plt.show()

# Evaluate Models
Evaluate the performance of the trained models using various metrics.

In [None]:
# Evaluate Models

# Compare Logistic Regression and K-Nearest Neighbors

# Accuracy of each model, one line per model.
for _line in (
    f'Logistic Regression Accuracy: {accuracy_logistic}',
    f'K-Nearest Neighbors Accuracy: {accuracy_knn}',
):
    print(_line)

# Overlay both ROC curves on a single set of axes, with the chance-level
# diagonal drawn as a dashed line.
_roc_fig, _roc_ax = plt.subplots()
for _fpr, _tpr, _colour, _legend in (
    (fpr_logistic, tpr_logistic, 'darkorange', f'Logistic Regression (area = {roc_auc_logistic:.2f})'),
    (fpr_knn, tpr_knn, 'blue', f'K-Nearest Neighbors (area = {roc_auc_knn:.2f})'),
):
    _roc_ax.plot(_fpr, _tpr, color=_colour, lw=2, label=_legend)
_roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
_roc_ax.set_xlim([0.0, 1.0])
_roc_ax.set_ylim([0.0, 1.05])
_roc_ax.set_xlabel('False Positive Rate')
_roc_ax.set_ylabel('True Positive Rate')
_roc_ax.set_title('Receiver Operating Characteristic - Comparison')
_roc_ax.legend(loc='lower right')
plt.show()

# Text classification reports, each under its own heading.
print('Classification Report - Logistic Regression:', class_report_logistic, sep='\n')
print('Classification Report - K-Nearest Neighbors:', class_report_knn, sep='\n')

# Confusion matrices next to each other: logistic regression on the left,
# KNN on the right.
fig, axes = plt.subplots(1, 2, figsize=(20, 7))
for _axis, _matrix, _heading in zip(
    axes,
    (conf_matrix_logistic, conf_matrix_knn),
    ('Confusion Matrix - Logistic Regression', 'Confusion Matrix - K-Nearest Neighbors'),
):
    _axis.matshow(_matrix, cmap='coolwarm')
    _axis.set_title(_heading)
    _axis.set_ylabel('True Label')
    _axis.set_xlabel('Predicted Label')

plt.show()

# Loss Curve
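Plot the training loss curve for the logistic regression model, if the fitted estimator provides one (scikit-learn's `LogisticRegression` does not record a `loss_curve_`; estimators such as `MLPClassifier` do).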

In [None]:
# Loss Curve

# Plot the training loss curve when the fitted estimator recorded one.
# scikit-learn's LogisticRegression does not expose a `loss_curve_` attribute
# (estimators such as MLPClassifier do), so reading it unconditionally raised
# AttributeError in this cell. The title is derived from the estimator's class
# so it always names the model that was actually fitted.
loss_curve = getattr(logistic_model, 'loss_curve_', None)
if loss_curve is None:
    print(
        f'Loss curve not available: {type(logistic_model).__name__} '
        'does not record a per-iteration loss.'
    )
else:
    plt.figure()
    plt.plot(loss_curve, label='Loss Curve')
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.title(f'Loss Curve - {type(logistic_model).__name__}')
    plt.legend()
    plt.show()

# Accuracy
Calculate and display the accuracy of both models on the test dataset.

In [None]:
# Accuracy

# Calculate and display the accuracy of both models on the test dataset

# Report the test accuracy of each model, one line per model.
for _line in (
    f'Logistic Regression Accuracy: {accuracy_logistic}',
    f'K-Nearest Neighbors Accuracy: {accuracy_knn}',
):
    print(_line)

# Bar chart of the two accuracies on a 0-1 axis.
models = ['Logistic Regression', 'K-Nearest Neighbors']
accuracies = [accuracy_logistic, accuracy_knn]

_acc_fig, _acc_ax = plt.subplots(figsize=(10, 5))
_acc_ax.bar(models, accuracies, color=['blue', 'orange'])
_acc_ax.set_xlabel('Model')
_acc_ax.set_ylabel('Accuracy')
_acc_ax.set_title('Comparison of Model Accuracies')
_acc_ax.set_ylim(0, 1)
plt.show()

# Confusion Matrix
Generate and visualize the confusion matrix for both models.

In [None]:
# Confusion Matrix

# Recompute both confusion matrices from the stored predictions so this cell
# does not depend on the matrices built in the training cells.
conf_matrix_logistic = confusion_matrix(y_test, y_pred_logistic)
conf_matrix_knn = confusion_matrix(y_test, y_pred_knn)

# Draw one figure per model. matshow is pointed at the figure created in the
# same iteration by its actual number: with a hard-coded fignum=1 the second
# matrix is drawn onto figure 1 again whenever that figure is still open
# (e.g. when plt.show() does not close figures), leaving the new figure empty.
for conf_matrix, conf_title in (
    (conf_matrix_logistic, 'Confusion Matrix - Logistic Regression'),
    (conf_matrix_knn, 'Confusion Matrix - K-Nearest Neighbors'),
):
    conf_fig = plt.figure(figsize=(10, 7))
    plt.matshow(conf_matrix, cmap='coolwarm', fignum=conf_fig.number)
    plt.title(conf_title)
    plt.colorbar()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

# Precision and Recall
Calculate and display the precision and recall for both models.

In [None]:
# Precision and Recall

# Build the classification report once per model and read both metrics from
# its 'weighted avg' row (previously the full report was recomputed for every
# single metric).
weighted_logistic = classification_report(y_test, y_pred_logistic, output_dict=True)['weighted avg']
precision_logistic = weighted_logistic['precision']
recall_logistic = weighted_logistic['recall']

weighted_knn = classification_report(y_test, y_pred_knn, output_dict=True)['weighted avg']
precision_knn = weighted_knn['precision']
recall_knn = weighted_knn['recall']

# Print precision and recall for both models
print(f'Logistic Regression Precision: {precision_logistic}')
print(f'Logistic Regression Recall: {recall_logistic}')
print(f'K-Nearest Neighbors Precision: {precision_knn}')
print(f'K-Nearest Neighbors Recall: {recall_knn}')

# Plot comparison of precision and recall: two bar charts side by side,
# both on a 0-1 axis.
models = ['Logistic Regression', 'K-Nearest Neighbors']
precisions = [precision_logistic, precision_knn]
recalls = [recall_logistic, recall_knn]

plt.figure(figsize=(14, 5))

plt.subplot(1, 2, 1)
plt.bar(models, precisions, color=['blue', 'orange'])
plt.xlabel('Model')
plt.ylabel('Precision')
plt.title('Comparison of Model Precisions')
plt.ylim(0, 1)

plt.subplot(1, 2, 2)
plt.bar(models, recalls, color=['blue', 'orange'])
plt.xlabel('Model')
plt.ylabel('Recall')
plt.title('Comparison of Model Recalls')
plt.ylim(0, 1)

plt.tight_layout()
plt.show()

# ROC and AUC Graph
Plot the ROC curves for both models, individually and overlaid, with each model's previously computed AUC shown in the legend.

In [None]:
# ROC and AUC Graph

# Each curve is described once: (false positive rates, true positive rates,
# line colour, legend text).
_logistic_curve = (
    fpr_logistic, tpr_logistic, 'darkorange',
    f'Logistic Regression (area = {roc_auc_logistic:.2f})',
)
_knn_curve = (
    fpr_knn, tpr_knn, 'blue',
    f'K-Nearest Neighbors (area = {roc_auc_knn:.2f})',
)

# Three figures in turn: logistic regression alone, KNN alone, then both
# overlaid. Every figure also gets the dashed chance-level diagonal.
for _curves, _heading in (
    ((_logistic_curve,), 'Receiver Operating Characteristic - Logistic Regression'),
    ((_knn_curve,), 'Receiver Operating Characteristic - K-Nearest Neighbors'),
    ((_logistic_curve, _knn_curve), 'Receiver Operating Characteristic - Comparison'),
):
    _roc_fig, _roc_ax = plt.subplots()
    for _fpr, _tpr, _colour, _legend in _curves:
        _roc_ax.plot(_fpr, _tpr, color=_colour, lw=2, label=_legend)
    _roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    _roc_ax.set_xlim([0.0, 1.0])
    _roc_ax.set_ylim([0.0, 1.05])
    _roc_ax.set_xlabel('False Positive Rate')
    _roc_ax.set_ylabel('True Positive Rate')
    _roc_ax.set_title(_heading)
    _roc_ax.legend(loc='lower right')
    plt.show()

# Compare and Visualize Results
Compare the performance of the logistic regression and KNN models and visualize which algorithm performs better.

In [None]:
# Compare and Visualize Results

# Accuracy of each model, one line per model.
for _line in (
    f'Logistic Regression Accuracy: {accuracy_logistic}',
    f'K-Nearest Neighbors Accuracy: {accuracy_knn}',
):
    print(_line)

# Both ROC curves on shared axes, plus the dashed chance-level diagonal.
_cmp_fig, _cmp_ax = plt.subplots()
for _fpr, _tpr, _colour, _legend in (
    (fpr_logistic, tpr_logistic, 'darkorange', f'Logistic Regression (area = {roc_auc_logistic:.2f})'),
    (fpr_knn, tpr_knn, 'blue', f'K-Nearest Neighbors (area = {roc_auc_knn:.2f})'),
):
    _cmp_ax.plot(_fpr, _tpr, color=_colour, lw=2, label=_legend)
_cmp_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
_cmp_ax.set_xlim([0.0, 1.0])
_cmp_ax.set_ylim([0.0, 1.05])
_cmp_ax.set_xlabel('False Positive Rate')
_cmp_ax.set_ylabel('True Positive Rate')
_cmp_ax.set_title('Receiver Operating Characteristic - Comparison')
_cmp_ax.legend(loc='lower right')
plt.show()

# Text classification reports, each under its own heading.
print('Classification Report - Logistic Regression:', class_report_logistic, sep='\n')
print('Classification Report - K-Nearest Neighbors:', class_report_knn, sep='\n')

# Confusion matrices next to each other: logistic regression on the left,
# KNN on the right.
fig, axes = plt.subplots(1, 2, figsize=(20, 7))
for _axis, _matrix, _heading in zip(
    axes,
    (conf_matrix_logistic, conf_matrix_knn),
    ('Confusion Matrix - Logistic Regression', 'Confusion Matrix - K-Nearest Neighbors'),
):
    _axis.matshow(_matrix, cmap='coolwarm')
    _axis.set_title(_heading)
    _axis.set_ylabel('True Label')
    _axis.set_xlabel('Predicted Label')

plt.show()

# Loss-curve plot for the logistic regression model, left disabled because
# scikit-learn's LogisticRegression does not expose a `loss_curve_` attribute:
# plt.figure()
# plt.plot(logistic_model.loss_curve_, label='Loss Curve')
# plt.xlabel('Iterations')
# plt.ylabel('Loss')
# plt.title('Loss Curve - Logistic Regression')
# plt.legend()
# plt.show()

# Calculate and display the accuracy of both models on the test dataset

# Report the test accuracy of each model, one line per model.
for _line in (
    f'Logistic Regression Accuracy: {accuracy_logistic}',
    f'K-Nearest Neighbors Accuracy: {accuracy_knn}',
):
    print(_line)

# Bar chart of the two accuracies on a 0-1 axis.
models = ['Logistic Regression', 'K-Nearest Neighbors']
accuracies = [accuracy_logistic, accuracy_knn]

_acc_fig, _acc_ax = plt.subplots(figsize=(10, 5))
_acc_ax.bar(models, accuracies, color=['blue', 'orange'])
_acc_ax.set_xlabel('Model')
_acc_ax.set_ylabel('Accuracy')
_acc_ax.set_title('Comparison of Model Accuracies')
_acc_ax.set_ylim(0, 1)
plt.show()

# Build the classification report once per model and read both metrics from
# its 'weighted avg' row (previously the full report was recomputed for every
# single metric).
weighted_logistic = classification_report(y_test, y_pred_logistic, output_dict=True)['weighted avg']
precision_logistic = weighted_logistic['precision']
recall_logistic = weighted_logistic['recall']

weighted_knn = classification_report(y_test, y_pred_knn, output_dict=True)['weighted avg']
precision_knn = weighted_knn['precision']
recall_knn = weighted_knn['recall']

# Print precision and recall for both models
print(f'Logistic Regression Precision: {precision_logistic}')
print(f'Logistic Regression Recall: {recall_logistic}')
print(f'K-Nearest Neighbors Precision: {precision_knn}')
print(f'K-Nearest Neighbors Recall: {recall_knn}')

# Plot comparison of precision and recall: two bar charts side by side,
# both on a 0-1 axis.
models = ['Logistic Regression', 'K-Nearest Neighbors']
precisions = [precision_logistic, precision_knn]
recalls = [recall_logistic, recall_knn]

plt.figure(figsize=(14, 5))

plt.subplot(1, 2, 1)
plt.bar(models, precisions, color=['blue', 'orange'])
plt.xlabel('Model')
plt.ylabel('Precision')
plt.title('Comparison of Model Precisions')
plt.ylim(0, 1)

plt.subplot(1, 2, 2)
plt.bar(models, recalls, color=['blue', 'orange'])
plt.xlabel('Model')
plt.ylabel('Recall')
plt.title('Comparison of Model Recalls')
plt.ylim(0, 1)

plt.tight_layout()
plt.show()

# Each curve is described once: (false positive rates, true positive rates,
# line colour, legend text).
_logistic_curve = (
    fpr_logistic, tpr_logistic, 'darkorange',
    f'Logistic Regression (area = {roc_auc_logistic:.2f})',
)
_knn_curve = (
    fpr_knn, tpr_knn, 'blue',
    f'K-Nearest Neighbors (area = {roc_auc_knn:.2f})',
)

# Three figures in turn: logistic regression alone, KNN alone, then both
# overlaid. Every figure also gets the dashed chance-level diagonal.
for _curves, _heading in (
    ((_logistic_curve,), 'Receiver Operating Characteristic - Logistic Regression'),
    ((_knn_curve,), 'Receiver Operating Characteristic - K-Nearest Neighbors'),
    ((_logistic_curve, _knn_curve), 'Receiver Operating Characteristic - Comparison'),
):
    _roc_fig, _roc_ax = plt.subplots()
    for _fpr, _tpr, _colour, _legend in _curves:
        _roc_ax.plot(_fpr, _tpr, color=_colour, lw=2, label=_legend)
    _roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    _roc_ax.set_xlim([0.0, 1.0])
    _roc_ax.set_ylim([0.0, 1.05])
    _roc_ax.set_xlabel('False Positive Rate')
    _roc_ax.set_ylabel('True Positive Rate')
    _roc_ax.set_title(_heading)
    _roc_ax.legend(loc='lower right')
    plt.show()