In [None]:
# Core libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
import time
import warnings
warnings.filterwarnings('ignore')

# Traditional ML algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

# Gradient Boosting
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Dataset
from tensorflow.keras.datasets import fashion_mnist

# Utilities
import json
import os

# Reaching this point means every import above resolved without raising.
print("✅ All libraries imported successfully!")
# Echo the TensorFlow version so the environment is recorded in the cell output.
print(f"TensorFlow version: {tf.__version__}")




[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


# 🖼️ Image Data Algorithms - Fashion-MNIST Classification

Welcome to the Image Data arena! In this notebook, we'll apply various machine learning algorithms to the **Fashion-MNIST dataset** - a more challenging alternative to the classic MNIST digits dataset.

## 📊 Dataset: Fashion-MNIST

- **Source**: Zalando's article images
- **Classes**: 10 fashion categories (T-shirt, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot)
- **Size**: 60,000 training + 10,000 test images
- **Image Dimensions**: 28x28 grayscale pixels
- **Goal**: Multi-class classification of fashion items

---

## 🛠️ Step 1: Load and Explore the Dataset


In [None]:
# Load Fashion-MNIST dataset (train and test splits, images plus integer labels)
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Fashion-MNIST class names; the position in this list is the integer label
# stored in y_train / y_test (see the class_names[y_train[i]] lookup below).
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

print(f"Training data shape: {X_train.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Test labels shape: {y_test.shape}")
print(f"Number of classes: {len(class_names)}")
print(f"Pixel value range: {X_train.min()} - {X_train.max()}")

# Visualize sample images: the first 20 training images on a 4x5 grid,
# each titled with its class name
plt.figure(figsize=(15, 8))
for i in range(20):
    plt.subplot(4, 5, i + 1)
    plt.imshow(X_train[i], cmap='gray')
    plt.title(f'{class_names[y_train[i]]}')
    plt.axis('off')
plt.suptitle('Fashion-MNIST Sample Images', fontsize=16)
plt.tight_layout()
plt.show()

# Class distribution: `unique` holds the label values that actually occur in
# y_train and `counts` holds how often each one occurs, in the same order
plt.figure(figsize=(12, 6))
unique, counts = np.unique(y_train, return_counts=True)
plt.bar([class_names[i] for i in unique], counts)
plt.title('Class Distribution in Training Set')
plt.xlabel('Fashion Categories')
plt.ylabel('Number of Samples')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print("Class distribution:")
# Pair each count with its own label value (not with its position in `counts`)
# so names and counts stay aligned even if some label never occurs; this
# matches how the bar chart above looks names up.
for label, count in zip(unique, counts):
    print(f"{class_names[label]}: {count} samples ({count/len(y_train)*100:.1f}%)")

## 🔧 Step 2: Data Preprocessing


In [None]:
# Scale pixel intensities by 1/255 and store them as float32
X_train_normalized = X_train.astype(np.float32) / 255.0
X_test_normalized = X_test.astype(np.float32) / 255.0

# The classical estimators take one feature vector per sample, so collapse
# each image into a single row
X_train_flat = X_train_normalized.reshape(len(X_train_normalized), -1)
X_test_flat = X_test_normalized.reshape(len(X_test_normalized), -1)

print(f"Flattened training data shape: {X_train_flat.shape}")
print(f"Flattened test data shape: {X_test_flat.shape}")

# Only the leading `subset_size` training samples are handed to the
# classical algorithms; the full training set is kept for the neural networks
subset_size = 10000
X_train_subset, y_train_subset = X_train_flat[:subset_size], y_train[:subset_size]

print(f"Using subset of {subset_size} samples for traditional ML algorithms")
print(f"Subset shape: {X_train_subset.shape}")

# Convolutional layers need an explicit channel axis, so append a trailing
# axis of size 1: (samples, 28, 28) -> (samples, 28, 28, 1)
X_train_cnn = X_train_normalized[..., np.newaxis]
X_test_cnn = X_test_normalized[..., np.newaxis]

print(f"CNN input shape: {X_train_cnn.shape}")

313/313 ━━━━━━━━━━━━━━━━━━━━ 5s 15ms/step - accuracy: 0.9018 - loss: 0.2787


## 🤖 Step 3: Traditional Machine Learning Algorithms


In [None]:
# Initialize algorithms
# Every estimator that accepts a seed is given random_state=42 so that
# repeated runs of this cell are comparable.
# NOTE(review): probability=True on SVC adds extra work during fit according to
# the scikit-learn docs; it is kept because a later cell may call
# predict_proba — TODO confirm and drop it if unused.
algorithms = {
    'Logistic Regression': LogisticRegression(max_iter=1000, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1),
    'SVM (RBF)': SVC(kernel='rbf', random_state=42, probability=True),
    'K-Nearest Neighbors': KNeighborsClassifier(n_neighbors=5, n_jobs=-1),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'XGBoost': xgb.XGBClassifier(random_state=42, eval_metric='mlogloss'),
    'LightGBM': lgb.LGBMClassifier(random_state=42, verbose=-1),
    'CatBoost': CatBoostClassifier(random_state=42, verbose=False)
}

# Store results (one dict of metrics per algorithm name)
results = {}

print("🚀 Starting algorithm comparison...")

# Train and evaluate each algorithm
for name, algorithm in algorithms.items():
    print(f"Training {name}...")

    # Time ONLY the fit call. Stopping the clock after predict() would fold
    # test-set inference into "training time", which badly distorts lazy
    # learners such as KNN where fit is cheap and predict is expensive.
    # perf_counter() is monotonic, so the interval cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    algorithm.fit(X_train_subset, y_train_subset)
    training_time = time.perf_counter() - start_time

    # Make predictions. Flatten to 1-D because some estimators (CatBoost for
    # multiclass, notably) can return a column-shaped (n_samples, 1) array,
    # which would otherwise be stored below as a nested list unlike the others.
    y_pred = np.asarray(algorithm.predict(X_test_flat)).ravel()

    # Hold-out accuracy on the full test set
    accuracy = accuracy_score(y_test, y_pred)

    # Cross-validation score (on subset for speed); 3 folds over the same
    # training subset, reported as mean and standard deviation
    cv_scores = cross_val_score(algorithm, X_train_subset, y_train_subset, cv=3, scoring='accuracy')
    cv_mean = cv_scores.mean()
    cv_std = cv_scores.std()

    # Store results; arrays are converted to plain lists
    results[name] = {
        'accuracy': accuracy,
        'cv_mean': cv_mean,
        'cv_std': cv_std,
        'training_time': training_time,
        'predictions': y_pred.tolist(),
        'confusion_matrix': confusion_matrix(y_test, y_pred).tolist()
    }

    print(f"✅ {name}: Accuracy = {accuracy:.4f}, CV Score = {cv_mean:.4f} (±{cv_std:.4f}), Time = {training_time:.2f}s")

## 🧠 Step 4: Deep Learning Models


In [None]:
# Simple Neural Network
def create_simple_nn(input_shape=(28, 28), num_classes=10, dropout_rate=0.2):
    """Build an uncompiled fully connected classifier for image input.

    The network flattens each input, passes it through two ReLU dense layers
    (128 then 64 units), each followed by dropout, and ends in a softmax layer
    with one unit per class.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
            Defaults to (28, 28).
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.
        dropout_rate: Rate used by both dropout layers. Defaults to 0.2.

    Returns:
        A `keras.Sequential` model. The caller is responsible for compiling it.
    """
    model = keras.Sequential([
        # Declare the input with an explicit Input object rather than passing
        # input_shape= to the first layer, which current Keras warns against.
        keras.Input(shape=input_shape),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(dropout_rate),
        layers.Dense(64, activation='relu'),
        layers.Dropout(dropout_rate),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Convolutional Neural Network
def create_cnn(input_shape=(28, 28, 1), num_classes=10, dropout_rate=0.2):
    """Build an uncompiled convolutional classifier for image input.

    The network stacks three 3x3 ReLU convolutions (32, 64 and 64 filters) with
    2x2 max pooling after the first two, then flattens into a 64-unit ReLU
    dense layer, applies dropout, and ends in a softmax layer with one unit
    per class.

    Args:
        input_shape: Shape of one input sample, without the batch axis and
            including the channel axis. Defaults to (28, 28, 1).
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.
        dropout_rate: Rate used by the dropout layer before the output layer.
            Defaults to 0.2.

    Returns:
        A `keras.Sequential` model. The caller is responsible for compiling it.
    """
    model = keras.Sequential([
        # Declare the input with an explicit Input object rather than passing
        # input_shape= to the first layer, which current Keras warns against.
        keras.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(dropout_rate),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Status message only: the factories above are defined but no model has been built yet.
print("🧠 Deep Learning models defined")

In [None]:
# Train Simple Neural Network
print("Training Simple Neural Network...")

# Seed the Python, NumPy and backend random generators so that weight
# initialisation, dropout masks and batch shuffling repeat across runs,
# in line with the random_state=42 used for the classical models.
keras.utils.set_random_seed(42)

nn_model = create_simple_nn()
nn_model.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])

# Train with validation split (10% of the training data is held out by Keras).
# The timer wraps ONLY the fit call; reading the clock after evaluate() would
# count test-set evaluation as training time. perf_counter() is monotonic, so
# the interval cannot be skewed by system clock adjustments.
start_time = time.perf_counter()
history_nn = nn_model.fit(X_train_normalized, y_train,
                          epochs=10,
                          batch_size=128,
                          validation_split=0.1,
                          verbose=1)
nn_training_time = time.perf_counter() - start_time

# Evaluate on the held-out test set
nn_loss, nn_accuracy = nn_model.evaluate(X_test_normalized, y_test, verbose=0)
# The model outputs one probability per class; argmax picks the predicted label
nn_predictions = np.argmax(nn_model.predict(X_test_normalized), axis=1)

# Store results; arrays are converted to plain lists, and 'history' holds the
# per-epoch metrics recorded by fit()
results['Simple Neural Network'] = {
    'accuracy': nn_accuracy,
    'loss': nn_loss,
    'training_time': nn_training_time,
    'predictions': nn_predictions.tolist(),
    'confusion_matrix': confusion_matrix(y_test, nn_predictions).tolist(),
    'history': history_nn.history
}

print(f"✅ Simple NN: Accuracy = {nn_accuracy:.4f}, Loss = {nn_loss:.4f}, Time = {nn_training_time:.2f}s")