In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MLOps Image Classification - Full Pipeline\n",
    "\n",
    "**Objective**: Train, evaluate, deploy, retrain, and scale a CNN on image data.\n",
    "\n",
    "- **Data**: 28×28 grayscale images (horizontal vs vertical lines)\n",
    "- **Model**: Fine-tuned ResNet-18\n",
    "- **Metrics**: Accuracy, Precision, Recall, F1, Loss\n",
    "- **Optimization**: Adam + Weight Decay + LR Scheduler + Early Stopping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch, torch.nn as nn, torch.optim as optim\n",
    "from torchvision import models, transforms\n",
    "from torch.utils.data import DataLoader, Dataset\n",
    "import numpy as np, matplotlib.pyplot as plt, os, json, sqlite3, seaborn as sns\n",
    "from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix\n",
    "from pathlib import Path\n",
    "import warnings; warnings.filterwarnings('ignore')\n",
    "torch.manual_seed(42)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## 1. Data Acquisition & Processing"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_image(label, size=28, noise=0.2):\n",
    "    \"\"\"Return a size x size float image: uniform noise plus one bright line.\n",
    "\n",
    "    label 0 draws the line along the middle row (horizontal); any other\n",
    "    label draws it down the middle column (vertical). Background pixels come\n",
    "    from np.random.rand scaled by `noise`; line pixels are set to 1.0.\n",
    "    \"\"\"\n",
    "    mid = size // 2\n",
    "    canvas = noise * np.random.rand(size, size)\n",
    "    if label != 0:\n",
    "        canvas[:, mid] = 1.0   # vertical stroke\n",
    "    else:\n",
    "        canvas[mid, :] = 1.0   # horizontal stroke\n",
    "    return canvas\n",
    "\n",
    "def build_dataset(n_samples=2000):\n",
    "    \"\"\"Generate `n_samples` images with alternating labels 0, 1, 0, 1, ...\n",
    "\n",
    "    Returns (X, y): X stacks the images with a channel axis inserted at\n",
    "    position 1, and y holds the matching integer labels.\n",
    "    \"\"\"\n",
    "    labels = [idx % 2 for idx in range(n_samples)]\n",
    "    # Images are generated in label order, one random draw per sample.\n",
    "    images = [generate_image(lbl) for lbl in labels]\n",
    "    return np.array(images)[:, np.newaxis, :, :], np.array(labels)\n",
    "\n",
    "# Seed NumPy explicitly: generate_image draws from np.random, which\n",
    "# torch.manual_seed does not touch, so without this every run would\n",
    "# produce a different dataset.\n",
    "np.random.seed(42)\n",
    "X_train, y_train = build_dataset(1600)\n",
    "X_test,  y_test  = build_dataset(400)\n",
    "\n",
    "# Save sample images (the first 50 of each split, fewer if a split is smaller)\n",
    "for split, images in (('train', X_train), ('test', X_test)):\n",
    "    split_dir = Path('data') / split\n",
    "    split_dir.mkdir(parents=True, exist_ok=True)\n",
    "    for i, img in enumerate(images[:50]):\n",
    "        np.save(split_dir / f'img_{i}.npy', img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class LineDataset(Dataset):\n",
    "    \"\"\"Tensor-backed dataset of images and integer class labels.\"\"\"\n",
    "\n",
    "    def __init__(self, X, y, transform=None):\n",
    "        # Convert once up front so __getitem__ only has to index.\n",
    "        self.X = torch.FloatTensor(X)\n",
    "        self.y = torch.LongTensor(y)\n",
    "        self.transform = transform\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.y)\n",
    "\n",
    "    def __getitem__(self, idx):\n",
    "        \"\"\"Return (image, label) for `idx`, applying the transform if one is set.\"\"\"\n",
    "        sample, target = self.X[idx], self.y[idx]\n",
    "        if self.transform:\n",
    "            sample = self.transform(sample)\n",
    "        return sample, target\n",
    "\n",
    "# Normalize the single channel with mean 0.5 and std 0.5.\n",
    "transform = transforms.Compose([\n",
    "    transforms.Normalize(mean=[0.5], std=[0.5]),\n",
    "])\n",
    "train_ds, test_ds = (\n",
    "    LineDataset(images, labels, transform)\n",
    "    for images, labels in ((X_train, y_train), (X_test, y_test))\n",
    ")\n",
    "# Only the training loader shuffles; the test loader keeps dataset order.\n",
    "train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)\n",
    "test_loader = DataLoader(test_ds, batch_size=64, shuffle=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## 2. Model: Fine-tune Pre-trained ResNet-18"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `pretrained=True` is deprecated since torchvision 0.13 in favour of the\n",
    "# `weights=` enum; IMAGENET1K_V1 is the checkpoint the old flag loaded.\n",
    "# Fall back to the legacy flag only on installs that predate the enum.\n",
    "_resnet18_weights = getattr(models, 'ResNet18_Weights', None)\n",
    "if _resnet18_weights is not None:\n",
    "    model = models.resnet18(weights=_resnet18_weights.IMAGENET1K_V1)\n",
    "else:\n",
    "    model = models.resnet18(pretrained=True)\n",
    "# Replace the stem with a 1-channel 3x3 conv for grayscale input and the\n",
    "# head with a 2-class linear layer. Both are newly created layers, so they\n",
    "# start from fresh initialisation rather than pretrained weights.\n",
    "model.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)\n",
    "model.fc = nn.Linear(model.fc.in_features, 2)\n",
    "\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "model = model.to(device)\n",
    "\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)\n",
    "# Halves the learning rate when the value passed to scheduler.step() plateaus.\n",
    "scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, factor=0.5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## 3. Training with Early Stopping"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class EarlyStopping:\n",
    "    \"\"\"Set `early_stop` after `patience` consecutive calls without improvement.\n",
    "\n",
    "    A call counts as an improvement when its value is below `best - delta`.\n",
    "    The very first call only records the baseline.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, patience=3, delta=0):\n",
    "        self.patience = patience\n",
    "        self.delta = delta\n",
    "        self.best = None\n",
    "        self.counter = 0\n",
    "        self.early_stop = False\n",
    "\n",
    "    def __call__(self, val_loss):\n",
    "        if self.best is None:\n",
    "            self.best = val_loss\n",
    "            return\n",
    "        improved = val_loss < self.best - self.delta\n",
    "        if improved:\n",
    "            self.best, self.counter = val_loss, 0\n",
    "        else:\n",
    "            self.counter += 1\n",
    "        if self.counter >= self.patience:\n",
    "            self.early_stop = True\n",
    "\n",
    "es = EarlyStopping(patience=3)\n",
    "\n",
    "def train_epoch():\n",
    "    \"\"\"Run one optimisation pass over train_loader; return the mean per-sample loss.\"\"\"\n",
    "    model.train()\n",
    "    running_loss = 0\n",
    "    for inputs, targets in train_loader:\n",
    "        inputs = inputs.to(device)\n",
    "        targets = targets.to(device)\n",
    "        optimizer.zero_grad()\n",
    "        batch_loss = criterion(model(inputs), targets)\n",
    "        batch_loss.backward()\n",
    "        optimizer.step()\n",
    "        # Weight each batch loss by its size so the final division by the\n",
    "        # dataset length gives a per-sample average.\n",
    "        running_loss += batch_loss.item() * inputs.size(0)\n",
    "    return running_loss / len(train_loader.dataset)\n",
    "\n",
    "def eval_epoch():\n",
    "    \"\"\"Predict every batch in test_loader with gradients disabled.\n",
    "\n",
    "    Returns (preds, trues): two flat lists of predicted and true class ids.\n",
    "    \"\"\"\n",
    "    model.eval()\n",
    "    predicted, actual = [], []\n",
    "    with torch.no_grad():\n",
    "        for inputs, targets in test_loader:\n",
    "            logits = model(inputs.to(device))\n",
    "            predicted += list(logits.argmax(dim=1).cpu().numpy())\n",
    "            actual += list(targets.numpy())\n",
    "    return predicted, actual\n",
    "\n",
    "def _val_loss():\n",
    "    \"\"\"Mean per-sample criterion value of the current model over test_loader.\"\"\"\n",
    "    model.eval()\n",
    "    total = 0.0\n",
    "    with torch.no_grad():\n",
    "        for x, y in test_loader:\n",
    "            x, y = x.to(device), y.to(device)\n",
    "            total += criterion(model(x), y).item() * x.size(0)\n",
    "    return total / len(test_loader.dataset)\n",
    "\n",
    "for epoch in range(20):\n",
    "    train_loss = train_epoch()\n",
    "    preds, trues = eval_epoch()\n",
    "    val_acc = accuracy_score(trues, preds)\n",
    "    # Drive the LR schedule and early stopping from held-out loss: training\n",
    "    # loss keeps falling while a model overfits, so it cannot signal when\n",
    "    # to stop.\n",
    "    # NOTE(review): test_loader doubles as the validation set here; a\n",
    "    # separate validation split would avoid tuning on the test data.\n",
    "    val_loss = _val_loss()\n",
    "    scheduler.step(val_loss)\n",
    "    print(f'Epoch {epoch+1:02d} – loss {train_loss:.4f} – val_acc {val_acc:.4f}')\n",
    "    es(val_loss)\n",
    "    if es.early_stop:\n",
    "        print('Early stopping!')\n",
    "        break\n",
    "\n",
    "Path('models').mkdir(exist_ok=True)\n",
    "torch.save(model.state_dict(), 'models/resnet_finetuned.pth')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## 4. Evaluation (5 Metrics)"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "preds, trues = eval_epoch()\n",
    "acc = accuracy_score(trues, preds)\n",
    "prec, rec, f1, _ = precision_recall_fscore_support(trues, preds, average='macro')\n",
    "\n",
    "# Compute the loss of the final model on the test set so that all five\n",
    "# metrics describe the same data. The last-epoch training loss does not,\n",
    "# and it only exists if the training loop ran in this kernel session.\n",
    "model.eval()\n",
    "test_loss = 0.0\n",
    "with torch.no_grad():\n",
    "    for xb, yb in test_loader:\n",
    "        xb, yb = xb.to(device), yb.to(device)\n",
    "        test_loss += criterion(model(xb), yb).item() * xb.size(0)\n",
    "test_loss /= len(test_loader.dataset)\n",
    "print(f'Accuracy: {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | F1: {f1:.4f} | Loss: {test_loss:.4f}')\n",
    "\n",
    "cm = confusion_matrix(trues, preds)\n",
    "plt.figure(figsize=(5,4))\n",
    "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')\n",
    "plt.title('Confusion Matrix'); plt.xlabel('Pred'); plt.ylabel('True')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": ["## 5. Visualizations (3 Features)"]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Sample Images: the first three test images, titled with their class\n",
    "fig, axs = plt.subplots(1, 3, figsize=(9, 3))\n",
    "for i in range(3):\n",
    "    panel = axs[i]\n",
    "    panel.imshow(X_test[i].squeeze(), cmap='gray')\n",
    "    panel.set_title(['Horizontal', 'Vertical'][y_test[i]])\n",
    "    panel.axis('off')\n",
    "plt.show()\n",
    "\n",
    "# 2. Average Image per Class: pixel-wise mean over each class's training images\n",
    "avg_h, avg_v = (\n",
    "    np.mean(X_train[y_train == cls], axis=0).squeeze() for cls in (0, 1)\n",
    ")\n",
    "fig, ax = plt.subplots(1, 2, figsize=(8, 4))\n",
    "for panel, mean_img, title in zip(ax, (avg_h, avg_v), ('Avg Horizontal', 'Avg Vertical')):\n",
    "    panel.imshow(mean_img, cmap='hot')\n",
    "    panel.set_title(title)\n",
    "plt.show()\n",
    "\n",
    "# 3. Pixel Intensity Distribution: overlaid histograms, one per class\n",
    "for cls, name in enumerate(('Horizontal', 'Vertical')):\n",
    "    plt.hist(X_train[y_train == cls].flatten(), alpha=0.7, label=name, bins=30)\n",
    "plt.legend()\n",
    "plt.title('Pixel Intensity Distribution')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {"name": "python3", "display_name": "Python 3"},
  "language_info": {"name": "python"}
 },
 "nbformat": 4,
 "nbformat_minor": 2
}