In [None]:
# ╔════════════════════════════════════════════════════════════╗
# ║  Food-Image Classifier – VGG19 + SimpleCNN (multi-folder)  ║
# ╚════════════════════════════════════════════════════════════╝
#  ✔ Mounts Google Drive
#  ✔ Loads images from *both* "Our dataset" & "Existing dataset"
#  ✔ One shared class-to-index mapping
#  ✔ Skips unreadable / missing images
#  ✔ Trains combo model (VGG19 backbone → SimpleCNN-style head)
#  ✔ Detailed metrics + ROC curves
#  ✔ Single-image prediction helper
# --------------------------------------------------------------

# ░░ 1.  Mount Drive ░░
# Colab-only step: mount Google Drive at /content/drive so that the
# /content/drive/MyDrive/... paths used by main() resolve.
from google.colab import drive
drive.mount('/content/drive')

# ░░ 2.  Imports ░░
import os, time, numpy as np
from collections import Counter
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image, UnidentifiedImageError

import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision import transforms, models
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             confusion_matrix, roc_auc_score, roc_curve)
from sklearn.preprocessing import label_binarize

# ░░ 3.  CombinedDataset ░░
class CombinedDataset(Dataset):
    """
    Image dataset merged from several root folders under one shared label map.

    Each root is expected to contain one sub-directory per class. The class
    list is the sorted union of the sub-directory names found in all roots, so
    a class that is missing from one root is simply skipped for that root.

    Args:
        roots: Iterable of root directories (a single path is accepted too).
        transform: Optional callable applied to every loaded RGB image.
        exts: Optional iterable of accepted file extensions including the
            leading dot. Matching is case-insensitive.

    Attributes:
        classes: Sorted list of class names.
        class_to_idx: Mapping ``class name -> integer label``.
        samples: List of ``(path, label)`` tuples in a deterministic order
            (roots in the given order, classes sorted, file names sorted).
    """
    def __init__(self, roots, transform=None, exts=None):
        # A bare string would otherwise be iterated character by character.
        if isinstance(roots, (str, os.PathLike)):
            roots = [roots]
        self.roots = list(roots)
        self.transform = transform
        # File extensions are lower-cased before the lookup, so the accepted
        # set has to be lower-case as well.
        if exts:
            self.exts = {e.lower() for e in exts}
        else:
            self.exts = {".jpg", ".jpeg", ".png", ".bmp", ".gif"}

        # build global class list
        all_classes = set()
        for root in self.roots:
            with os.scandir(root) as entries:
                all_classes.update(e.name for e in entries if e.is_dir())
        self.classes = sorted(all_classes)
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}

        # gather samples
        self.samples = []
        for root in self.roots:
            for cls in self.classes:
                d = os.path.join(root, cls)
                if not os.path.isdir(d):
                    continue
                # os.listdir() order is arbitrary; sort so that sample indices
                # are reproducible between runs.
                for f in sorted(os.listdir(d)):
                    if os.path.splitext(f)[1].lower() in self.exts:
                        self.samples.append((os.path.join(d, f),
                                             self.class_to_idx[cls]))
        print(f"üì¶ Found {len(self.samples)} images across {len(self.classes)} classes.")

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """
        Return ``(image, label)`` for ``idx``.

        If the image cannot be read, the next readable sample (wrapping
        around) is returned instead, together with that sample's own label.

        Raises:
            IndexError: if ``idx`` is out of range.
            RuntimeError: if no sample in the dataset can be read.
        """
        # Plain indexing first: an out-of-range idx must raise IndexError,
        # which is also what ends a ``for ... in dataset`` loop.
        path, label = self.samples[idx]
        total = len(self.samples)
        img = None
        # Bounded loop instead of recursion: every sample is tried at most once.
        for _ in range(total):
            try:
                with Image.open(path) as handle:
                    img = handle.convert("RGB")
                break
            except OSError:
                # Covers missing, unidentified, truncated and unreadable files
                # (FileNotFoundError and PIL's UnidentifiedImageError are
                # OSError subclasses). Fall back to the following sample.
                idx = (idx + 1) % total
                path, label = self.samples[idx]
        if img is None:
            raise RuntimeError("CombinedDataset: no readable image found")
        if self.transform:
            img = self.transform(img)
        return img, label

# ░░ 4.  Helpers ░░
def _stored_labels(dataset):
    """Return the labels of ``dataset`` without loading items, or None if unknown."""
    # Dataset that keeps (path, label) pairs, e.g. CombinedDataset.
    samples = getattr(dataset, "samples", None)
    if samples is not None:
        return [label for _, label in samples]
    # Subset-like wrapper (as returned by random_split): map indices to the base.
    base = getattr(dataset, "dataset", None)
    indices = getattr(dataset, "indices", None)
    if base is None or indices is None:
        return None
    base_labels = _stored_labels(base)
    if base_labels is None:
        return None
    return [base_labels[i] for i in indices]


def count_labels(dataset, class_names, split_name):
    """
    Print how many samples of each class ``dataset`` contains.

    When the labels can be read from stored metadata (a ``samples`` list, or a
    subset of a dataset that has one) they are counted directly, which avoids
    decoding and transforming every image. Otherwise the dataset is iterated
    and the second element of each item is taken as its label.

    Args:
        dataset: Dataset, subset, or any iterable of ``(item, label)`` pairs.
        class_names: Class names; position ``i`` names label ``i``.
        split_name: Name shown in the heading line.
    """
    labels = _stored_labels(dataset)
    if labels is None:
        labels = (y for _, y in dataset)
    cnt = Counter(labels)
    print(f"üìä {split_name} split:")
    for i, cls in enumerate(class_names):
        print(f"   {cls:>15}: {cnt.get(i,0)}")
    print()

def make_loaders(roots, img_size=224, batch=32, split=0.8, seed=None,
                 num_workers=2):
    """
    Build train/validation DataLoaders over the images found under ``roots``.

    Args:
        roots: Root folders passed to ``CombinedDataset``.
        img_size: Images are resized to ``img_size x img_size``.
        batch: Batch size of both loaders.
        split: Fraction of the samples used for training, in ``(0, 1]``.
        seed: Optional integer; when given, the random split is reproducible.
        num_workers: Worker processes per DataLoader.

    Returns:
        ``(train_loader, val_loader, classes)`` where ``classes`` is the
        dataset's sorted class-name list.

    Raises:
        ValueError: if ``split`` is out of range, no image is found, or the
            training part of the split would be empty.
    """
    if not 0.0 < split <= 1.0:
        raise ValueError(f"split must be in (0, 1], got {split!r}")

    tfm = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        # mean / std passed to Normalize, one value per RGB channel
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])
    full = CombinedDataset(roots, transform=tfm)
    classes = full.classes
    if len(full) == 0:
        raise ValueError(f"no images found under {roots!r}")

    n_train = int(len(full) * split)
    n_val = len(full) - n_train
    if n_train == 0:
        # A shuffling DataLoader cannot be built on an empty training set.
        raise ValueError(
            f"split={split!r} leaves no training samples out of {len(full)}")

    if seed is None:
        train_set, val_set = random_split(full, [n_train, n_val])
    else:
        generator = torch.Generator().manual_seed(seed)
        train_set, val_set = random_split(full, [n_train, n_val],
                                          generator=generator)

    count_labels(train_set, classes, "Train")
    count_labels(val_set, classes, "Val")

    train_loader = DataLoader(train_set, batch_size=batch, shuffle=True,
                              num_workers=num_workers)
    val_loader = DataLoader(val_set, batch_size=batch, shuffle=False,
                            num_workers=num_workers)
    return train_loader, val_loader, classes

# ░░ 5.  Models ░░
class SimpleCNNHead(nn.Module):
    """Small conv + MLP classifier applied to VGG19's 512-channel feature map."""

    def __init__(self, num_classes):
        super().__init__()
        # Layers are created in the same order as they are applied so the
        # Sequential indices (and therefore the state_dict keys) stay stable.
        conv_stage = [
            nn.Conv2d(512, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # e.g. 7x7 -> 3x3
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        classifier_stage = [
            nn.AdaptiveAvgPool2d((1, 1)),  # collapse spatial dims to 1x1
            nn.Flatten(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes),
        ]
        self.refine = nn.Sequential(*conv_stage, *classifier_stage)

    def forward(self, x):
        """Map a 512-channel feature map to per-class logits."""
        return self.refine(x)

class VGG19_SimpleCNN(nn.Module):
    """
    VGG19-BN convolutional backbone followed by a ``SimpleCNNHead`` classifier.

    Args:
        num_classes: Number of output classes.
        pretrained: Load torchvision's default VGG19-BN weights when True.
        feature_extract: When True the backbone is frozen: its parameters get
            ``requires_grad = False`` and its layers are kept in eval mode even
            while the rest of the model trains, so the BatchNorm running
            statistics of the frozen backbone are not updated.
    """
    def __init__(self, num_classes, pretrained=True, feature_extract=True):
        super().__init__()
        weights = models.VGG19_BN_Weights.DEFAULT if pretrained else None
        vgg = models.vgg19_bn(weights=weights)
        # 512-channel feature map; 7x7 spatially for 224x224 inputs.
        self.features = vgg.features
        self.feature_extract = feature_extract
        if feature_extract:
            for p in self.features.parameters():
                p.requires_grad = False
            self.features.eval()
        self.head = SimpleCNNHead(num_classes)

    def train(self, mode=True):
        """Switch train/eval mode, keeping a frozen backbone in eval mode."""
        super().train(mode)
        if self.feature_extract:
            # requires_grad=False alone does not stop BatchNorm layers from
            # using batch statistics and updating their running averages.
            self.features.eval()
        return self

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.features(x)
        return self.head(x)

def get_model(num_classes, device, pretrained=True, feature_extract=True):
    """Create a VGG19_SimpleCNN with the given options and move it to ``device``."""
    return VGG19_SimpleCNN(
        num_classes,
        pretrained=pretrained,
        feature_extract=feature_extract,
    ).to(device)

# ░░ 6.  Train & Eval ░░
@torch.no_grad()
def val_acc(model, loader, device):
    """
    Return the top-1 accuracy of ``model`` over ``loader`` in percent.

    The model is switched to eval mode and left there. An empty loader yields
    ``0.0`` instead of a division-by-zero error.

    Args:
        model: Module mapping an input batch to per-class scores.
        loader: Iterable of ``(inputs, labels)`` tensor batches.
        device: Device the inputs are moved to before the forward pass.
    """
    model.eval()
    hit = tot = 0
    for x, y in loader:
        pred = model(x.to(device)).argmax(1)
        # Compare on the model's device; only the scalar count is copied back.
        hit += (pred == y.to(device)).sum().item()
        tot += y.size(0)
    if tot == 0:
        return 0.0
    return 100 * hit / tot

def train(model, train_loader, val_loader, device,
          epochs=20, lr=1e-5, save_path='model.pth'):
    """
    Train ``model`` with Adam and cross-entropy, then plot the accuracy curves.

    The weights are written to ``save_path`` after every epoch, so an
    interrupted run keeps the last completed epoch. The file left behind by a
    full run still holds the final epoch's weights.

    Args:
        model: Module to optimise; it is trained in place.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Batches used for the per-epoch validation accuracy.
        device: Device the batches are moved to.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        save_path: Destination of the saved ``state_dict``.

    Returns:
        ``(train_history, val_history)``: per-epoch accuracies in percent.
    """
    crit = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=lr)
    tr_hist, vl_hist = [], []
    for ep in range(1, epochs + 1):
        model.train()
        hit = tot = loss_sum = 0
        for x, y in tqdm(train_loader, desc=f'Epoch {ep}/{epochs}'):
            x, y = x.to(device), y.to(device)
            opt.zero_grad()
            out = model(x)
            loss = crit(out, y)
            loss.backward()
            opt.step()
            # loss is a batch mean; weight it by batch size for the epoch mean
            loss_sum += loss.item() * x.size(0)
            hit += (out.argmax(1) == y).sum().item()
            tot += y.size(0)
        seen = max(tot, 1)          # avoid dividing by zero on an empty loader
        tr_acc = 100 * hit / seen
        vl_acc = val_acc(model, val_loader, device)
        tr_hist.append(tr_acc)
        vl_hist.append(vl_acc)
        print(f"  Loss {loss_sum/seen:.4f} | Train {tr_acc:.2f}% | Val {vl_acc:.2f}%")
        if ep < epochs:
            # Intermediate checkpoint; the final save below covers the last epoch.
            torch.save(model.state_dict(), save_path)
    torch.save(model.state_dict(), save_path)
    print("üíæ saved:", save_path)

    plt.figure(figsize=(7,4))
    plt.plot(tr_hist, label='train')
    plt.plot(vl_hist, label='val')
    plt.xlabel('epoch')
    plt.ylabel('accuracy (%)')
    plt.title('Accuracy')
    plt.grid()
    plt.legend()
    plt.show()
    return tr_hist, vl_hist

@torch.no_grad()
def detailed_report(model, loader, classes, device):
    """
    Print accuracy, weighted precision/recall, the confusion matrix and a
    support-weighted one-vs-rest ROC-AUC, and plot one ROC curve per class.

    ROC statistics are computed per class. Classes that have no positive or no
    negative sample in ``loader`` are left out, because their ROC curve is
    undefined. The remaining classes are averaged weighted by their number of
    positive samples. This also works for two-class problems.

    Args:
        model: Module mapping an input batch to per-class logits.
        loader: Iterable of ``(inputs, labels)`` tensor batches.
        classes: Class names; position ``i`` names label ``i``.
        device: Device the inputs are moved to.
    """
    model.eval()
    all_p, all_y, all_prob = [], [], []
    for x, y in loader:
        out = model(x.to(device))
        all_p.append(out.argmax(1).cpu().numpy())
        all_prob.append(torch.softmax(out,1).cpu().numpy())
        all_y.append(y.numpy())
    if not all_y:
        # np.concatenate raises on an empty list; there is nothing to report.
        print("No samples to evaluate.")
        return
    y = np.concatenate(all_y); p = np.concatenate(all_p); prob = np.vstack(all_prob)

    acc  = accuracy_score(y, p)
    prec = precision_score(y, p, average='weighted', zero_division=0)
    rec  = recall_score(y, p, average='weighted', zero_division=0)
    print(f"\nüìä Metrics\n Accuracy : {acc:.4f}\n Precision: {prec:.4f}\n Recall   : {rec:.4f}\n")
    # Fix the label set so row/column i always refers to classes[i], even when
    # a class does not occur in this split.
    print("Confusion matrix:\n",
          confusion_matrix(y, p, labels=list(range(len(classes)))))

    curves = []  # (class name, fpr, tpr, auc, number of positives)
    try:
        for i, cls in enumerate(classes):
            y_i = (y == i).astype(int)
            support = int(y_i.sum())
            if support == 0 or support == len(y_i):
                continue  # ROC is undefined without both positives and negatives
            fpr, tpr, _ = roc_curve(y_i, prob[:, i])
            curves.append((cls, fpr, tpr, roc_auc_score(y_i, prob[:, i]), support))
    except ValueError as e:
        print("AUC not available ‚ûú", e)
        return
    if not curves:
        print("AUC not available ‚ûú",
              "no class has both positive and negative samples")
        return

    total_support = sum(c[4] for c in curves)
    auc = sum(c[3] * c[4] for c in curves) / total_support
    print(f" ROC‚ÄëAUC  : {auc:.4f}")

    plt.figure(figsize=(7,6))
    for cls, fpr, tpr, cls_auc, _ in curves:
        plt.plot(fpr, tpr, label=f'{cls} (AUC {cls_auc:.2f})')
    plt.plot([0,1],[0,1],'k--'); plt.legend()
    plt.title('ROC curves'); plt.xlabel('FPR'); plt.ylabel('TPR'); plt.grid(); plt.show()

# ░░ 7.  Prediction helper ░░
def predict_one(model, img_path, classes, device, img_size=224):
    """
    Classify a single image file, print the result and return the class name.

    The model is evaluated in eval mode without gradient tracking. Its previous
    train/eval mode is restored afterwards.

    Args:
        model: Module mapping an image batch to per-class scores.
        img_path: Path of the image to classify.
        classes: Class names; position ``i`` names label ``i``.
        device: Device the image tensor is moved to.
        img_size: The image is resized to ``img_size x img_size``.

    Returns:
        The predicted class name.
    """
    tfm = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
    ])
    with Image.open(img_path) as handle:
        img = handle.convert('RGB')

    was_training = model.training
    # In train mode BatchNorm would normalise with this single image's own
    # statistics instead of the learned running averages.
    model.eval()
    try:
        with torch.no_grad():
            logits = model(tfm(img).unsqueeze(0).to(device))
    finally:
        model.train(was_training)

    pred = logits.argmax(1).item()
    print(f"üîÆ {os.path.basename(img_path)} ‚ûú {classes[pred]}")
    return classes[pred]

# ░░ 8.  Main ░░
def main():
    """
    Run the whole pipeline: build the loaders, train, evaluate on the
    validation split, and classify one sample image if it exists.
    """
    # NOTE: point these to your two dataset roots
    roots = [
        '/content/drive/MyDrive/Dataset/Dataset/Our dataset',
        '/content/drive/MyDrive/Dataset/Dataset/Existing dataset'
    ]
    save_path = '/content/drive/MyDrive/Dataset/Dataset/Samifood_model_vgg_combo.pth'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("‚öôÔ∏è device:", device)

    # data
    t0 = time.time()
    train_loader, val_loader, classes = make_loaders(roots)
    print(f"‚è± Data ready in {time.time()-t0:.1f}s")

    # model
    model = get_model(len(classes), device,
                      pretrained=True, feature_extract=True)
    train(model, train_loader, val_loader, device,
          epochs=30, lr=1e-5, save_path=save_path)

    # evaluation (reload from disk so the saved file itself is what gets checked)
    model.load_state_dict(torch.load(save_path, map_location=device))
    detailed_report(model, val_loader, classes, device)

    # quick test - change the path if you like
    test_img = "/content/drive/MyDrive/Dataset/Dataset/Our dataset/Mango pudding/IMG20250608133835.jpg"
    if os.path.isfile(test_img):
        predict_one(model, test_img, classes, device)
    else:
        # Do not let a missing sample image crash the run after training.
        print("Test image not found, skipping prediction:", test_img)

# Entry point: run the full pipeline when this code is executed directly
# (a script run, or a notebook cell, where __name__ is "__main__").
if __name__ == "__main__":
    main()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
‚öôÔ∏è device: cpu
üì¶ Found 1695 images across 13 classes.
üìä Train split:
         Chanachur: 36
            Fuchka: 160
          Jhalmuri: 181
          Kalavuna: 29
          Khichuri: 169
     Mango pudding: 16
            Mishti: 350
             Pitha: 82
             Pizza: 48
              Puri: 152
         Roshmalai: 44
          Shingara: 50
   Sugarcane Juice: 39

üìä Val split:
         Chanachur: 9
            Fuchka: 47
          Jhalmuri: 40
          Kalavuna: 6
          Khichuri: 34
     Mango pudding: 2
            Mishti: 101
             Pitha: 10
             Pizza: 11
              Puri: 35
         Roshmalai: 15
          Shingara: 20
   Sugarcane Juice: 9

‚è± Data ready in 434.8s


Downloading: "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth" to /root/.cache/torch/hub/checkpoints/vgg19_bn-c79401a0.pth
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 548M/548M [00:06<00:00, 95.6MB/s]
Epoch 1/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:15<00:00, 28.26s/it]


  Loss 2.5669 | Train 4.42% | Val 3.54%


Epoch 2/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:08<00:00, 28.10s/it]


  Loss 2.5207 | Train 28.54% | Val 40.41%


Epoch 3/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:11<00:00, 28.18s/it]


  Loss 2.4251 | Train 31.56% | Val 29.79%


Epoch 4/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [19:59<00:00, 27.89s/it]


  Loss 2.2723 | Train 26.18% | Val 29.79%


Epoch 5/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:19<00:00, 28.35s/it]


  Loss 2.1172 | Train 32.89% | Val 40.41%


Epoch 6/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:14<00:00, 28.23s/it]


  Loss 1.9703 | Train 37.68% | Val 41.30%


Epoch 7/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [19:51<00:00, 27.71s/it]


  Loss 1.8326 | Train 41.00% | Val 50.74%


Epoch 8/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:21<00:00, 28.40s/it]


  Loss 1.7098 | Train 52.80% | Val 62.83%


Epoch 9/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:18<00:00, 28.34s/it]


  Loss 1.6011 | Train 60.25% | Val 64.90%


Epoch 10/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:00<00:00, 27.91s/it]


  Loss 1.5122 | Train 62.54% | Val 69.62%


Epoch 11/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:01<00:00, 27.93s/it]


  Loss 1.4290 | Train 66.00% | Val 70.50%


Epoch 12/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [19:54<00:00, 27.78s/it]


  Loss 1.3453 | Train 66.30% | Val 70.80%


Epoch 13/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [19:53<00:00, 27.77s/it]


  Loss 1.2695 | Train 67.99% | Val 71.09%


Epoch 14/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [19:52<00:00, 27.74s/it]


  Loss 1.2226 | Train 68.14% | Val 71.68%


Epoch 15/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:04<00:00, 28.01s/it]


  Loss 1.1574 | Train 69.47% | Val 71.68%


Epoch 16/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:15<00:00, 28.27s/it]


  Loss 1.1083 | Train 69.84% | Val 71.98%


Epoch 17/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:21<00:00, 28.41s/it]


  Loss 1.0640 | Train 69.99% | Val 72.27%


Epoch 18/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:20<00:00, 28.39s/it]


  Loss 1.0224 | Train 71.17% | Val 71.98%


Epoch 19/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:12<00:00, 28.20s/it]


  Loss 0.9939 | Train 70.21% | Val 72.57%


Epoch 20/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:00<00:00, 27.92s/it]


  Loss 0.9754 | Train 69.99% | Val 72.86%


Epoch 21/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:07<00:00, 28.08s/it]


  Loss 0.9286 | Train 71.68% | Val 73.75%


Epoch 22/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:05<00:00, 28.03s/it]


  Loss 0.9065 | Train 71.98% | Val 73.45%


Epoch 23/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:05<00:00, 28.04s/it]


  Loss 0.8798 | Train 73.45% | Val 73.75%


Epoch 24/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:08<00:00, 28.11s/it]


  Loss 0.8623 | Train 74.19% | Val 74.04%


Epoch 25/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:02<00:00, 27.97s/it]


  Loss 0.8221 | Train 74.85% | Val 75.22%


Epoch 26/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:05<00:00, 28.04s/it]


  Loss 0.8077 | Train 74.71% | Val 75.52%


Epoch 27/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:14<00:00, 28.25s/it]


  Loss 0.7893 | Train 76.03% | Val 76.11%


Epoch 28/30: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 43/43 [20:03<00:00, 28.00s/it]


  Loss 0.7677 | Train 76.33% | Val 75.81%


Epoch 29/30:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 20/43 [09:35<10:57, 28.59s/it]