In [6]:
import os
import csv
import random
from datetime import datetime, timedelta

# 📂 Root folder that is scanned for images, one sub-folder per category (adjust if needed)
image_root = "images"

# ✅ Output CSV path; it is relative, so the file is written to the current working directory
output_csv = "synthetic_expiry_labels.csv"

# 🏷️ Generate expiry date and class
def generate_expiry_info(today=None, rng=None, min_days=-15, max_days=60, soon_threshold=15):
    """Draw a synthetic expiry date and the freshness class that goes with it.

    A whole number of days is drawn uniformly from ``[min_days, max_days]``
    (both ends included) and added to ``today``. The result does not depend on
    any image: it is purely random.

    Args:
        today: Reference ``datetime`` the offset is added to. Defaults to
            ``datetime.today()`` at call time.
        rng: Object exposing ``randint(a, b)`` (e.g. ``random.Random(seed)``).
            Defaults to the global ``random`` module.
        min_days: Smallest offset in days; negative values produce expired items.
        max_days: Largest offset in days.
        soon_threshold: Offsets from 0 up to and including this value are
            labelled ``"expiring_soon"``.

    Returns:
        tuple[str, str]: ``(expiry_date, label)`` where ``expiry_date`` is
        formatted as ``YYYY-MM-DD`` and ``label`` is one of ``"expired"``,
        ``"expiring_soon"`` or ``"fresh"``.
    """
    if today is None:
        today = datetime.today()
    if rng is None:
        rng = random  # module-level generator, same source the original used

    days_to_expiry = rng.randint(min_days, max_days)
    expiry_date = today + timedelta(days=days_to_expiry)

    if days_to_expiry < 0:
        label = "expired"
    elif days_to_expiry <= soon_threshold:
        # An offset of 0 (expires today) counts as expiring soon, not expired.
        label = "expiring_soon"
    else:
        label = "fresh"

    return expiry_date.strftime("%Y-%m-%d"), label

# 🖼️ Loop over images and create labels
# Seed the global generator so re-running this cell draws the same sequence of
# offsets. Combined with the sorted traversal below, an unchanged image folder
# yields the same label for every file on every run (the dates still follow the
# day the cell is executed).
random.seed(42)

image_extensions = (".jpg", ".png", ".jpeg")
rows = []
# os.listdir returns entries in arbitrary order, so sort for a stable row order.
for category in sorted(os.listdir(image_root)):
    category_path = os.path.join(image_root, category)
    if not os.path.isdir(category_path):
        continue  # only sub-folders are treated as categories
    for filename in sorted(os.listdir(category_path)):
        if filename.lower().endswith(image_extensions):
            rel_path = os.path.join(category, filename)  # Relative to image_root
            expiry_date, label = generate_expiry_info()
            rows.append([rel_path, expiry_date, label])

# 💾 Write to CSV
# newline="" lets the csv module control line endings; UTF-8 matches the default
# encoding pandas.read_csv uses later, so non-ASCII file names round-trip.
with open(output_csv, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["filename", "expiry_date", "freshness_class"])
    writer.writerows(rows)

print(f"[✔] CSV created with {len(rows)} entries at: {output_csv}")


[✔] CSV created with 4657 entries at: synthetic_expiry_labels.csv


In [7]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split


In [8]:
# Integer id assigned to each freshness class name; used as the training target.
label_map = dict(expired=0, expiring_soon=1, fresh=2)

# Read the generated label file and add the integer-encoded class as a new column.
df = pd.read_csv("synthetic_expiry_labels.csv")
df["label"] = df["freshness_class"].map(label_map)


In [9]:
# Hold out 20% of the rows for evaluation. Stratifying on the encoded label
# keeps the class proportions the same in both parts; the fixed seed makes the
# split repeatable.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["label"],
    random_state=42,
)


In [10]:
class GroceryImageDataset(Dataset):
    """Dataset that pairs each image listed in a DataFrame with its label.

    Args:
        dataframe: Table with a ``filename`` column (path relative to
            ``img_dir``) and a ``label`` column.
        img_dir: Directory the ``filename`` values are joined onto.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        self.transform = transform
        self.img_dir = img_dir
        # Renumber rows 0..n-1 so positional indices work as .loc labels,
        # whatever index the incoming frame carried.
        self.df = dataframe.reset_index(drop=True)

    def __len__(self):
        """Number of rows (samples) in the table."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; image is converted to RGB."""
        relative_name = self.df.loc[idx, "filename"]
        full_path = os.path.join(self.img_dir, relative_name)
        picture = Image.open(full_path).convert("RGB")
        target = self.df.loc[idx, "label"]

        if not self.transform:
            return picture, target
        return self.transform(picture), target


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load data and encode classes
df = pd.read_csv("synthetic_expiry_labels.csv")
label_map = {"expired": 0, "expiring_soon": 1, "fresh": 2}
df["label"] = df["freshness_class"].map(label_map)

# .map() turns every class name that is missing from label_map into NaN.
# Stop here with a clear message instead of passing NaN labels on to the
# stratified split and the loss function.
unmapped = df.loc[df["label"].isna(), "freshness_class"].unique()
if len(unmapped) > 0:
    raise ValueError(
        f"Unknown freshness_class values in CSV: {sorted(map(str, unmapped))}"
    )
df["label"] = df["label"].astype("int64")

# Train/test split (80/20, class proportions preserved, fixed seed)
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)


In [12]:
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

class GroceryDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Args:
        dataframe: Table with a ``filename`` column (path relative to
            ``img_dir``) and a ``label`` column.
        img_dir: Directory the ``filename`` values are joined onto.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        # Renumber rows 0..n-1 so the positional index from the DataLoader can
        # be used as a .loc label even after train_test_split shuffled the rows.
        self.df = dataframe.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Number of samples."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load row ``idx``: open the file, convert to RGB, apply ``transform``."""
        img_path = os.path.join(self.img_dir, self.df.loc[idx, "filename"])
        label = self.df.loc[idx, "label"]
        # Image.open is lazy and holds the file handle. convert() returns a new
        # in-memory image, so the handle is released as soon as the block exits.
        with Image.open(img_path) as source:
            image = source.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# Transforms
# Training pipeline: resize, light random augmentation, then ToTensor + Normalize
# with mean/std 0.5 per channel (maps pixel values to [-1, 1]).
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# Evaluation pipeline: same resize and normalisation but no random operations,
# so test metrics are measured on unmodified images and are repeatable.
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# Dataloaders
train_data = GroceryDataset(train_df, "images", transform)
test_data = GroceryDataset(test_df, "images", eval_transform)

# Only the training set is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)


In [13]:
import torch.nn as nn
import torch.nn.functional as F

class ExpiryCNN(nn.Module):
    """Small three-stage CNN mapping an RGB image to freshness-class logits.

    Each stage is a 3x3 convolution (padding 1, so the spatial size is kept),
    a ReLU and a 2x2 max-pool (halves height and width). After three stages the
    feature map is flattened and passed through two fully connected layers.

    Args:
        num_classes: Number of output logits. Defaults to 3.
        image_size: Side length of the square input images the network is built
            for. Defaults to 128, which gives the 128 * 16 * 16 features the
            first linear layer originally hard-coded.

    Raises:
        ValueError: If ``image_size`` is smaller than 8, because three
            halvings would leave no spatial extent.
    """

    def __init__(self, num_classes=3, image_size=128):
        super().__init__()
        if image_size < 8:
            raise ValueError("image_size must be at least 8 (three 2x2 poolings)")
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Three floor-halvings of the side length equal one floor division by 8.
        feature_side = image_size // 8
        self.fc1 = nn.Linear(128 * feature_side * feature_side, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return raw logits of shape ``[B, num_classes]`` for a batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))  # [B, 32, H/2, W/2]
        x = self.pool(F.relu(self.conv2(x)))  # [B, 64, H/4, W/4]
        x = self.pool(F.relu(self.conv3(x)))  # [B, 128, H/8, W/8]
        # Flatten per sample. The previous view(-1, 128*16*16) silently folded
        # extra features into the batch dimension for inputs that were not
        # 128x128; now a wrong input size raises a shape error at fc1.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [14]:
import torch
import torch.optim as optim

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Network on the chosen device, cross-entropy loss over its logits, and Adam
# over all of its parameters.
model = ExpiryCNN()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Fresh model on the selected device, with its loss function and Adam optimizer.
model = ExpiryCNN()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# StepLR halves the learning rate (gamma=0.5) every 5 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Per-epoch loss history, filled by the training loop and plotted afterwards.
train_loss_list, val_loss_list = [], []

epochs = 20
for epoch in range(epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    train_seen = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the mean over the batch. Weight it by the batch
        # size so the shorter final batch does not count as much as a full one.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        train_seen += batch_size

    # Average train loss per sample
    avg_train_loss = running_loss / train_seen
    train_loss_list.append(avg_train_loss)

    # ---- Validation phase (evaluated on test_loader) ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    # Reset every epoch, so after the loop these hold the last epoch's results.
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            batch_size = labels.size(0)
            val_loss += loss.item() * batch_size
            # Predicted class = index of the largest logit in each row.
            _, preds = torch.max(outputs, 1)

            correct += (preds == labels).sum().item()
            total += batch_size

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Average validation loss per sample
    avg_val_loss = val_loss / total
    val_loss_list.append(avg_val_loss)

    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Accuracy: {accuracy:.2f}%")

    # Advance the learning-rate schedule once per epoch, after the optimizer steps.
    scheduler.step()

# Loss curves: one point per epoch for the training and validation averages.
fig, ax = plt.subplots()
for history, legend_name in ((train_loss_list, "Train Loss"), (val_loss_list, "Val Loss")):
    ax.plot(history, label=legend_name)
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.legend()
ax.set_title("Loss Curve")
plt.show()

# Classification report & confusion matrix (predictions from the last epoch)
class_names = ["expired", "expiring_soon", "fresh"]
# Integer ids in the same order as class_names (0, 1, 2). Passing them
# explicitly keeps the report rows and the matrix at three classes even if one
# class never appears among the labels or predictions; otherwise target_names
# and the tick labels would no longer line up.
class_ids = list(range(len(class_names)))

print("\nClassification Report:")
print(classification_report(all_labels, all_preds, labels=class_ids, target_names=class_names))

cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
# Rows are the true classes, columns the predicted ones.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
