# In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import psutil

# Seed the NumPy and PyTorch random number generators so that repeated
# runs start from the same random state.
np.random.seed(42)
torch.manual_seed(42)

# Output directory for everything this script writes (checkpoints, final
# weights, learning-curve image). Created up front; an existing directory
# is reused.
model_dir = "models"
os.makedirs(model_dir, exist_ok=True)

# Input data locations: the caption text file and the folder holding the
# image files that the captions refer to.
labels_path = r"C:\Users\mohan\OneDrive\Desktop\First_year_assignments\electrical\captions.txt"
image_dir = r"C:\Users\mohan\OneDrive\Desktop\First_year_assignments\electrical\Images"

# Parse the caption TXT file into a DataFrame
def load_captions(file_path):
    """Read ``image,caption`` lines from a UTF-8 text file into a DataFrame.

    Every line is stripped and split at its first comma only, so commas
    that appear inside the caption text are preserved. Lines without any
    comma (blank lines included) are skipped. No line is treated as a
    header: a first line such as ``image,caption`` becomes an ordinary row.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"image"`` and
        ``"caption"``, one row per parsed line, in file order. Both fields
        have surrounding whitespace removed.
    """
    rows = []
    with open(file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            name, comma, text = raw_line.strip().partition(",")
            if not comma:
                # No separator on this line: nothing to pair up.
                continue
            rows.append((name.strip(), text.strip()))
    return pd.DataFrame(rows, columns=["image", "caption"])

# Parse the caption file into a DataFrame with "image" and "caption" columns.
labels_df = load_captions(labels_path)

# Turn every distinct caption string into an integer class id. The ids are
# appended as a third column, "label", after "image" and "caption".
label_encoder = LabelEncoder()
labels_df["label"] = label_encoder.fit_transform(labels_df["caption"])

# Per-image preprocessing pipeline: resize to a fixed size, convert the PIL
# image to a tensor, then normalize each of the three channels with the
# given mean/std (the values commonly quoted for ImageNet).
img_size = (128, 128)
transform = transforms.Compose(
    [
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Custom Dataset class
class ImageCaptionDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs from a DataFrame.

    Rows are addressed by position, not by column name: column 0 must hold
    the image file name (relative to ``img_dir``) and column 2 must hold
    the integer class label.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        """Store the table, the image directory and the optional transform.

        Args:
            dataframe: ``pandas.DataFrame`` with the file name in column 0
                and the integer label in column 2.
            img_dir: Directory that contains the image files.
            transform: Optional callable applied to each loaded PIL image.
        """
        super().__init__()
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Args:
            idx: Zero-based row position.

        Returns:
            A tuple ``(image, label)``. ``image`` is the RGB PIL image, or
            the result of ``transform(image)`` when a transform was given.
            ``label`` is a plain Python ``int``.
        """
        image_path = os.path.join(self.img_dir, self.dataframe.iloc[idx, 0])
        # The context manager closes the underlying file; convert() loads
        # the pixel data and returns a separate image, so it stays usable.
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        # A plain int makes the default collate function build an int64
        # label tensor regardless of the DataFrame's integer dtype.
        label = int(self.dataframe.iloc[idx, 2])
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# CNN Model
class SimpleCNN(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages and two linear layers.

    Each 3x3 convolution uses stride 1 and padding 1, so it keeps the
    spatial size; each 2x2 max-pool with stride 2 halves it (rounding
    down). After three stages the feature map has 128 channels and
    ``(height // 8, width // 8)`` spatial size.
    """

    def __init__(self, num_classes, input_size=(128, 128)):
        """Create the layers.

        Args:
            num_classes: Number of output logits produced by ``forward``.
            input_size: ``(height, width)`` of the images fed to the
                network. Defaults to ``(128, 128)``, which gives the
                ``128 * 16 * 16`` flattened feature size.

        Raises:
            ValueError: If either dimension is smaller than 8, because
                nothing would remain after the three poolings.
        """
        super().__init__()
        height, width = input_size
        # Three floor-halvings in a row equal one floor division by 8.
        pooled_h, pooled_w = height // 8, width // 8
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size must be at least 8x8, got {tuple(input_size)}"
            )
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(128 * pooled_h * pooled_w, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 3, height, width)`` whose height
                and width match the ``input_size`` given at construction.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with raw, unnormalized
            scores (no softmax is applied).
        """
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = self.pool(nn.functional.relu(self.conv3(x)))
        # Collapse channels and spatial dims, keeping the batch dimension.
        x = torch.flatten(x, 1)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

if __name__ == "__main__":
    # One DataLoader worker per logical CPU. psutil.cpu_count() can return
    # None when the count cannot be determined; fall back to 0, which makes
    # the DataLoader load batches in the main process.
    num_workers = psutil.cpu_count(logical=True) or 0
    batch_size = 32

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = ImageCaptionDataset(labels_df, image_dir, transform=transform)
    if len(dataset) == 0:
        # Fail early with a clear message instead of a ZeroDivisionError
        # when the epoch averages are computed.
        raise RuntimeError(
            f"No captions were loaded from {labels_path}; nothing to train on."
        )
    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        # Pinned host memory only helps host-to-GPU copies.
        pin_memory=device.type == "cuda",
        # persistent_workers=True is rejected when there are no workers.
        persistent_workers=num_workers > 0,
    )

    num_classes = len(label_encoder.classes_)
    model = SimpleCNN(num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    # Let cuDNN benchmark and pick convolution algorithms; input shapes are
    # fixed here, so the choice can be reused across batches.
    torch.backends.cudnn.benchmark = True

    # Training
    num_epochs = 100
    train_losses = []
    train_accuracies = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for i, (images, labels) in enumerate(data_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Predicted class = index of the largest logit in each row.
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            print(f"Batch [{i+1}/{len(data_loader)}], Loss: {loss.item():.4f}, RAM Usage: {psutil.virtual_memory().percent}%")

        # Loss is averaged over batches; accuracy is a percentage of samples.
        epoch_loss = running_loss / len(data_loader)
        epoch_accuracy = 100 * (correct / total)
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_accuracy)

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%, RAM Usage: {psutil.virtual_memory().percent}%")

        # Save model checkpoint (state_dict) after every epoch
        torch.save(model.state_dict(), os.path.join(model_dir, f"model_epoch_{epoch+1}.pth"))

    # Save the final state_dict
    final_model_path = os.path.join(model_dir, "final_model.pth")
    torch.save(model.state_dict(), final_model_path)
    print(f"Final model saved at: {final_model_path}")

    # Second copy of the same state_dict under a separate file name
    weights_path = os.path.join(model_dir, "model_weights.pth")
    torch.save(model.state_dict(), weights_path)
    print(f"Weights saved at: {weights_path}")

    # Plot learning curve: loss and accuracy (in percent) share one y-axis
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, num_epochs+1), train_losses, label="Loss", marker="o", linestyle="-")
    plt.plot(range(1, num_epochs+1), train_accuracies, label="Accuracy", marker="s", linestyle="--")
    plt.xlabel("Epoch")
    plt.ylabel("Value")
    plt.title("Training Loss & Accuracy Curve")
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(model_dir, "learning_curve.png"))
    plt.show()