In [1]:
# Import everything
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from transformers import ViTImageProcessor
from datasets import load_dataset, load_metric
import glob
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as TF
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import torchvision.models as models
import torchvision.transforms as transforms
import copy

In [2]:
# Custom Dataset class (wrapped by the DataLoaders created in a later cell)
class CustomDataset(Dataset):
    """Image-folder dataset yielding ``(image, class_id)`` pairs.

    ``folder_path`` is expected to contain one sub-directory per class, each
    holding ``*.jpg`` files. The sub-directory name is used as the class name
    and is translated to a class id through ``class_map_dict``.

    Args:
        folder_path: Directory whose immediate children are the class folders.
        class_map_dict: Mapping from class-folder name to class id,
            e.g. ``{"dogs": 0, "cats": 1}``.
        transform: Optional callable applied to the array returned by
            ``cv2.imread``. When ``None`` the raw array is returned unchanged.

    Attributes:
        imgs_path: The ``folder_path`` passed to the constructor.
        data: List of ``[img_path, class_name]`` entries, sorted by path.
        class_map: The ``class_map_dict`` passed to the constructor.
        transform: The ``transform`` passed to the constructor.
    """

    def __init__(self, folder_path, class_map_dict, transform=None):
        self.imgs_path = folder_path
        # Maps the class-folder name to its class id.
        self.class_map = class_map_dict
        self.transform = transform

        # Collect every [image path, class name] pair. Sorting makes the
        # index -> sample mapping deterministic across runs and platforms.
        self.data = []
        for class_path in sorted(glob.glob(os.path.join(self.imgs_path, "*"))):
            # basename() honours the platform's path separators, so the class
            # name is extracted correctly on Windows and on POSIX systems.
            class_name = os.path.basename(os.path.normpath(class_path))
            for img_path in sorted(glob.glob(os.path.join(class_path, "*.jpg"))):
                self.data.append([img_path, class_name])

    def __len__(self):
        """Return the number of images found under ``folder_path``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_id)``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
            KeyError: If the image's folder name is missing from ``class_map``.
        """
        img_path, class_name = self.data[idx]
        # cv2.imread returns None (instead of raising) for unreadable files.
        # Note: OpenCV loads channels in BGR order; the order is kept as-is
        # because the notebook computes its normalisation statistics through
        # this same loader.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        class_id = self.class_map[class_name]
        if self.transform is not None:
            img = self.transform(img)
        return img, class_id

In [3]:
# Per-channel normalisation constants (these match the values printed by the
# mean/std cell further down in this notebook).
CHANNEL_MEAN = [0.4672, 0.4943, 0.4919]
CHANNEL_STD = [0.1296, 0.1288, 0.1283]

# Preprocessing applied to every image: array -> PIL image -> 224x224 resize
# -> tensor -> per-channel normalisation.
transform = TF.Compose([
    TF.ToPILImage(),
    TF.Resize((224, 224)),
    TF.ToTensor(),
    TF.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
])

# Dataset maker: the train and validation splits live in sibling folders and
# share the same class-name -> class-id mapping and the same transform.
DATA_ROOT = r"D:\College\Semester_2\CT5135\Model_1"
CLASS_MAP = {"wet_asphalt_slight": 0, "not_wet_asphalt_slight": 1}

train_dataset, valid_dataset = (
    CustomDataset(
        folder_path=DATA_ROOT + "\\" + split_name,
        class_map_dict=dict(CLASS_MAP),  # each dataset gets its own dict
        transform=transform,
    )
    for split_name in ("train", "valid")
)

In [4]:
# Make the data loaders.
BATCH_SIZE = 16

# Training: reshuffle every epoch and drop the final partial batch so every
# optimisation step sees a full batch.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, pin_memory=True)

# Validation: every sample must be scored exactly once per epoch, in a fixed
# order, so metrics are comparable between epochs. Shuffling together with
# drop_last would silently discard a different random tail of samples on each
# pass.
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, pin_memory=True)

In [21]:
# Per-channel mean / std of the training images (the values fed to Normalize).
#
# The statistics have to be measured on un-normalised pixels, so this cell
# builds its own pipeline without the Normalize step instead of iterating
# `train_loader` (whose transform already normalises). It also keeps every
# image (no drop_last), so the pixel count matches the pixels actually summed.
stats_transform = TF.Compose([
    TF.ToPILImage(),
    TF.Resize((224, 224)),
    TF.ToTensor(),
])
stats_dataset = CustomDataset(
    folder_path=train_dataset.imgs_path,
    class_map_dict=train_dataset.class_map,
    transform=stats_transform,
)
stats_loader = DataLoader(stats_dataset, batch_size=16, shuffle=False, drop_last=False)

# Running sums, accumulated in float64 so that adding billions of pixel values
# does not lose precision.
psum = torch.zeros(3, dtype=torch.float64)
psum_sq = torch.zeros(3, dtype=torch.float64)
count = 0  # number of pixels per channel actually accumulated

# loop through images
for inputs, labels in tqdm(stats_loader):
    inputs = inputs.double()
    # Reduce over batch, height and width; keep the channel dimension.
    psum += inputs.sum(dim=(0, 2, 3))
    psum_sq += (inputs ** 2).sum(dim=(0, 2, 3))
    count += inputs.size(0) * inputs.size(2) * inputs.size(3)

if count == 0:
    raise ValueError("No training images found; cannot compute mean/std")

# mean and std, using Var[x] = E[x^2] - E[x]^2
mean_f64 = psum / count
var_f64 = (psum_sq / count) - (mean_f64 ** 2)
total_mean = mean_f64.float()
total_var = var_f64.float()
total_std = torch.sqrt(var_f64).float()

# output
print("mean: " + str(total_mean))
print("std:  " + str(total_std))

100%|██████████| 8437/8437 [02:45<00:00, 51.06it/s]

mean: tensor([0.4672, 0.4943, 0.4919])
std:  tensor([0.1296, 0.1288, 0.1283])





In [5]:
# Pick the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f"Using {device} for training")

Using cuda:0 for training


In [7]:

# Fine-tune a pretrained ResNet-50 and track loss / accuracy per epoch.

# Per-epoch history, used later for plotting.
train_loss_list = []
train_accuracy_list = []
test_loss_list = []
test_accuracy_list = []

best_accuracy = 0.0
# State dict of the best epoch so far; stays None until an epoch improves on
# `best_accuracy`.
best_model_weights = None
number_of_epoch = 2

# NOTE(review): the pretrained classifier head is used unchanged although the
# class ids in this notebook are only 0 and 1 — consider replacing `model.fc`
# with a 2-output layer.
model = models.resnet50(pretrained=True).to(device)

# define the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

# Define learning rate scheduler (stepped once per epoch at the end of the loop)
schedular = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
criterion = nn.CrossEntropyLoss()

save_interval = 5  # not referenced in this cell

for epoch in range(number_of_epoch):
    # ---------------- training ----------------
    model.train()
    train_iterator = tqdm(train_loader, desc=f"Epoch {epoch+1}/{number_of_epoch}", unit="batch")
    train_loss = 0.0   # sum of per-sample losses
    train_total = 0    # number of samples seen
    train_correct = 0  # number of correct predictions

    for inputs, labels in train_iterator:
        inputs, labels = inputs.to(device), labels.to(device)
        # Zero the parameter gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        # Update the progress bar
        train_iterator.set_postfix_str(f"Loss: {loss.item():.6f}")

        # criterion returns the batch mean, so weight it by the batch size to
        # accumulate a per-sample sum.
        train_loss += loss.item() * inputs.size(0)

        # Compute training accuracy
        _, predicted = torch.max(outputs, 1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    # The accumulated loss is a per-sample sum, so average over the number of
    # samples (not the number of batches). max(..., 1) guards an empty loader.
    train_loss = train_loss / max(train_total, 1)
    train_accuracy = 100.0 * train_correct / max(train_total, 1)

    # ---------------- validation ----------------
    test_loss = 0.0
    test_total = 0
    test_correct = 0

    # Switch to evaluation mode
    model.eval()

    valid_iterator = tqdm(valid_loader, desc=f"Epoch {epoch+1}/{number_of_epoch}", unit="batch")

    with torch.no_grad():
        for inputs, labels in valid_iterator:
            inputs, labels = inputs.to(device), labels.to(device)
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            # Update the progress bar
            valid_iterator.set_postfix_str(f"Loss: {loss.item():.6f}")

            # Compute validation accuracy
            _, predicted = torch.max(outputs, 1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()

    # Same per-sample averaging as for the training metrics.
    test_loss = test_loss / max(test_total, 1)
    test_accuracy = 100.0 * test_correct / max(test_total, 1)

    # Save variables to make graph later
    train_loss_list.append(train_loss)
    train_accuracy_list.append(train_accuracy)
    test_loss_list.append(test_loss)
    test_accuracy_list.append(test_accuracy)

    if test_accuracy > best_accuracy:
        print(f"Saving the best model with accuracy: {test_accuracy:.2f}%")
        best_accuracy = test_accuracy
        best_model_weights = copy.deepcopy(model.state_dict())
        torch.save(model.state_dict(), "best_model.pth")

    # Advance the cosine schedule; without this call the learning rate never
    # changes from its initial value.
    schedular.step()



Epoch 1/2: 100%|██████████| 4218/4218 [14:33<00:00,  4.83batch/s, Loss: 0.068777]
Epoch 2/2:   9%|▉         | 381/4218 [00:49<08:20,  7.67batch/s, Loss: 0.012807]


KeyboardInterrupt: 

In [8]:
def plot_loss(train_losses, test_losses):
    """Plot training and validation loss curves, save them, and show the figure.

    Each sequence is plotted against its own 0-based index (axis labelled
    'Epoch'). The figure is written to 'loss_plot.png' before being shown.

    Args:
        train_losses: Sequence of training loss values.
        test_losses: Sequence of validation loss values.
    """
    curves = (
        (train_losses, 'Training Loss'),
        (test_losses, 'Validation Loss'),
    )
    plt.figure()
    for values, legend_name in curves:
        epoch_axis = range(len(values))
        plt.plot(epoch_axis, values, label=legend_name)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('loss_plot.png')
    plt.show()

def plot_accuracy(train_accuracies, test_accuracies):
    """Plot training and validation accuracy curves, save them, and show the figure.

    Each sequence is plotted against its own 0-based index (axis labelled
    'Epoch'). The figure is written to 'accuracy_plot.png' before being shown.

    Args:
        train_accuracies: Sequence of training accuracy values.
        test_accuracies: Sequence of validation accuracy values.
    """
    curves = (
        (train_accuracies, 'Training Accuracy'),
        (test_accuracies, 'Validation Accuracy'),
    )
    plt.figure()
    for values, legend_name in curves:
        epoch_axis = range(len(values))
        plt.plot(epoch_axis, values, label=legend_name)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig('accuracy_plot.png')
    plt.show()

def plot_image(dataset, model, classes):
    """Show one random sample with the model's predicted and true class.

    Picks a random index, runs ``model`` on that sample, displays the image
    with a 'Predicted / True' title, saves the figure to 'predicted_image.png'
    and prints both labels. ``model`` is switched to eval mode as a side effect
    and is expected to already live on the module-level ``device``.

    Args:
        dataset: Indexable dataset returning ``(image_tensor, class_id)``.
        model: Model called on a single-image batch.
        classes: Lookup from class id to display name (list or dict).

    Raises:
        ValueError: If ``dataset`` is empty (raised by ``random.randrange``).
    """
    # randrange excludes the upper bound; randint(0, len(dataset)) could return
    # len(dataset) itself and index one past the end.
    idx = random.randrange(len(dataset))
    # Fetch the sample once so the image is not loaded and transformed twice.
    image, label = dataset[idx]
    batch = image.unsqueeze(0).to(device)  # add the batch dimension, move to the device

    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(batch)
    _, predicted = torch.max(output, 1)
    # Plain Python int, so the lookup works for both list- and dict-style `classes`.
    predicted_idx = predicted[0].item()

    # Drop only the batch dimension and reorder to height x width x channel.
    # NOTE(review): if the dataset normalises its images, the pixel values
    # shown here are the normalised ones, not the original colours.
    img = batch.squeeze(0).permute(1, 2, 0).cpu()
    plt.imshow(img)
    plt.axis('off')
    plt.title(f'Predicted: {classes[predicted_idx]}, True: {classes[label]}')
    plt.savefig('predicted_image.png')
    plt.show()
    print("Predicted label: ", classes[predicted_idx])
    print("Actual label: ", classes[label])