In [1]:
import os
import sys
import torch

import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
import pandas as pd
import lightning.pytorch as pl

from lightning.pytorch import loggers as pl_loggers
from lightning.pytorch.callbacks import EarlyStopping
from torchmetrics import Accuracy
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from Resnet_from_scratch import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152

from sklearn.metrics import confusion_matrix
from PIL import Image
import numpy as np
import seaborn as sns

In [2]:
import torchmetrics.functional as functional
import torchmetrics
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

class VitLightningModule(pl.LightningModule):
    def __init__(self, model, learning_rate=1e-3, num_classes=11):
        super(VitLightningModule, self).__init__()
        self.model = model
        self.learning_rate = learning_rate
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task="multiclass", num_classes=num_classes)
        self.validation_step_y_hats = []
        self.validation_step_ys = []

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        acc = self.accuracy(y_hat.softmax(dim=-1), y)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        acc = self.accuracy(y_hat.softmax(dim=-1), y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        acc = self.accuracy(y_hat.softmax(dim=-1), y)
        self.log('test_loss', loss)
        self.log('test_acc', acc)

        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)


In [3]:
DATA_DIR = os.path.join(os.curdir, "data","raw","dataset")

In [4]:
TRAIN_SIZE = 0.8
BATCH_SIZE = 32

In [5]:
'''preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),          
    transforms.Normalize(           
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

data_dir = os.path.join(os.curdir, "data", "raw","dataset")
dataset = datasets.ImageFolder(data_dir, transform=preprocess)

train_size = int(TRAIN_SIZE * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# need to expirement with number of workers in each dataloader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)'''

'preprocess = transforms.Compose([\n    transforms.Resize((224, 224)),\n    transforms.ToTensor(),          \n    transforms.Normalize(           \n        mean=[0.485, 0.456, 0.406],\n        std=[0.229, 0.224, 0.225],\n    ),\n])\n\ndata_dir = os.path.join(os.curdir, "data", "raw","dataset")\ndataset = datasets.ImageFolder(data_dir, transform=preprocess)\n\ntrain_size = int(TRAIN_SIZE * len(dataset))\nval_size = int(0.1 * len(dataset))\ntest_size = len(dataset) - train_size - val_size\ntrain_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])\n\n# need to expirement with number of workers in each dataloader\ntrain_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\nval_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)\ntest_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)'

In [6]:
class ResNetLightningModule(pl.LightningModule):
    def __init__(self, model, learning_rate=1e-4 ,num_classes=11):
        super(ResNetLightningModule, self).__init__()
        self.model = model
        self.learning_rate = learning_rate
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])
        self.criterion = nn.CrossEntropyLoss()
        self.accuracy = Accuracy(task="multiclass", num_classes = num_classes)

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        acc = self.accuracy(y_hat.softmax(dim=-1), y)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        acc = self.accuracy(y_hat.softmax(dim=-1), y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        acc = self.accuracy(y_hat.softmax(dim=-1), y)
        self.log('test_loss', loss)
        self.log('test_acc', acc)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)
    
        
    

In [7]:
class ImageFolderDataModule(pl.LightningDataModule):
    def __init__(self, data_dir, batch_size=32, train_size=0.8):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.train_size = train_size
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def setup(self, stage=None):
        dataset = datasets.ImageFolder(self.data_dir, transform=self.transform)
        train_size = int(self.train_size * len(dataset))
        val_size = int(0.1 * len(dataset))
        test_size = len(dataset) - train_size - val_size
        self.train_dataset, self.val_dataset, self.test_dataset = random_split(dataset, [train_size, val_size, test_size])

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)
    
    


In [8]:
def plot_loss_curves(logger):
    metrics_path = os.path.join(logger.save_dir, logger.name, f"version_{logger.version}", "metrics.csv")
    
    if not os.path.exists(metrics_path):
        raise FileNotFoundError(f"Metrics file not found at: {metrics_path}")

    metrics = pd.read_csv(metrics_path)

    plt.figure(figsize=(10, 5))
    train_loss = metrics.dropna(subset=['train_loss'])
    val_loss = metrics.dropna(subset=['val_loss'])
    
    plt.plot(train_loss['epoch'], train_loss['train_loss'], label='Training Loss')
    plt.plot(val_loss['epoch'], val_loss['val_loss'], label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training vs Validation Loss')
    plt.show()

In [9]:
def main():
    model = ResNet18(num_classes = 11, channels = 3)
    pl_model = ResNetLightningModule(model)

    data_dir = os.path.join(os.curdir, "data", "raw", "dataset")
    data_module = ImageFolderDataModule(data_dir)

    csv_logger = pl_loggers.CSVLogger('logs/', name='csv_logs')

    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.00, patience=4, mode="min")

    trainer = pl.Trainer(callbacks=[early_stop_callback] ,max_epochs = 15 , devices = 1, accelerator='gpu', logger = csv_logger)
    trainer.fit(pl_model, data_module)
    trainer.test(pl_model, data_module)

    plot_loss_curves(csv_logger)
    

if __name__ == "__main__":
    main()


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | model     | ResNet             | 11.2 M | train
1 | criterion | CrossEntropyLoss   | 0      | train
2 | accuracy  | MulticlassAccuracy | 0      | train
---------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.748    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/mshaker/miniconda3/envs/res/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.
/home/mshaker/miniconda3/envs/res/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]