In [None]:
# Extract the dataset archive into the working directory (-q keeps unzip quiet).
# NOTE(review): no -o/-n flag is given, so re-running this cell over already
# extracted files may stop to ask about overwriting — confirm for "Run All".
!unzip -q data.zip

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim  
import torchvision.transforms as transforms
import torchvision
import os
from torchvision.io import decode_jpeg
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets,models
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torchvision.io import read_image, ImageReadMode
from pytorch_lightning.loggers import WandbLogger
import wandb

In [None]:
# Paths and hyper-parameters shared by the rest of the notebook.
PATH = './'
NUM_CLASSES = 10
BATCH_SIZE = 32
lr = 1e-5
epochs = 100
dropout = 0

# Run metadata passed to the W&B logger. Values are taken from the constants
# above wherever one exists so the logged run cannot drift from what is trained.
config = {
    # The notebook fine-tunes torchvision's resnet50 (and names its checkpoints
    # 'resnet50-...'), so log that backbone rather than a stale model name.
    'base_model': 'ResNet50',
    'num_classes': NUM_CLASSES,
    'batch_size': BATCH_SIZE,
    'learning_rate': lr,
    'frozen_layers': 2,
    'frozen_blocks': 0,
    'epochs': epochs,
    'image_size': (480, 640),
    'dropout': dropout,
    'Augmentation': "Color Jitter"
}

In [None]:
class CreateDataset(Dataset):
    """Dataset of grayscale frames described by a DataFrame.

    Columns are read by position: column 0 is the image path (appended to the
    module-level ``PATH``), column 1 is the class label and column 3 is the
    camera name. Every image is loaded as a single gray channel scaled to
    ``[0, 1]`` and repeated to three channels.

    Rows whose camera is ``"Camera 2"`` are mirrored horizontally and their
    labels are swapped 1<->3 and 2<->4 (presumably left/right variants of the
    same class — confirm against the label definitions).

    Args:
        df: table describing the samples.
        transform: optional callable applied to the image tensor before the
            camera-dependent mirroring. Any falsy value disables it.
    """

    # Label swap that accompanies the horizontal mirror of "Camera 2" frames.
    _MIRRORED_LABELS = {1: 3, 2: 4, 3: 1, 4: 2}

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``.

        ``image`` is a float tensor with three identical channels and
        ``label`` is a built-in ``int``.
        """
        img_path = self.df.iloc[index, 0]
        label = self.df.iloc[index, 1]

        # Scale 8-bit gray values to [0, 1], then replicate the channel.
        image = read_image(PATH + img_path, mode=ImageReadMode.GRAY) / 255.0
        image = image.repeat((3, 1, 1))

        if self.transform:
            image = self.transform(image)

        if self.df.iloc[index, 3] == "Camera 2":
            # Deterministic flip: no need to build a random transform with
            # p=1.0 (and draw from the RNG) for every sample.
            image = transforms.functional.hflip(image)
            label = self._MIRRORED_LABELS.get(label, label)

        return image, int(label)
    
    
class ConcatDataset(Dataset):
    """Serve several datasets side by side.

    Item ``idx`` is a tuple holding one sample from each wrapped dataset.
    The combined length is that of the longest dataset; shorter ones wrap
    around through modulo indexing.
    """

    def __init__(self, *datasets):
        super().__init__()
        self.datasets = datasets

    def __len__(self):
        sizes = [len(member) for member in self.datasets]
        return max(sizes)

    def __getitem__(self, idx):
        samples = []
        for member in self.datasets:
            # Wrap the index so shorter datasets are cycled.
            samples.append(member[idx % len(member)])
        return tuple(samples)

In [None]:
# Take the input resolution from `config` so the value logged with the run is
# the one actually applied, instead of repeating the literal in every pipeline.
image_size = config['image_size']

# Channel statistics conventionally used with torchvision's pretrained models;
# one shared (stateless) instance serves both pipelines.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Evaluation pipeline: resize and normalise only.
transformers_test = transforms.Compose([
    transforms.Resize(image_size),
    normalize,
])

# Training pipeline: same as evaluation plus colour-jitter augmentation.
transformers_train = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ColorJitter(brightness=0.75, contrast=0.75, saturation=0.75),
    normalize,
])

# Sample tables for each split.
train1_df = pd.read_csv(PATH + "data/train.csv")
train2_df = pd.read_csv(PATH + "data/new_train.csv")
val_df = pd.read_csv(PATH + "data/val.csv")
test_df = pd.read_csv(PATH + "data/test.csv")

# The two training tables are served side by side: each training item is a
# pair holding one sample from each table.
train1_dataset = CreateDataset(train1_df, transformers_train)
train2_dataset = CreateDataset(train2_df, transformers_train)
train_dataset = ConcatDataset(train1_dataset, train2_dataset)
test_dataset = CreateDataset(test_df, transformers_test)
val_dataset = CreateDataset(val_df, transformers_test)

In [None]:
# Each training item is a pair of samples (one per wrapped dataset) that the
# training step concatenates, so half the nominal batch size is requested here.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE // 2,
    shuffle=True,
    num_workers=2,
)

# Evaluation loaders keep the file order (no shuffling).
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

In [None]:
import pytorch_lightning as pl
import torchmetrics
from torch import nn


class Model(pl.LightningModule):
    """Pretrained ResNet-50 fine-tuned as an ``output_units``-way classifier.

    The stem (``conv1`` and ``bn1``) is frozen; all remaining layers, including
    the replacement ``fc`` head, are optimised with Adam under a cyclic
    learning-rate schedule.

    Args:
        output_units: number of outputs of the replacement ``fc`` layer.
        learning_rate: Adam learning rate, also the lower bound of the LR cycle.
    """

    def __init__(self, output_units, learning_rate):
        super().__init__()
        self.base_model = torchvision.models.resnet50(pretrained=True)
        # Swap the pretrained classification head for one sized to this task.
        self.base_model.fc = torch.nn.Linear(in_features=self.base_model.fc.in_features, out_features=output_units)

        # NOTE(review): requires_grad=False only stops gradient updates; bn1's
        # running statistics still change while the module is in train mode —
        # confirm that is intended.
        freezing_layers = [
            self.base_model.conv1,
            self.base_model.bn1
        ]

        for layer in freezing_layers:
            for param in layer.parameters():
                param.requires_grad = False

        self.criterion = nn.CrossEntropyLoss()
        # One metric object per stage so accumulated state is never shared
        # between training, validation and testing.
        # NOTE(review): assumes metric states are not part of the state_dict,
        # so older checkpoints still load — confirm for the installed version.
        self.train_acc = torchmetrics.Accuracy()
        self.val_acc = torchmetrics.Accuracy()
        self.test_acc = torchmetrics.Accuracy()

        self.learning_rate = learning_rate
        self.save_hyperparameters()

    def forward(self, input_data):
        """Return the raw class scores (logits) of the backbone for ``input_data``."""
        outputs = self.base_model(input_data)
        return outputs

    def training_step(self, batch, batch_nb):
        """Compute the loss on one training batch.

        ``batch`` is iterated as a sequence of sub-batches, each indexable as
        ``(inputs, targets)``; they are concatenated along dim 0 before the
        forward pass.
        """
        input_data = torch.cat([sub_batch[0] for sub_batch in batch], dim=0)
        targets = torch.cat([sub_batch[1] for sub_batch in batch], dim=0)
        preds = self(input_data)
        loss = self.criterion(preds, targets)
        self.log('train_loss', loss)
        self.train_acc(preds, targets)
        self.log('train_acc', self.train_acc, on_step=True, on_epoch=False, prog_bar=True)

        return loss

    def validation_step(self, batch, batch_nb):
        """Log epoch-level ``val_loss`` / ``val_acc`` for one validation batch."""
        self._evaluate(batch, 'val')

    def test_step(self, batch, batch_nb):
        """Log epoch-level ``test_loss`` / ``test_acc`` for one test batch."""
        self._evaluate(batch, 'test')

    def _evaluate(self, batch, name):
        """Shared validation/test logic; ``name`` is the metric prefix ('val' or 'test')."""
        input_data, targets = batch
        preds = self(input_data)
        loss = self.criterion(preds, targets)
        self.log(f'{name}_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        # Test accuracy gets its own accumulator instead of reusing val_acc.
        accuracy = self.test_acc if name == 'test' else self.val_acc
        accuracy(preds, targets)
        self.log(f'{name}_acc', accuracy, on_step=False, on_epoch=True, prog_bar=True)

    def predict_step(self, batch, batch_nb):
        """Return the predicted class index for every sample in ``batch``."""
        input_data, _ = batch
        preds = self(input_data)
        return torch.argmax(preds, dim=1)

    def configure_optimizers(self):
        """Return Adam plus a cyclic LR schedule that is stepped every batch."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=self.learning_rate, max_lr=1e-4, cycle_momentum=False)
        # CyclicLR is meant to advance after every batch (its cycle length is
        # counted in optimisation steps). A bare scheduler would be stepped by
        # Lightning once per epoch, leaving the learning rate almost constant.
        return [optimizer], [{'scheduler': scheduler, 'interval': 'step'}]

In [None]:
model = Model(NUM_CLASSES, lr)

# Keep the checkpoint with the highest validation accuracy.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor='val_acc',
    dirpath=PATH,
    verbose=True,
    mode='max',
    filename='resnet50-{val_acc:.4f}',
)
# Stop once validation accuracy has not improved for 20 consecutive checks.
early_stopping_callback = pl.callbacks.EarlyStopping(
    monitor='val_acc',
    patience=20,
    verbose=True,
    mode='max',
)
callbacks = [checkpoint_callback, early_stopping_callback]

wandb_logger = WandbLogger(
    project="Driver-Distraction",
    entity='graduation-project',
    config=config,
)
trainer = pl.Trainer(
    max_epochs=epochs,
    callbacks=callbacks,
    gpus=1,
    logger=wandb_logger,
)
trainer.fit(model, train_dataloader, val_dataloader)

In [None]:
# Run the test loop (Model.test_step) over the test split.
# NOTE(review): neither a model nor a ckpt_path is passed, so which weights are
# evaluated depends on the installed Lightning version's default — confirm.
trainer.test(dataloaders=test_dataloader)

In [None]:
# Run the validation loop (Model.validation_step) over the validation split.
# NOTE(review): neither a model nor a ckpt_path is passed, so which weights are
# evaluated depends on the installed Lightning version's default — confirm.
trainer.validate(dataloaders=val_dataloader)

In [None]:
# Collect the outputs of Model.predict_step (argmax class indices) for the
# validation split; val_dataloader does not shuffle, so file order is kept.
y_pred = trainer.predict(dataloaders=val_dataloader)

In [None]:
# Flatten the per-batch prediction tensors into one list of plain Python ints.
y_pred_all = [
    predicted_class
    for batch_predictions in y_pred
    for predicted_class in batch_predictions.cpu().tolist()
]

In [None]:
# Attach the predictions to the validation table and write it out for offline inspection.
# NOTE: this adds a column to val_df in place; val_dataset holds the same DataFrame object.
# NOTE(review): CreateDataset swaps labels 1<->3 and 2<->4 for "Camera 2" rows, so `pred`
# is in that remapped label space while the labels stored in the table are not — confirm
# before comparing the two columns row by row.
val_df['pred'] = y_pred_all

val_df.to_csv('val_pred.csv', index=False)

In [None]:
from sklearn.metrics import accuracy_score

# CreateDataset mirrors "Camera 2" frames and swaps labels 1<->3 and 2<->4
# before they reach the model, so the predictions are in that remapped label
# space. Apply the same remap to the ground truth; comparing against the raw
# column would count correctly classified Camera 2 samples of classes 1-4 as errors.
mirrored_label = {1: 3, 2: 4, 3: 1, 4: 2}
# The camera name is read by position (column 3), exactly as CreateDataset does.
is_camera2 = val_df.iloc[:, 3] == "Camera 2"
y_true = val_df['label'].where(
    ~is_camera2,
    val_df['label'].map(lambda label: mirrored_label.get(label, label)),
)

accuracy_score(y_true, y_pred_all)

In [None]:
# Write a checkpoint of the trainer's current model state to model.ckpt in the working directory.
trainer.save_checkpoint("model.ckpt")