In [None]:
from torch.utils.data import Dataset, DataLoader
import torch
import cv2
import glob
from torchvision.io import read_image
from clearml import Task, logger
from torchvision.transforms import ToTensor, Compose, Normalize
import numpy as np
from datetime import datetime

from torchvision import models
import time
import copy
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm


from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
import gc

from clearml import Task, Logger, Dataset
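# ClearML credentials are expected to be configured here.
# Load them from environment variables or clearml.conf; do not commit keys to the notebook.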

In [None]:
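# Download the dataset from ClearML.
# NOTE: the training cell below reads `dataset_path`; uncomment this code
# (or define `dataset_path` manually) before running it.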
# dataset_name='Dataset_phones_splited_v1' 
# dataset_project='Check in car'
# dataset_path = Dataset.get(
#     dataset_name=dataset_name, 
#     dataset_project=dataset_project
# ).get_local_copy()

In [None]:
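# Create the ClearML task.
# NOTE: later cells call `logger.report_scalar(...)`, `logger.report_matrix(...)` and
# `logger.report_scatter2d(...)`, so `logger` must be bound to `task.get_logger()`
# (uncomment below) before they run.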
# task = Task.init(
#     project_name='Check in car', 
#     task_name='MobileNetV2_cls_v5', 
#     tags=['classification','MobileNetV2', 'StepLR'])
# logger = task.get_logger()

In [None]:
class Drivers_with_phone_Dataset(torch.utils.data.Dataset):
    """Image-folder dataset for binary "driver with phone" classification.

    ``img_dir`` must contain one sub-folder per class (``cellphone`` and
    ``no_cellphone``), each holding ``*.jpg`` images.  Every sample is returned
    as ``(image, label)`` with label ``1`` for ``cellphone`` and ``0`` for
    ``no_cellphone``.

    The base class is spelled ``torch.utils.data.Dataset`` on purpose: the
    notebook's import cell imports ``Dataset`` from ``torch.utils.data`` and
    later again from ``clearml``, so the bare name refers to the ClearML class.

    Args:
        img_dir: Directory with the class sub-folders (trailing slash optional).
        transform: Optional callable applied to the resized image array.  When
            omitted, ``ToTensor`` followed by ``Normalize`` is used.
        target_transform: Optional callable applied to the label tensor.

    Raises:
        ValueError: If a sub-folder containing ``*.jpg`` files is not a known class.
    """

    def __init__(self, img_dir, transform=None, target_transform=None):
        import os  # local import keeps this block self-contained

        self.img_dir = img_dir
        self.class_map = {"cellphone": 1, "no_cellphone": 0}
        self.data = []
        # os.path.join works with or without a trailing separator on img_dir;
        # sorting makes the sample order reproducible.
        for class_path in sorted(glob.glob(os.path.join(self.img_dir, "*"))):
            class_name = os.path.basename(class_path)
            img_paths = sorted(glob.glob(os.path.join(class_path, "*.jpg")))
            if img_paths and class_name not in self.class_map:
                # Fail at construction instead of with a KeyError mid-training.
                raise ValueError(
                    "Unknown class folder '{}' in '{}'; expected one of {}".format(
                        class_name, self.img_dir, sorted(self.class_map)
                    )
                )
            for img_path in img_paths:
                self.data.append([img_path, class_name])

        if transform is not None:
            self.transform = transform
        else:
            # NOTE(review): cv2.imread yields BGR channel order per the OpenCV docs,
            # while these look like the usual ImageNet RGB statistics — confirm
            # whether a BGR->RGB conversion is intended.  Left unchanged so that
            # previously saved checkpoints keep seeing the same inputs.
            self.transform = Compose(
                [ToTensor(), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
            )
        self.target_transform = target_transform
        # Passed to cv2.resize as its target size.
        self.img_dim = (640, 480)

    def __len__(self):
        """Return the number of collected image files."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, resize and transform sample ``idx``; return ``(image, label)``.

        Raises:
            OSError: If the image file cannot be decoded.
        """
        img_path, class_name = self.data[idx]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise OSError("Could not read image: {}".format(img_path))
        img = cv2.resize(img, self.img_dim)
        if self.transform:
            img = self.transform(img)

        class_id = torch.tensor(self.class_map[class_name])
        if self.target_transform:
            class_id = self.target_transform(class_id)

        return img, class_id

In [None]:
def train_one_epoch(epoch_index, tb_writer):
    """Run one optimisation pass over ``train_data_loader`` and return its metrics.

    Relies on the notebook-level globals ``model``, ``optimizer``, ``criterion``,
    ``scheduler``, ``device`` and ``train_data_loader``.

    Args:
        epoch_index: Epoch counter supplied by the caller; currently unused.
        tb_writer: TensorBoard writer supplied by the caller; currently unused.

    Returns:
        tuple: ``(avg_loss, train_acc_epoch)`` — the mean of the per-batch losses
        and the accuracy in percent.  Both are ``0.0`` when the loader is empty.
    """
    full_batch_loss = 0.
    total = 0
    correct = 0
    total_step = len(train_data_loader)

    for inputs, labels in train_data_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        full_batch_loss += float(loss.item())

        # Accuracy: predicted class is the index of the largest output
        _, pred = torch.max(outputs, dim=1)
        correct += torch.sum(pred == labels).item()
        total += labels.size(0)

        torch.cuda.empty_cache()

    # Advance the LR schedule once per epoch.  Stepping it after every batch
    # (as before) applied the decay every few batches and drove the learning
    # rate towards zero early in the first epoch.
    if total_step:
        scheduler.step()

    # Guard against an empty loader instead of raising ZeroDivisionError.
    train_acc_epoch = (100 * correct / total) if total else 0.0
    avg_loss = (full_batch_loss / total_step) if total_step else 0.0

    gc.collect()
    torch.cuda.empty_cache()

    return avg_loss, train_acc_epoch

In [None]:
# Constants / hyper-parameters
n_epochs = 10
batch_size = 32
lr = 0.001

# Lowest validation loss seen so far; starts at +inf so the first epoch always wins.
# `np.inf` is used because the `np.Inf` alias was removed in NumPy 2.0.
best_vloss = np.inf

# Per-epoch metric history
val_loss = []
val_acc = []
train_loss = []
train_acc = []

# Locate the train / test splits inside the downloaded dataset copy
drivers_data_train = dataset_path + '/dataset_phones_splited/train/'
drivers_data_test = dataset_path + '/dataset_phones_splited/test/'

train_data = Drivers_with_phone_Dataset(drivers_data_train)
test_data = Drivers_with_phone_Dataset(drivers_data_test)
print(len(train_data), len(test_data))

# Use the first GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Training batches are reshuffled; test batches keep a fixed order
train_data_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
test_data_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

# Sanity check: show the tensor shapes of the first training batch only
for features, labels in train_data_loader:
    print("Shape of batch of features:        ", features.shape)
    print("Shape of the corresponding labels: ", labels.shape)
    break

# Model, optimizer, loss function
# Randomly initialised MobileNetV2.  The no-argument call replaces the deprecated
# `pretrained=False` keyword and matches how the evaluation cell rebuilds the model.
model = models.mobilenet_v2()
n = model.classifier[1].in_features
# Replaces the whole `classifier` head (not just its last layer) with a single
# 2-way linear layer; saved state dicts therefore use `classifier.weight/bias`.
model.classifier = nn.Linear(n, 2)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
# StepLR multiplies the learning rate by `gamma` every `step_size` calls to scheduler.step()
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

# Run identifier shared by the TensorBoard log directory and the checkpoint file name
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/MobileNet_v2_{}'.format(timestamp))
epoch_number = 0

for epoch in range(n_epochs):
    print('EPOCH {}:'.format(epoch_number + 1))

    # ---- Training phase (layers in training mode) ----
    torch.cuda.empty_cache()
    model.train()
    avg_loss, train_acc_epoch = train_one_epoch(epoch_number, writer)

    # ---- Validation phase (layers in eval mode, no gradient tracking) ----
    model.eval()
    torch.cuda.empty_cache()

    with torch.no_grad():
        print("Validation phase")
        running_vloss = 0.0
        total_v = 0
        correct_v = 0
        for vinputs, vlabels in test_data_loader:
            vinputs = vinputs.to(device)
            vlabels = vlabels.to(device)
            voutputs = model(vinputs)
            vloss = criterion(voutputs, vlabels)
            running_vloss += float(vloss.item())

            # Predicted class = index of the largest output per sample
            _, pred_v = torch.max(voutputs, dim=1)
            correct_v += torch.sum(pred_v == vlabels).item()
            total_v += vlabels.size(0)
            torch.cuda.empty_cache()

    # Mean of per-batch losses and accuracy in percent
    avg_vloss = running_vloss / len(test_data_loader)
    val_acc_epoch = (100 * correct_v / total_v)
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
    print("ACC train {} valid {}".format(train_acc_epoch, val_acc_epoch))

    del running_vloss
    gc.collect()

    # Keep the per-epoch history
    train_loss.append(avg_loss)
    val_loss.append(float(avg_vloss))
    train_acc.append(train_acc_epoch)
    val_acc.append(val_acc_epoch)

    # When Kaggle is down
#     json.dump(train_loss, open("train_loss.json", "w"), indent=4)
#     json.dump(val_loss, open("val_loss.json", "w"), indent=4)
#     json.dump(train_acc, open("train_acc.json", "w"), indent=4)
#     json.dump(val_acc, open("val_acc.json", "w"), indent=4)

    # Log loss and accuracy to ClearML (same four scalars, same order)
    iteration = epoch_number + 1
    for title, series, value in (
        ('Training vs. Validation Loss', "Training", avg_loss),
        ('Training vs. Validation Loss', "Validation", avg_vloss),
        ('Training vs. Validation Accuracy', "Training", train_acc_epoch),
        ('Training vs. Validation Accuracy', "Validation", val_acc_epoch),
    ):
        logger.report_scalar(title, series, iteration=iteration, value=value)

    # Save the weights whenever the validation loss improves
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}_{}'.format(timestamp, "best")
        torch.save(model.state_dict(), model_path)

    epoch_number += 1

# Test, Confusion Matrix, ROC-AUC

In [None]:
# # Code To Test Pretrained Model

# drivers_data_test= "/kaggle/input/dataset-phones-splited/dataset_phones_splited/test/"
# test_data = Drivers_with_phone_Dataset(drivers_data_test)
# test_data_loader = DataLoader(test_data, batch_size=32,
#                           shuffle=False, num_workers=0)

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model = models.mobilenet_v2()
# n = model.classifier[1].in_features
# model.classifier = nn.Linear(n, 2)
# model = model.to(device)
# model.load_state_dict(torch.load("/kaggle/input/model-resnet/model_20230328_082109_best"))

In [None]:
# Test Model: collect predictions, ground truth and raw outputs over the test set

y_pred_list = []
y_test = []
# Raw model outputs per batch (one column per class), moved to the CPU so the
# list does not keep GPU memory alive.
y_pred_proba = []
model.eval()
with torch.no_grad():
    for X_batch, y_batch in test_data_loader:
        X_batch = X_batch.to(device)
        y_test_pred = model(X_batch)
        _, y_pred_tags = torch.max(y_test_pred, dim=1)
        y_pred_list.append(y_pred_tags.cpu().numpy())
        y_test.append(y_batch.cpu().numpy())
        y_pred_proba.append(y_test_pred.cpu())

# For the confusion matrix and ROC: one Python list per batch.
# No .squeeze() here — on a final batch of size 1 it produced a 0-d array whose
# .tolist() is a bare scalar, which broke the flattening below.
y_pred_list = [a.tolist() for a in y_pred_list]
y_test = [a.tolist() for a in y_test]

# One flat list over all batches
y_pred_list_flat = [item for sublist in y_pred_list for item in sublist]
y_test_flat = [item for sublist in y_test for item in sublist]

In [None]:
# Calc Confusion matrix and log to Clear ML
# `labels=[0, 1]` pins the matrix to 2x2 (row/column 0 = no phone, 1 = phone) so it
# always lines up with the two hard-coded axis labels, even if a class is absent
# from both the targets and the predictions.
confusion_matrix_1 = confusion_matrix(y_test_flat, y_pred_list_flat, labels=[0, 1])

logger.report_matrix(
    "Confusion_matrix_",
    "ignored",
    matrix=confusion_matrix_1,
    xaxis="Predicted label",
    yaxis="True label",
    xlabels=["no phones", "phone"],
    ylabels=["no phones", "phone"],
    yaxis_reversed=True
)

In [None]:
# Calc Roc-Auc
# ROC needs a continuous score per sample.  Using the thresholded class labels
# (as before) collapses the curve to a single operating point, so take the
# softmax probability of class 1 from the raw outputs collected in `y_pred_proba`.
y_score_flat = torch.cat(y_pred_proba).softmax(dim=1)[:, 1].cpu().numpy()

fpr, tpr, thresholds = roc_curve(y_test_flat, y_score_flat)
auc = roc_auc_score(y_test_flat, y_score_flat)
print("ROC AUC: {:.4f}".format(auc))

logger.report_scatter2d(
    "ROC AUC Curve",
    "ROC",
    # Materialised list of (fpr, tpr) pairs instead of a one-shot zip iterator
    scatter=list(zip(fpr, tpr)),
    yaxis="tpr",
    xaxis="fpr",
    mode='lines+markers'
)
logger.flush()