In [1]:
import torch
import torch.nn as nn
from torch.optim import AdamW, Adam, SGD
from torch.utils.data import DataLoader, random_split
from torch.optim.lr_scheduler import LambdaLR
import torch.nn.functional as F

import pandas as pd
from tqdm.auto import tqdm

In [2]:
from timeit import default_timer as timer
from torch.profiler import profile, record_function, ProfilerActivity

In [3]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

# Utility Functions

In [4]:
def print_train_time(start:float, end:float, device: torch.device = None):
    """Print and return the elapsed time between two timer readings.

    Args:
        start: timer value taken before the work began.
        end: timer value taken after the work finished.
        device: label of the device the work ran on; only interpolated into
            the printed message.

    Returns:
        The elapsed time ``end - start``, in the same unit as the inputs.
    """
    elapsed = end - start
    print(f"Train time on {device}: {elapsed:.3f} seconds")
    return elapsed

In [5]:
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of positions where y_pred equals y_true.

    Both arguments are compared element-wise with ``torch.eq``; the number of
    matches is divided by ``len(y_pred)``.
    """
    # NOTE(review): a later cell imports `accuracy_fn` from `training_function`,
    # which rebinds this name; confirm which implementation is meant to be used.
    matches = torch.eq(y_true, y_pred)
    n_correct = matches.sum().item()
    return (n_correct / len(y_pred)) * 100

In [6]:
def results(res: str, name_width: int = 57):
    """Parse a fixed-width text table into a DataFrame of string cells.

    Expected layout of ``res`` (one table line per text line):
      * line 1 (0-based) is the header row;
      * table rows run from line 3 up to, but excluding, the last four lines;
      * the first ``name_width`` characters of each row hold the row name and
        the rest holds the cells, separated by two or more spaces.

    NOTE(review): this layout presumably matches the text produced by
    torch.profiler's ``key_averages().table()`` -- confirm against the caller.

    Args:
        res: the full table text.
        name_width: width, in characters, of the leading name column.

    Returns:
        pandas.DataFrame indexed by row name, with one column per header cell
        after the first (the name column's own header is dropped). All values
        are kept as strings.

    Raises:
        IndexError: if ``res`` has fewer than two lines.
        ValueError: if a row has more cells than the header has columns.
    """
    def _cells(text):
        # Strip BEFORE filtering: an odd-length run of spaces leaves a
        # whitespace-only fragment after splitting on two spaces, which must
        # not be mistaken for a real (empty) cell.
        stripped = (fragment.strip() for fragment in text.split('  '))
        return [cell for cell in stripped if cell]

    lines = res.split('\n')
    body = lines[3:-4]

    data = [_cells(row[name_width:]) for row in body]
    index = [row[:name_width].strip() for row in body]
    head = _cells(lines[1].strip())

    # Passing columns/index to the constructor also yields a well-formed empty
    # frame when the table has no rows.
    return pd.DataFrame(data, columns=head[1:], index=index)

# Model CNN

In [7]:
from model import CNNModel
from training_function import train_step, test_step, accuracy_fn

# Data

In [8]:
import joblib
from torch.utils import data

In [9]:
# Serialized dataset files, later read with joblib.load.
train_data = "train.jb"
valid_data = "test.jb"

In [10]:
# --- optimisation settings ---
lr = 3e-5          # initial learning rate passed to the optimizer
batch_size = 128   # mini-batch size of the training DataLoader
epochs = 25        # train/validate epochs per benchmark repetition

# --- data loading ---
n_workers = 0      # DataLoader worker processes (0 = load in the main process)

# --- not referenced by any cell visible in this notebook ---
valid_steps = 1
save_steps = 50

In [11]:
# Each file is unpacked as an (inputs, labels) pair.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code;
# only load files from a trusted source.
train_x, train_y = joblib.load(train_data)
val_x, val_y = joblib.load(valid_data)

# One-hot encode the labels.
# NOTE(review): the two splits are encoded independently, so the column sets
# only line up if both splits contain the same classes -- confirm.
train_y = pd.get_dummies(train_y).values
val_y = pd.get_dummies(val_y).values

# Wrap inputs and one-hot targets as tensors for the DataLoaders.
train_set = data.TensorDataset(
    torch.Tensor(train_x),
    torch.Tensor(train_y),
)
valid_set = data.TensorDataset(
    torch.Tensor(val_x),
    torch.Tensor(val_y),
)

In [12]:
# Sanity check: display the shapes of the training and validation inputs.
(train_x.shape, val_x.shape)

((6900, 3, 500), (3477, 3, 500))

In [13]:
# Mini-batch loader used for optimisation: reshuffled every epoch, and the
# final incomplete batch is discarded (drop_last=True).
train_dataloader = DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=n_workers,
    pin_memory=True,
    drop_last=True,
)

# Validation loader: the whole validation set is served as one batch.
valid_dataloader = DataLoader(
    dataset=valid_set,
    batch_size=len(valid_set),
    num_workers=n_workers,
    pin_memory=True,
    drop_last=True,
)

# Un-shuffled loader that serves the whole training set as one batch; used to
# compute metrics on the training data.
train_dataloader_all = DataLoader(
    dataset=train_set,
    batch_size=len(train_set),
    shuffle=False,
    num_workers=n_workers,
    pin_memory=True,
    drop_last=True,
)

# Training Model

In [14]:
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [15]:
def metrices(y_true, y_pred):
    """Compute binary-classification metrics from true and predicted labels.

    Class 0 is treated as the negative class and class 1 as the positive one.

    Args:
        y_true: ground-truth class labels.
        y_pred: predicted class labels, same length as ``y_true``.

    Returns:
        list: ``[accuracy, true_positive_rate, true_negative_rate, precision,
        false_positive_rate, f1_score]`` as fractions. A metric whose
        denominator is zero is returned as NaN.
    """
    def _ratio(numerator, denominator):
        # Undefined ratios become NaN explicitly instead of triggering a
        # divide-by-zero RuntimeWarning.
        return numerator / denominator if denominator else float("nan")

    # Pin the label set so the matrix is always 2x2, even when one of the
    # classes is absent from both y_true and y_pred (otherwise the matrix
    # collapses to 1x1 and indexing [1, 1] raises IndexError).
    cf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])

    # Row = true class, column = predicted class.
    true_negatives, false_positives, false_negatives, true_positives = cf_matrix.ravel()

    total = true_positives + true_negatives + false_positives + false_negatives
    accuracy = _ratio(true_positives + true_negatives, total)

    # Sensitivity / recall
    true_positive_rate = _ratio(true_positives, true_positives + false_negatives)

    # Specificity
    true_negative_rate = _ratio(true_negatives, true_negatives + false_positives)

    precision = _ratio(true_positives, true_positives + false_positives)

    # Fall-out
    false_positive_rate = _ratio(false_positives, false_positives + true_negatives)

    # Harmonic mean of precision and recall; NaN propagates if either is NaN.
    f1_score = _ratio(2 * (precision * true_positive_rate), precision + true_positive_rate)

    return [accuracy, true_positive_rate, true_negative_rate, precision, false_positive_rate, f1_score]

# GPU

In [16]:
# GPU run: seed the global RNG, then build a fresh CNNModel and move it to CUDA.
device = 'cuda'
torch.manual_seed(42)
model = CNNModel()
model = model.to(device)
# Display where the parameters ended up.
next(model.parameters()).device

device(type='cuda', index=0)

In [17]:
# AdamW over every model parameter, starting from the configured learning rate.
optimizer = AdamW(params=model.parameters(), lr=lr)
# Scale the base learning rate by 0.9**k, where k is the number of
# scheduler.step() calls made so far.
scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 0.9 ** epoch)
loss_fn = nn.CrossEntropyLoss()

In [18]:
# GPU benchmark: repeat `epochs` train/validate epochs several times and, after
# each repetition, record full-dataset metrics together with the accumulated
# training / validation wall-clock time.
n_repeats = 20  # number of timed repetitions

Metrice_list_train = []
Metrice_list_test = []

# NOTE(review): model, optimizer and scheduler are created once, outside this
# loop, so each repetition keeps training the same weights with an
# ever-decaying learning rate; the repetitions are not independent runs.
# Confirm that this is intended.
for _ in tqdm(range(n_repeats)):
    time_consume_train = 0
    time_consume_pred = 0
    for epoch in range(epochs):

        start_time = timer()
        train_step(model=model,
                   data_loader=train_dataloader,
                   loss_fn=loss_fn,
                   optimizer=optimizer,
                   accuracy_fn=accuracy_fn,
                   device=device)
        # CUDA kernels run asynchronously; wait for queued work to finish so
        # the timer covers the computation and not only the kernel launches.
        if str(device).startswith('cuda'):
            torch.cuda.synchronize()
        end_time = timer()
        time_consume_train += end_time - start_time

        start_time = timer()
        test_step(model=model,
                  data_loader=valid_dataloader,
                  loss_fn=loss_fn,
                  accuracy_fn=accuracy_fn,
                  device=device)
        if str(device).startswith('cuda'):
            torch.cuda.synchronize()
        end_time = timer()
        time_consume_pred += end_time - start_time
        scheduler.step()

    # Full-dataset metrics for this repetition. Force eval mode for the
    # forward passes, then restore the previous mode so the next train_step
    # call sees the model exactly as it would have before.
    was_training = model.training
    model.eval()
    for loader, metric_rows, elapsed in (
        (valid_dataloader, Metrice_list_test, time_consume_pred),
        (train_dataloader_all, Metrice_list_train, time_consume_train),
    ):
        for X, y in loader:
            X, y = X.to(device), y.to(device)

            with torch.inference_mode():
                logits = model(X)
                # Targets are one-hot encoded, so argmax recovers class indices.
                y_pred = logits.argmax(dim=1).cpu().numpy()
                y_true = y.argmax(dim=1).cpu().numpy()
            metric_rows.append(metrices(y_true, y_pred) + [elapsed])
    model.train(was_training)

  0%|          | 0/20 [00:00<?, ?it/s]

Train loss: 0.43773 | Train acc: 85.35%
Test loss: 0.31934 | Test acc: 93.13 %

Train loss: 0.31105 | Train acc: 87.93%
Test loss: 0.21775 | Test acc: 93.18 %

Train loss: 0.27246 | Train acc: 88.24%
Test loss: 0.21428 | Test acc: 94.13 %

Train loss: 0.25472 | Train acc: 88.94%
Test loss: 0.20507 | Test acc: 93.67 %

Train loss: 0.24159 | Train acc: 89.55%
Test loss: 0.18687 | Test acc: 94.13 %

Train loss: 0.23409 | Train acc: 89.96%
Test loss: 0.17100 | Test acc: 94.16 %

Train loss: 0.22222 | Train acc: 90.54%
Test loss: 0.15398 | Test acc: 94.28 %

Train loss: 0.21497 | Train acc: 90.68%
Test loss: 0.17187 | Test acc: 94.56 %

Train loss: 0.21074 | Train acc: 90.89%
Test loss: 0.15181 | Test acc: 94.51 %

Train loss: 0.20626 | Train acc: 91.16%
Test loss: 0.17723 | Test acc: 94.02 %

Train loss: 0.20243 | Train acc: 91.24%
Test loss: 0.15300 | Test acc: 94.54 %

Train loss: 0.19813 | Train acc: 91.39%
Test loss: 0.16939 | Test acc: 94.33 %

Train loss: 0.19563 | Train acc: 91.64%


In [20]:
# Save the per-repetition training-set metrics to CSV, then preview them.
# Column names are passed to the constructor so an empty metric list still
# yields a well-formed (empty) frame.
df = pd.DataFrame(
    Metrice_list_train,
    columns=['accuracy','true_positive_rate','true_negative_rate','precision','false_positive_rate','f1_score', 'time spent(training)'],
)
df.to_csv(f'{device}_cnn_train_metrice.csv')
# Must be the last expression of the cell for the preview to render.
df.head()

In [21]:
# Save the per-repetition validation-set metrics to CSV, then preview them.
# Column names are passed to the constructor so an empty metric list still
# yields a well-formed (empty) frame.
df = pd.DataFrame(
    Metrice_list_test,
    columns=['accuracy','true_positive_rate','true_negative_rate','precision','false_positive_rate','f1_score', 'time spent(testing)'],
)
df.to_csv(f'{device}_cnn_test_metrice.csv')
# Must be the last expression of the cell for the preview to render.
df.head()

# CPU

In [22]:
# CPU run: seed the global RNG, then build a fresh CNNModel and keep it on the CPU.
device = 'cpu'
torch.manual_seed(42)
model = CNNModel()
model = model.to(device)
# Display where the parameters ended up.
next(model.parameters()).device

device(type='cpu')

In [23]:
# AdamW over every model parameter, starting from the configured learning rate.
optimizer = AdamW(params=model.parameters(), lr=lr)
# Scale the base learning rate by 0.9**k, where k is the number of
# scheduler.step() calls made so far.
scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 0.9 ** epoch)
loss_fn = nn.CrossEntropyLoss()

In [24]:
# CPU benchmark: repeat `epochs` train/validate epochs several times and, after
# each repetition, record full-dataset metrics together with the accumulated
# training / validation wall-clock time.
n_repeats = 20  # number of timed repetitions

Metrice_list_train = []
Metrice_list_test = []

# NOTE(review): model, optimizer and scheduler are created once, outside this
# loop, so each repetition keeps training the same weights with an
# ever-decaying learning rate; the repetitions are not independent runs.
# Confirm that this is intended.
for _ in tqdm(range(n_repeats)):
    time_consume_train = 0
    time_consume_pred = 0
    for epoch in range(epochs):

        start_time = timer()
        train_step(model=model,
                   data_loader=train_dataloader,
                   loss_fn=loss_fn,
                   optimizer=optimizer,
                   accuracy_fn=accuracy_fn,
                   device=device)
        end_time = timer()
        time_consume_train += end_time - start_time

        start_time = timer()
        test_step(model=model,
                  data_loader=valid_dataloader,
                  loss_fn=loss_fn,
                  accuracy_fn=accuracy_fn,
                  device=device)
        end_time = timer()
        time_consume_pred += end_time - start_time
        scheduler.step()

    # Full-dataset metrics for this repetition. Force eval mode for the
    # forward passes, then restore the previous mode so the next train_step
    # call sees the model exactly as it would have before.
    was_training = model.training
    model.eval()
    for loader, metric_rows, elapsed in (
        (valid_dataloader, Metrice_list_test, time_consume_pred),
        (train_dataloader_all, Metrice_list_train, time_consume_train),
    ):
        for X, y in loader:
            X, y = X.to(device), y.to(device)

            with torch.inference_mode():
                logits = model(X)
                # Targets are one-hot encoded, so argmax recovers class indices.
                y_pred = logits.argmax(dim=1).cpu().numpy()
                y_true = y.argmax(dim=1).cpu().numpy()
            metric_rows.append(metrices(y_true, y_pred) + [elapsed])
    model.train(was_training)

  0%|          | 0/20 [00:00<?, ?it/s]

Train loss: 0.43772 | Train acc: 85.35%
Test loss: 0.31929 | Test acc: 93.13 %

Train loss: 0.31105 | Train acc: 87.93%
Test loss: 0.21772 | Test acc: 93.18 %

Train loss: 0.27247 | Train acc: 88.24%
Test loss: 0.21417 | Test acc: 94.13 %

Train loss: 0.25473 | Train acc: 88.93%
Test loss: 0.20503 | Test acc: 93.67 %

Train loss: 0.24160 | Train acc: 89.55%
Test loss: 0.18687 | Test acc: 94.13 %

Train loss: 0.23411 | Train acc: 89.95%
Test loss: 0.17100 | Test acc: 94.16 %

Train loss: 0.22224 | Train acc: 90.54%
Test loss: 0.15399 | Test acc: 94.28 %

Train loss: 0.21499 | Train acc: 90.68%
Test loss: 0.17191 | Test acc: 94.56 %

Train loss: 0.21076 | Train acc: 90.89%
Test loss: 0.15181 | Test acc: 94.51 %

Train loss: 0.20627 | Train acc: 91.16%
Test loss: 0.17724 | Test acc: 94.02 %

Train loss: 0.20244 | Train acc: 91.24%
Test loss: 0.15301 | Test acc: 94.54 %

Train loss: 0.19814 | Train acc: 91.39%
Test loss: 0.16940 | Test acc: 94.33 %

Train loss: 0.19564 | Train acc: 91.64%


In [25]:
# Save the per-repetition training-set metrics to CSV, then preview them.
# Column names are passed to the constructor so an empty metric list still
# yields a well-formed (empty) frame.
df = pd.DataFrame(
    Metrice_list_train,
    columns=['accuracy','true_positive_rate','true_negative_rate','precision','false_positive_rate','f1_score', 'time spent(training)'],
)
df.to_csv(f'{device}_cnn_train_metrice.csv')
# Must be the last expression of the cell for the preview to render.
df.head()

In [26]:
# Save the per-repetition validation-set metrics to CSV, then preview them.
# Column names are passed to the constructor so an empty metric list still
# yields a well-formed (empty) frame.
df = pd.DataFrame(
    Metrice_list_test,
    columns=['accuracy','true_positive_rate','true_negative_rate','precision','false_positive_rate','f1_score', 'time spent(testing)'],
)
df.to_csv(f'{device}_cnn_test_metrice.csv')
# Must be the last expression of the cell for the preview to render.
df.head()