In [1]:
# Move up two directory levels so the working directory is the project's `code`
# folder; the `src` and `notebooks` packages imported below are resolved from there.
%cd ..
%cd ..

/home/ltorres/leo/tesis/cloud-classification/code/notebooks
/home/ltorres/leo/tesis/cloud-classification/code


In [2]:
# Enable IPython's autoreload extension in mode 2, so that edited project modules
# are re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

---

In [3]:
from torchvision.io import read_image
import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from random import sample
import torch
import torch.nn as nn
from torch.optim import lr_scheduler

In [4]:
from src.dataset import GCD
from src import config
from src import engine_gnn as engine
from src import utils

from src.models.graph_nets import GraphConvGNN, GATConvGNN

from notebooks.adjacency_construction.adjacency_models import *

In [5]:
from sklearn.metrics import accuracy_score

In [6]:
import wandb

#### PARAMS

In [7]:
# --- Experiment identity ---------------------------------------------------
# `exp_name` is reused as the wandb run name and as the checkpoint file stem.
exp_name = '3_MLP_06_04_22'
model_filename = f'/adjacency_builders/{exp_name}.pt'

# --- Adjacency builder -----------------------------------------------------
# BUILDER is handed to the model constructor; builder_name is the label logged to wandb.
BUILDER, builder_name = 'mlp', "MLP"

# --- Optimisation / hardware -----------------------------------------------
LR = 3e-4
device = 'cuda:1'

---

In [8]:
# Authenticate with Weights & Biases before any run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mleonardtd[0m (use `wandb login --relogin` to force relogin)


True

#### Data loaders

In [9]:
# Ask the project helper for the 'train' split under config.DATA_DIR
# (presumably a collection of image paths — confirm against utils.get_gcd_paths).
path_train_images = utils.get_gcd_paths(config.DATA_DIR,'train')

In [10]:
# Training split wrapped in the project's GCD dataset (constructed with resize=256).
train_dataset = GCD(path_train_images, resize=256)

# Shuffled mini-batches for training, loaded by 4 worker processes.
train_loader_options = dict(
    batch_size=config.BATCH_SIZE,
    num_workers=4,
    shuffle=True,
)
train_loader = torch.utils.data.DataLoader(train_dataset, **train_loader_options)

In [11]:
# Test split: same dataset wrapper and resize as training, but iterated in a
# fixed order (shuffle=False) so predictions line up with targets across runs.
path_test_images = utils.get_gcd_paths(config.DATA_DIR, 'test')
test_dataset = GCD(path_test_images, resize=256)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=config.BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

### Data augmentation data loaders

In [12]:
# Same training image paths, but with the dataset's 'augmentation' mode switched on.
aug_dataset = GCD(path_train_images, resize=256, aug_types='augmentation')

# Shuffled loader over the augmented images; consumed once per epoch in the training loop.
augmentation_loader = torch.utils.data.DataLoader(
    dataset=aug_dataset,
    batch_size=config.BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

#### Model

In [13]:
from torchvision import models

In [14]:
# Release unused cached GPU memory held by PyTorch's allocator before the model is built.
torch.cuda.empty_cache()

In [15]:
# Instantiate the project's GATConvGNN with the configured adjacency builder and move it to `device`.
# NOTE(review): the positional arguments (7, 1024, 4, 4, 0.7) are not explained in this notebook;
# 7 presumably matches the seven GCD classes and 0.7 looks like a dropout rate — TODO confirm
# against the GATConvGNN signature in src/models/graph_nets.py.
model = GATConvGNN(7, 1024, 4, 4, 0.7, adjacency_builder = BUILDER).to(device)

In [16]:
# Loss function: the project-defined `loge_loss`.
criterion = utils.loge_loss

# SGD with momentum over all model parameters.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=LR,
    momentum=0.9,
)

# Multiply the learning rate by 0.1 every 10 calls to scheduler.step()
# (the training loop calls it once per epoch).
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1)

#### wandb

In [17]:
# Hyperparameters and run metadata stored with the run.
run_config = {
    "learning_rate": LR,
    "architecture": "GATConvGNN",
    "loss": "LogeLoss",
    "optim": "SGD",
    "dataset": "GCD",
    "builder": f"{builder_name}",
    "epochs": config.EPOCHS,
}

# Start the run in the adjacency project, naming it after the experiment
# (without an explicit name wandb would assign a random one).
wandb.init(
    project="cloud classification - adjacency",
    name=exp_name,
    config=run_config,
)

### Train

In [18]:
# Train for config.EPOCHS epochs. Each epoch runs one pass over the training
# loader, one extra forward/backward pass over the augmented loader, a scheduler
# step and an evaluation on the test loader. The weights are checkpointed to
# disk whenever the test accuracy improves, and all four metrics go to wandb.
#
# NOTE(review): the checkpoint is selected on the test split, so the reported
# best test accuracy is optimistically biased; a separate validation split would
# be needed for an unbiased estimate.
best_accuracy = 0
best_iteration = 0
best_params = {}

for e in range(config.EPOCHS):
    ### TRAIN DATASET
    preds, targets, loss = engine.train_fn(model, train_loader, criterion, optimizer, device=device)
    train_acc = accuracy_score(targets, preds)

    ### AUGMENTATION IMAGES
    engine.forward_backward_pass(model, augmentation_loader, criterion, optimizer, device=device)

    scheduler.step()

    test_preds, test_targets, test_loss = engine.eval_fn(model, test_loader, criterion, device=device)
    test_acc = accuracy_score(test_targets, test_preds)

    if test_acc > best_accuracy:
        torch.save(model.state_dict(), config.SAVE_PATH+model_filename)
        print(f"Saved best parameters at epoch {e+1}")
        best_accuracy = test_acc
        best_iteration = e+1
        # state_dict() returns references to the live parameter tensors, so a
        # plain assignment would be silently overwritten by later optimizer
        # steps. Snapshot the values instead (kept on CPU to avoid holding a
        # second copy of the weights in GPU memory).
        best_params = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }

    print("EPOCH {}: Train acc: {:.2%} Train Loss: {:.4f} Test acc: {:.2%} Test Loss: {:.4f}".format(
        e+1,
        train_acc,
        loss,
        test_acc,
        test_loss
    ))

    metrics = {
                "train/train_loss": loss,
                "train/train_accuracy": train_acc,
                "test/test_loss": test_loss,
                "test/test_accuracy": test_acc,
              }

    wandb.log(metrics)


100%|██████████| 313/313 [00:59<00:00,  5.22it/s]
100%|██████████| 313/313 [00:58<00:00,  5.36it/s]
100%|██████████| 282/282 [00:18<00:00, 15.10it/s]


Saved best parameters at epoch 1
EPOCH 1: Train acc: 63.78% Train Loss: 2.0965 Test acc: 72.51% Test Loss: 1.6313


100%|██████████| 313/313 [00:59<00:00,  5.25it/s]
100%|██████████| 313/313 [00:58<00:00,  5.34it/s]
100%|██████████| 282/282 [00:18<00:00, 15.07it/s]


EPOCH 2: Train acc: 86.24% Train Loss: 0.8110 Test acc: 70.36% Test Loss: 1.6263


100%|██████████| 313/313 [00:59<00:00,  5.22it/s]
100%|██████████| 313/313 [00:58<00:00,  5.34it/s]
100%|██████████| 282/282 [00:18<00:00, 15.10it/s]


EPOCH 3: Train acc: 89.57% Train Loss: 0.6465 Test acc: 69.99% Test Loss: 1.6815


100%|██████████| 313/313 [00:59<00:00,  5.24it/s]
100%|██████████| 313/313 [00:58<00:00,  5.34it/s]
100%|██████████| 282/282 [00:18<00:00, 15.13it/s]


EPOCH 4: Train acc: 90.63% Train Loss: 0.5621 Test acc: 72.22% Test Loss: 1.5484


100%|██████████| 313/313 [00:59<00:00,  5.23it/s]
100%|██████████| 313/313 [00:58<00:00,  5.31it/s]
100%|██████████| 282/282 [00:18<00:00, 15.14it/s]


Saved best parameters at epoch 5
EPOCH 5: Train acc: 92.43% Train Loss: 0.4568 Test acc: 78.06% Test Loss: 1.3795


100%|██████████| 313/313 [00:59<00:00,  5.23it/s]
100%|██████████| 313/313 [00:58<00:00,  5.34it/s]
100%|██████████| 282/282 [00:18<00:00, 15.10it/s]


EPOCH 6: Train acc: 92.98% Train Loss: 0.4278 Test acc: 77.33% Test Loss: 1.3801


100%|██████████| 313/313 [00:59<00:00,  5.23it/s]
100%|██████████| 313/313 [00:58<00:00,  5.33it/s]
100%|██████████| 282/282 [00:18<00:00, 15.15it/s]


EPOCH 7: Train acc: 94.31% Train Loss: 0.3509 Test acc: 76.69% Test Loss: 1.4904


100%|██████████| 313/313 [00:59<00:00,  5.23it/s]
100%|██████████| 313/313 [00:58<00:00,  5.32it/s]
100%|██████████| 282/282 [00:18<00:00, 15.07it/s]


Saved best parameters at epoch 8
EPOCH 8: Train acc: 94.88% Train Loss: 0.3155 Test acc: 78.09% Test Loss: 1.3755


100%|██████████| 313/313 [00:59<00:00,  5.22it/s]
100%|██████████| 313/313 [00:58<00:00,  5.33it/s]
100%|██████████| 282/282 [00:18<00:00, 15.16it/s]


Saved best parameters at epoch 9
EPOCH 9: Train acc: 95.78% Train Loss: 0.2677 Test acc: 78.26% Test Loss: 1.4400


100%|██████████| 313/313 [00:59<00:00,  5.23it/s]
100%|██████████| 313/313 [00:58<00:00,  5.34it/s]
100%|██████████| 282/282 [00:18<00:00, 15.28it/s]


EPOCH 10: Train acc: 95.93% Train Loss: 0.2495 Test acc: 76.20% Test Loss: 1.5371


100%|██████████| 313/313 [00:59<00:00,  5.22it/s]
100%|██████████| 313/313 [00:58<00:00,  5.32it/s]
100%|██████████| 282/282 [00:18<00:00, 15.15it/s]


EPOCH 11: Train acc: 97.54% Train Loss: 0.1687 Test acc: 77.92% Test Loss: 1.4082


 65%|██████▍   | 202/313 [00:38<00:21,  5.19it/s]


KeyboardInterrupt: 

In [19]:
# Close the active wandb run; this is what prints the run history/summary shown below.
wandb.finish()




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
test/test_accuracy,▃▁▁▃█▇▇██▆█
test/test_loss,▇▇█▅▁▁▄▁▂▅▂
train/train_accuracy,▁▆▆▇▇▇▇▇███
train/train_loss,█▃▃▂▂▂▂▂▁▁▁

0,1
test/test_accuracy,0.77922
test/test_loss,1.40823
train/train_accuracy,0.9754
train/train_loss,0.16873


In [None]:
# Restore the best checkpoint written by the training loop.
# map_location remaps the stored tensors onto `device`, so the load also works
# when the GPU the checkpoint was saved from is not available in this session.
model.load_state_dict(torch.load(config.SAVE_PATH+model_filename, map_location=device))

In [None]:
# Evaluate the restored checkpoint on the test loader; eval_fn yields the
# predictions, the matching targets and the test loss.
test_preds, test_targets, test_loss = engine.eval_fn(model, test_loader, criterion, device=device)

In [None]:
# Overall test accuracy of the restored checkpoint (shown as the cell output).
accuracy_score(test_targets, test_preds)

---

### Accuracy per class and confusion matrix

In [None]:
# Integer class index -> label string, in index order 0..6.
class_mapper = dict(enumerate((
    '1_cumulus',
    '2_altocumulus',
    '3_cirrus',
    '4_clearsky',
    '5_stratocumulus',
    '6_cumulonimbus',
    '7_mixed',
)))

In [None]:
# Per-class accuracy plus raw confusion counts
# (matrix[i, j] = number of test samples of true class i predicted as class j).

# Take the class count from the label mapping instead of repeating the literal 7.
n_classes = len(class_mapper)

# The comparison and fancy indexing below need ndarrays; np.asarray returns the
# input unchanged when eval_fn already produced arrays and converts lists otherwise.
targets_arr = np.asarray(test_targets)
preds_arr = np.asarray(test_preds)

class_acc = dict()
matrix = np.zeros((n_classes, n_classes), dtype=int)

for i in range(n_classes):
    # Positions of the test samples whose true class is i.
    pred_index = np.where(targets_arr == i)[0]
    class_acc[class_mapper[i]] = accuracy_score(targets_arr[pred_index], preds_arr[pred_index])

    # Tally what those samples were predicted as.
    for j in preds_arr[pred_index]:
        matrix[i, j] += 1

In [None]:
# Display the per-class accuracy dictionary (label string -> accuracy).
class_acc

In [None]:
print(f"Global Test accuracy {accuracy_score(test_targets, test_preds)}")

# Normalise every row (true class) by its sample count to get percentages.
sum_of_rows = matrix.sum(axis=1)
conf_mat = 100*matrix / sum_of_rows[:, np.newaxis]

# Rows and columns share the class labels, in the order they were inserted into class_acc.
class_names = list(class_acc.keys())
df_cm = pd.DataFrame(conf_mat, index=class_names, columns=class_names)

fig, ax = plt.subplots(figsize=(8, 5))
# fmt=".1f": seaborn's default annotation format ".2g" would print 100.0 as "1e+02".
sns.heatmap(df_cm, annot=True, fmt=".1f", ax=ax)
ax.set_title("Confusion Matrix (Accuracy %)")
ax.set_xlabel("Predicted class")
ax.set_ylabel("True class");