In [1]:
# Change the working directory to the parent folder before importing anything.
# Presumably this makes the project's `src` package importable from the
# notebook's location -- confirm against the repository layout.
%cd ..

/home/ltorres/leo/tesis/cloud-classification/code


In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import copy
import glob
import os
from random import sample

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from torchvision.io import read_image

In [4]:
from src.dataset import GCD
from src import config
from src import engine
from src import utils

from src.models.graph_nets import ResNet50

In [5]:
from sklearn.metrics import accuracy_score

In [6]:
# Compute device taken from the project configuration; it is used for the
# model and passed to the engine's train/eval functions in later cells.
device = config.DEVICE

#### Data loaders

In [7]:
# Ask the project helper for the 'train' split under config.DATA_DIR.
# Presumably returns a collection of image paths -- verify in src/utils.
path_train_images = utils.get_gcd_paths(config.DATA_DIR,'train')

In [8]:
# Training split: GCD dataset built with resize=256, served in shuffled
# mini-batches by four worker processes.
train_dataset = GCD(path_train_images, resize=256)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=config.BATCH_SIZE,
    num_workers=4,
)

In [9]:
# Pull a single batch from the training loader. `batch` is not referenced
# again in the cells shown here, so this looks like a loading sanity check.
batch = next(iter(train_loader))

In [10]:
# Test split: built with the same arguments as the training split, but
# iterated in a fixed (unshuffled) order.
path_test_images = utils.get_gcd_paths(config.DATA_DIR, 'test')
test_dataset = GCD(path_test_images, resize=256)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    shuffle=False,
    batch_size=config.BATCH_SIZE,
    num_workers=4,
)

### Data augmentation data loaders

In [11]:
# One GCD dataset per configured augmentation type, each built from its own
# utils.random_sample(...) draw over the training paths.
augmentation_datasets = list(
    GCD(utils.random_sample(path_train_images), resize=256, aug_types=aug_type)
    for aug_type in config.AUGMENTATION_TYPES
)

In [12]:
# Wrap every augmentation dataset in its own shuffled DataLoader, using the
# same batch size and worker count as the main training loader.
augmentation_loaders = list(
    torch.utils.data.DataLoader(
        dataset=augmented_dataset,
        shuffle=True,
        batch_size=config.BATCH_SIZE,
        num_workers=4,
    )
    for augmented_dataset in augmentation_datasets
)

#### Model

In [13]:
from torchvision import models

In [14]:
# Release unused memory cached by PyTorch's CUDA allocator before the model
# is created in the next cell.
torch.cuda.empty_cache()

In [15]:
# Instantiate the project's ResNet50 wrapper and move it to the configured device.
# NOTE(review): 7 is presumably the number of output classes (class_mapper
# later in this notebook has 7 entries) -- confirm in src/models/graph_nets.
model = ResNet50(7).to(device)

In [16]:
# Cross-entropy loss for the classifier, optimised with Adam at lr=3e-4
# over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=3e-4)

In [17]:
# Train for config.EPOCHS epochs, evaluating on the test loader after every
# epoch and remembering the weights of the epoch with the highest test accuracy.
#
# NOTE(review): the "best" epoch is selected on the test set itself, so
# best_accuracy is an optimistic estimate; a separate validation split would
# be needed for an unbiased number.
best_accuracy = 0
best_iteration = 0
best_params = {}

for e in range(config.EPOCHS):
    ### TRAIN DATASET
    preds, targets, loss = engine.train_fn(model, train_loader, criterion, optimizer, device=device)
    train_acc = accuracy_score(targets, preds)

    ### AUGMENTATION IMAGES
    # for _loader in augmentation_loaders:
    #     engine.forward_backward_pass(model, _loader, criterion, optimizer, device=device)

    test_preds, test_targets, test_loss = engine.eval_fn(model, test_loader, criterion, device=device)
    test_acc = accuracy_score(test_targets, test_preds)

    if test_acc > best_accuracy:
        print(f"Saved best parameters at epoch {e+1}")
        best_accuracy = test_acc
        best_iteration = e+1
        # state_dict() hands back references to the live parameter tensors,
        # which keep changing as training continues. Deep-copy them so the
        # snapshot really is the state at this epoch.
        best_params = copy.deepcopy(model.state_dict())

    print("EPOCH {}: Train acc: {:.2%} Train Loss: {:.4f} Test acc: {:.2%} Test Loss: {:.4f}".format(
        e+1,
        train_acc,
        loss,
        test_acc,
        test_loss
    ))


100%|██████████| 313/313 [00:55<00:00,  5.60it/s]
100%|██████████| 282/282 [00:17<00:00, 16.03it/s]


Saved best parameters at epoch 1
EPOCH 1: Train acc: 78.67% Train Loss: 0.5884 Test acc: 70.90% Test Loss: 1.0816


100%|██████████| 313/313 [00:55<00:00,  5.61it/s]
100%|██████████| 282/282 [00:17<00:00, 15.95it/s]


Saved best parameters at epoch 2
EPOCH 2: Train acc: 84.92% Train Loss: 0.4012 Test acc: 72.96% Test Loss: 1.0685


100%|██████████| 313/313 [00:55<00:00,  5.61it/s]
100%|██████████| 282/282 [00:17<00:00, 16.07it/s]


EPOCH 3: Train acc: 86.89% Train Loss: 0.3516 Test acc: 71.78% Test Loss: 1.1810


100%|██████████| 313/313 [00:55<00:00,  5.60it/s]
100%|██████████| 282/282 [00:17<00:00, 16.04it/s]


Saved best parameters at epoch 4
EPOCH 4: Train acc: 88.12% Train Loss: 0.3071 Test acc: 73.04% Test Loss: 1.0508


 94%|█████████▎| 293/313 [00:52<00:03,  5.58it/s]


KeyboardInterrupt: 

In [None]:
# Evaluate the model's current weights on the test loader. The training run
# recorded above was interrupted part-way through an epoch, so these are the
# weights at the moment of interruption, not necessarily the best-scoring ones.
test_preds, test_targets, test_loss = engine.eval_fn(model, test_loader, criterion, device=device)

In [None]:
# Overall test accuracy for the predictions produced by the latest evaluation.
accuracy_score(test_targets, test_preds)

In [None]:
# Persist the weights currently held by `model` (i.e. its latest state).
# The path is assembled with os.path.join and the target folder is created
# if missing, since torch.save fails when the parent directory does not exist.
checkpoint_dir = os.path.join(config.SAVE_PATH, 'gcd_resnet')
os.makedirs(checkpoint_dir, exist_ok=True)
torch.save(model.state_dict(), os.path.join(checkpoint_dir, 'model_params_25_03_22.pt'))

---

### Accuracy per class and confusion matrix

In [None]:
# Integer class index -> class folder name, in index order.
class_mapper = dict(enumerate([
    '1_cumulus',
    '2_altocumulus',
    '3_cirrus',
    '4_clearsky',
    '5_stratocumulus',
    '6_cumulonimbus',
    '7_mixed',
]))

In [None]:
# Per-class accuracy and raw confusion counts.
# matrix[i, j] = number of test samples whose true class is i and predicted class is j.
#
# The return type of engine.eval_fn is not visible here, so coerce both
# sequences to flat integer arrays first: with plain Python lists
# `test_targets == i` is a scalar False and the fancy indexing below would fail.
targets_arr = np.asarray(test_targets).astype(int).ravel()
preds_arr = np.asarray(test_preds).astype(int).ravel()

n_classes = len(class_mapper)
class_acc = dict()
matrix = np.zeros((n_classes, n_classes), dtype=int)

for i in range(n_classes):
    class_mask = targets_arr == i
    class_preds = preds_arr[class_mask]

    if class_preds.size:
        class_acc[class_mapper[i]] = accuracy_score(targets_arr[class_mask], class_preds)
    else:
        # No test sample of this class: accuracy is undefined rather than 0.
        class_acc[class_mapper[i]] = float("nan")

    # Unbuffered in-place add, so repeated predicted labels are all counted.
    np.add.at(matrix[i], class_preds, 1)

In [None]:
# Display the per-class accuracy dictionary (keys are the class names).
class_acc

In [None]:
# Row-normalise the confusion counts to percentages: each row (true class)
# sums to 100, so the diagonal is the per-class accuracy in percent.
sum_of_rows = matrix.sum(axis=1)
row_totals = sum_of_rows[:, np.newaxis]
conf_mat = np.divide(
    100 * matrix,
    row_totals,
    out=np.full(matrix.shape, np.nan),  # rows with no samples stay NaN
    where=row_totals != 0,              # avoids the divide-by-zero warning
)

class_names = list(class_acc.keys())
df_cm = pd.DataFrame(conf_mat, index=class_names, columns=class_names)

fig, ax = plt.subplots(figsize=(8, 5))
# The default annotation format ('.2g') would render 100 as '1e+02';
# use one fixed decimal instead.
sns.heatmap(df_cm, annot=True, fmt=".1f", ax=ax)
ax.set_title("Confusion Matrix (Accuracy %)")
ax.set_xlabel("Predicted class")
ax.set_ylabel("True class");