In [2]:
import os
os.chdir('../src')

In [3]:
import glob, cv2
from tqdm import tqdm

import numpy as np
import pandas as pd

import sys, os, time, logging, datetime, random
from pathlib import Path

import torch
import torch.nn.functional as F
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler, RandomSampler

from config import _C as cfg
from models.create_model import CustomNet

from data_builder import build_valid_loader, build_train_loader
from data_builder.transforms import get_valid_transform, get_test_transform
from models.optimizer import make_optimizer
from models.scheduler import make_scheduler
from models.create_model import CustomNet

#TODO: provare ad usare questo
from models.loss import BiTemperedLogisticLoss

from pytorch_lightning.callbacks import ModelCheckpoint

In [4]:
df = pd.read_csv(
    '/home/giorgio/Scrivania/Kaggle/cassava_leaf/data/train_folds.csv'
)

In [5]:
class cassavaTest(Dataset):

    def __init__(self, df, cfg, transforms=None, preprocessing=None):

        self.df = df
        self.cfg = cfg
        self.transforms = transforms

    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx:int):
        row = self.df.iloc[idx]
        img_id = row['image_id']

        path_img = img_id

        img = cv2.imread(
            path_img, cv2.IMREAD_COLOR
        )

        if self.transforms:
            augmented = self.transforms(image=img)
            img = augmented['image']
        
        return img, img_id

In [6]:
def load_model(cfg, fold):
    checkpoint = torch.load(
        f'/home/giorgio/Scrivania/Kaggle/cassava_leaf/experiments/tf_efficientnet_b3_ns/2021-01-15/tf_efficientnet_b3_ns_fld{fold}.ckpt',
    )
    model = CustomNet(cfg)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(cfg.DEVICE)
    model.eval()
    del checkpoint
    return model

In [1]:
data = pd.DataFrame(columns=['image_id', 'pred_c', 'pred_p'])

for fld in range(cfg.DATASET.N_SPLITS):
    print(f'Prediction on fold {fld}')
    img_id = df[df['fold']==fld]['image_id']

    data = cassavaTest(
        df[df['fold']==fld], 
        cfg, 
        transforms=get_test_transform(cfg)
    )

    dl = DataLoader(
        dataset=data,
        sampler=SequentialSampler(data),
        drop_last=False,
        batch_size=cfg.TRAIN_LOADER.BATCH_SIZE,
        num_workers=cfg.TRAIN_LOADER.NUM_WORKERS,
        pin_memory=True
    )

    model = load_model(cfg, fld)

    predicted_class = []
    predicted_prob = []

    test_loader = tqdm(dl, total=len(dl))
    for cnt, (imgs, ids) in enumerate(test_loader):

        with torch.no_grad():
            imgs = imgs.to(cfg.DEVICE)
            logits = model(imgs)

        preds = F.softmax(logits, -1).detach().cpu().numpy()

        pred_c = np.argmax(a=preds, axis=1)
        pred_p = np.max(a=preds, axis=1)

        predicted_class.append(pred_c)
        predicted_prob.append(pred_p)

    predicted_class = np.concatenate(predicted_class)
    predicted_prob =  np.concatenate(predicted_prob)

    dataset = pd.DataFrame()
    dataset['pred_c'] = predicted_class
    dataset['pred_p'] = predicted_prob
    dataset['image_id'] = img_id

    data.append(dataset)

NameError: name 'pd' is not defined