In [None]:
import numpy as np
import glob
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
import torchvision
import torchvision.transforms.v2 as T
import os
from matplotlib import pyplot as plt
import pandas as pd
import wandb
!wandb login 

# Prefer the GPU whenever CUDA is usable; every tensor and model below is moved to DEVICE.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using the GPU 😊" if DEVICE.type == "cuda" else "Using the CPU 😞")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
Using the GPU 😊


In [3]:
# Spatial size (H, W) every image is brought to before entering the network.
NETWORK_SIZE = (40, 40)
# Per-channel statistics used by the final Normalize step of both pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
# Width of the classifier's output layer.
LABELS_CNT = 200


def _float_resize_normalize():
    """Return fresh instances of the tail shared by both pipelines.

    The tail converts to scaled float32, resizes to NETWORK_SIZE and normalises
    with the ImageNet statistics, in that order.
    """
    return [
        T.ToDtype(torch.float32, scale=True),
        T.Resize(size=NETWORK_SIZE),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]


# Deterministic pipeline (used for validation and inference).
DEFAULT_TRANSFORM = T.Compose([T.ToImage(), *_float_resize_normalize()])

# Training pipeline: random augmentations are applied before the float conversion.
# NOTE(review): TrivialAugmentWide and RandAugment are stacked on top of each other,
# followed by RandomErasing - confirm this much augmentation is intended.
DEFAULT_TRANSFORM_AUG = T.Compose(
    [
        T.ToImage(),
        T.RandomHorizontalFlip(),
        T.RandomResizedCrop(size=NETWORK_SIZE, scale=(0.8, 1.0), ratio=(0.8, 1.2)),
        T.TrivialAugmentWide(),
        T.RandAugment(),
        T.RandomErasing(),
        *_float_resize_normalize(),
    ]
)


class ImgDataset(Dataset):
    """Train or validation view over a flat directory of labelled images.

    The directory listing is split with a stratified ``train_test_split``; the
    instance keeps either the train or the validation part depending on ``mode``.
    Two instances built with the same ``img_dir``, ``train_fraction`` and
    ``rnd_seed`` (one per mode) therefore form complementary subsets.

    Args:
        img_dir: directory whose entries are all image files.
        gt: mapping from image file name (no directory) to integer class label.
        mode: ``'train'`` (augmented samples) or ``'valid'`` (deterministic samples).
        train_fraction: share of the images assigned to the train part.
        rnd_seed: seed forwarded to ``train_test_split``.

    Raises:
        RuntimeError: if ``mode`` is neither ``'train'`` nor ``'valid'``.
        KeyError: if a file in ``img_dir`` has no entry in ``gt``.
    """

    _MODES = ('train', 'valid')

    def __init__(self, img_dir, gt : dict, mode, train_fraction=0.9, rnd_seed=42):
        # Fail fast: reject a bad mode before touching the file system.
        if mode not in self._MODES:
            raise RuntimeError(f"Invalid mode: {mode!r}")

        # glob returns entries in arbitrary order. Sorting makes the split
        # reproducible and guarantees that the 'train' and 'valid' instances
        # (which each list the directory themselves) see the same ordering.
        img_paths = sorted(glob.glob(f'{img_dir}/*'))
        labels = np.array([gt[os.path.basename(img_path)] for img_path in img_paths])

        X_train, X_valid = train_test_split(
            img_paths, train_size=train_fraction, random_state=rnd_seed, stratify=labels
        )

        self._paths = X_train if mode == 'train' else X_valid
        self._len = len(self._paths)
        self._gt = gt
        self._mode = mode

    def __getitem__(self, index):
        """Return ``(transformed image tensor, label)`` for the sample at ``index``."""
        img_path = self._paths[index]

        # Force three channels so the 3-channel Normalize in the transform
        # pipelines cannot fail on grayscale or RGBA files.
        image = torchvision.io.read_image(img_path, mode=torchvision.io.ImageReadMode.RGB)

        transform = DEFAULT_TRANSFORM_AUG if self._mode == 'train' else DEFAULT_TRANSFORM
        return transform(image), self._gt[os.path.basename(img_path)]

    def __len__(self):
        """Number of samples in the selected part of the split."""
        return self._len

In [4]:
def read_csv(filename):
    """Read a two-column labels CSV into a ``{first column: int(second column)}`` dict.

    The first line is treated as a header and skipped. Blank lines (for example a
    trailing empty line at the end of the file) are ignored, and an empty file
    yields an empty dict.

    Args:
        filename: path to the CSV file.

    Returns:
        dict mapping the first field of each row to the integer in its second field.

    Raises:
        ValueError: if a second field is not an integer.
        IndexError: if a non-blank row has fewer than two fields.
    """
    import csv  # local import keeps this cell self-contained

    res = {}
    # newline='' is what the csv module expects from the file object.
    with open(filename, newline='', encoding='utf-8') as fhandle:
        reader = csv.reader(fhandle)
        next(reader, None)  # skip the header; tolerate a completely empty file
        for row in reader:
            if not any(field.strip() for field in row):
                continue  # blank line
            res[row[0]] = int(row[1])
    return res

# File name -> integer class label lookup; passed to ImgDataset as ground truth.
train_gt = read_csv('/kaggle/input/bhw-1-dl-2024-2025/bhw1/labels.csv')

In [5]:
def _train_one_epoch(model, dl_train, loss_fn, optimizer, scheduler, epoch):
    """Run one optimisation pass over ``dl_train``; return ``(mean batch loss, accuracy in %)``."""
    model.train()
    batches_per_epoch = len(dl_train)
    losses = []
    correct = 0  # becomes a tensor on DEVICE after the first batch (avoids a per-batch sync)
    seen = 0
    progress = tqdm(total=batches_per_epoch, desc=f"Epoch {epoch}", leave=False)
    try:
        for batch_ind, (x_batch, y_batch) in enumerate(dl_train):
            x_batch, y_batch = x_batch.to(DEVICE), y_batch.to(DEVICE)
            optimizer.zero_grad(set_to_none=True)
            logits = model(x_batch)
            loss = loss_fn(logits, y_batch)
            loss.backward()
            optimizer.step()
            # Fractional epoch for the warm-restart schedule. The fraction must be
            # relative to the number of *batches* so that it sweeps [0, 1) within
            # an epoch.
            scheduler.step(epoch + batch_ind / batches_per_epoch)

            losses.append(loss.detach())
            correct += (logits.detach().argmax(dim=1) == y_batch).sum()
            seen += y_batch.shape[0]
            progress.update()
    finally:
        progress.close()
    return torch.stack(losses).mean().item(), (100 * correct / seen).item()


def _evaluate(model, dl_valid, loss_fn, epoch):
    """Evaluate on ``dl_valid`` without gradients; return ``(mean batch loss, accuracy in %)``."""
    model.eval()
    losses = []
    correct = 0
    seen = 0
    progress = tqdm(total=len(dl_valid), desc=f"Epoch {epoch}", leave=False)
    try:
        with torch.no_grad():
            for x_batch, y_batch in dl_valid:
                x_batch, y_batch = x_batch.to(DEVICE), y_batch.to(DEVICE)
                logits = model(x_batch)
                losses.append(loss_fn(logits, y_batch))
                correct += (logits.argmax(dim=1) == y_batch).sum()
                seen += y_batch.shape[0]
                progress.update()
    finally:
        progress.close()
    return torch.stack(losses).mean().item(), (100 * correct / seen).item()


def train_detector(train_gt : dict, train_img_dir : str, run_name=None,
                   epoch_cnt=10, lr=0.01, cos_restart=10, 
                   batch_size=32, weight_decay=5e-4, sgd_momentum=0.9,
                   chan_mult=2, layers=[4, 4, 4], dropout=[0.15, 0.25, 0.35, 0.60],
                   model_state=None, best_loss_threshold=2.1):
    """Train a ``ClfModel`` classifier and log the run to Weights & Biases.

    The images in ``train_img_dir`` are split 90/10 into train and validation
    parts. Training uses SGD with momentum and a cosine-annealing warm-restart
    learning-rate schedule that is advanced after every batch.

    Files written to the working directory (and registered with ``wandb.save``):
      * ``model_checkpoint_{run_name}_e{epoch}.pt`` on every 10th epoch (0, 10, ...);
      * ``model.pt`` whenever the validation loss improves and is below
        ``best_loss_threshold``.

    Args:
        train_gt: mapping from image file name to integer class label.
        train_img_dir: directory containing the labelled images.
        run_name: W&B run name, also embedded in checkpoint file names.
        epoch_cnt: number of epochs to train.
        lr: initial learning rate.
        cos_restart: length in epochs of the first cosine cycle (``T_0``).
        batch_size: batch size of both data loaders.
        weight_decay: weight decay passed to SGD.
        sgd_momentum: SGD momentum.
        chan_mult, layers, dropout: architecture arguments forwarded to ``ClfModel``.
        model_state: optional state dict loaded into the ``nn.DataParallel``-wrapped
            model before training.
        best_loss_threshold: validation loss must be below this value for
            ``model.pt`` to be written.

    Returns:
        The trained model, wrapped in ``nn.DataParallel``.

    Raises:
        ValueError: if the train split holds fewer images than one batch.
    """
    wandb.init(name=run_name, project="DL LHW 1")
    try:
        config = wandb.config
        config.batch_size = batch_size      # batch size of both data loaders
        config.epochs = epoch_cnt           # number of training epochs
        config.lr = lr                      # initial learning rate
        config.cos_restart = cos_restart    # epochs in the first cosine cycle
        config.momentum = sgd_momentum      # SGD momentum
        config.weight_decay = weight_decay
        config.chan_mult = chan_mult
        config.layers = layers
        config.dropout = dropout

        # Leave one core for the main process; cpu_count() may return None.
        num_workers = max(0, (os.cpu_count() or 1) - 1)
        ds_train = ImgDataset(train_img_dir, train_gt, mode="train", train_fraction=0.9)
        ds_valid = ImgDataset(train_img_dir, train_gt, mode="valid", train_fraction=0.9)
        dl_train = DataLoader(ds_train, batch_size=batch_size, shuffle=True, drop_last=True,
                              num_workers=num_workers, pin_memory=True)
        dl_valid = DataLoader(ds_valid, batch_size=batch_size, shuffle=False, drop_last=False,
                              num_workers=num_workers, pin_memory=True)
        if len(dl_train) == 0:
            raise ValueError(
                f"train split has {len(ds_train)} images, fewer than one batch of {batch_size}"
            )

        model = ClfModel(chan_mult, layers, dropout).to(DEVICE)
        model = nn.DataParallel(model)
        if model_state is not None:
            # The state dict is loaded after wrapping, so its keys must match
            # the DataParallel-wrapped model (the same form this function saves).
            model.load_state_dict(model_state)

        loss_fn = torch.nn.CrossEntropyLoss().to(DEVICE)
        optimizer = torch.optim.SGD(model.parameters(), momentum=sgd_momentum, lr=lr,
                                    weight_decay=weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, cos_restart)

        best_val_loss = np.inf
        for epoch in range(epoch_cnt):
            train_loss, train_acc = _train_one_epoch(
                model, dl_train, loss_fn, optimizer, scheduler, epoch
            )
            print(
                f"Epoch {epoch},",
                f"train_loss: {train_loss:.8f}",
                f"train_accuracy: {train_acc:.8f}",
            )

            val_loss, valid_acc = _evaluate(model, dl_valid, loss_fn, epoch)
            print(
                f"Epoch {epoch},",
                f"valid_loss: {val_loss:.8f}",
                f"valid_accuracy: {valid_acc:.8f}",
            )

            if epoch % 10 == 0:
                # Save and upload through one path so both always refer to the same file.
                checkpoint_path = f'model_checkpoint_{run_name}_e{epoch}.pt'
                torch.save(model.state_dict(), checkpoint_path)
                wandb.save(os.path.abspath(checkpoint_path))
            if best_val_loss > val_loss and val_loss < best_loss_threshold:
                best_val_loss = val_loss
                torch.save(model.state_dict(), 'model.pt')
                wandb.save('model.pt')

            print(scheduler.get_last_lr())
            wandb.log({
                "Learning rate" : scheduler.get_last_lr()[-1],
                "Train Accuracy": train_acc,
                "Train Loss": train_loss,
                "Valid Accuracy": valid_acc,
                "Valid Loss": val_loss
            })
    except BaseException:
        # Close the run even on errors / KeyboardInterrupt so no run is left
        # dangling in the kernel; a non-zero exit code marks it as failed.
        wandb.finish(exit_code=1)
        raise

    wandb.finish()

    return model

In [6]:
class CNNBlock(nn.Module):
    """Stack of residual ``conv -> batch-norm -> (+skip) -> ELU`` layers with a pooling/dropout head.

    The first layer changes the channel count from ``in_c`` to ``out_c``, so its
    skip connection goes through a 1x1 convolution (``conv_width``); the remaining
    layers use the identity as skip.

    Args:
        in_c: number of input channels.
        out_c: number of output channels of every layer in the block.
        layers: number of conv/batch-norm layers.
        dropout: dropout probability applied at the end of the block.
        kernel: convolution kernel size ('same' padding keeps the spatial size).
        reduce: if True, a 2x2 max-pool (stride 2) precedes the dropout.
    """

    def __init__(self, in_c, out_c, layers=4, dropout=0.0, kernel=3, reduce=True) -> None:
        super().__init__()

        convs, bns = [], []
        for i in range(layers):
            convs.append(nn.Conv2d(in_c if i == 0 else out_c, out_c, kernel, padding='same', bias=False))
            bns.append(nn.BatchNorm2d(out_c, momentum=0.1))
        # The layers are iterated pairwise in forward(), never called as a chain,
        # so ModuleList is the right container. Parameter names in the state dict
        # stay 'convs.<i>.*' / 'bns.<i>.*'.
        self.convs = nn.ModuleList(convs)
        self.bns = nn.ModuleList(bns)

        self.conv_width = nn.Conv2d(in_c, out_c, 1)
        self.activation = nn.ELU(inplace=True)

        head = []
        if reduce:
            head.append(nn.MaxPool2d(kernel_size=2, stride=2))
        head.append(nn.Dropout(p=dropout))
        self.head = nn.Sequential(*head)

    def forward(self, x):
        """Apply the residual layers, then the pooling/dropout head."""
        for num, (conv, bn) in enumerate(zip(self.convs, self.bns)):
            ident = self.conv_width(x) if num == 0 else x
            x = bn(conv(x)) + ident
            # Use the return value instead of relying on the in-place side effect,
            # so the activation still applies if `inplace` is ever turned off.
            x = self.activation(x)

        return self.head(x)


class ClfModel(nn.Module):
    """Image classifier: three ``CNNBlock`` stages with block-level skips and a dense head.

    Each stage halves the spatial size. Its output is summed with a strided 1x1
    convolution of the stage input (``pt_wise_convs``), giving a second, block-level
    residual path. The head flattens the final feature map and maps it to
    ``LABELS_CNT`` logits.

    Args:
        chan_mult: multiplier applied to the base stage widths (32, 64, 128).
        layers: number of conv layers in each of the three stages.
        dropout: dropout probabilities for the three stages and the dense head.

    Note:
        The flattened size assumes three exact halvings, so both sides of
        ``NETWORK_SIZE`` should be divisible by 8.
    """

    def __init__(self, chan_mult=2, layers=[4, 4, 4], dropout=[0.15, 0.25, 0.35, 0.60]) -> None:
        super().__init__()
        # Plain Python ints (not numpy scalars) so that a non-integer multiplier
        # such as 2.5 still yields valid layer widths.
        c_size = [int(base * chan_mult) for base in (32, 64, 128)]

        self.blocks = nn.Sequential(
            CNNBlock(3, c_size[0], layers=layers[0], dropout=dropout[0]),
            CNNBlock(c_size[0], c_size[1], layers=layers[1], dropout=dropout[1]),
            CNNBlock(c_size[1], c_size[2], layers=layers[2], dropout=dropout[2]),
        )
        # One skip projection per stage. They are applied in parallel with the
        # blocks, never chained, hence ModuleList; the state-dict keys stay
        # 'pt_wise_convs.<i>.*'.
        self.pt_wise_convs = nn.ModuleList([
            nn.Conv2d(3, c_size[0], 1, stride=2),
            nn.Conv2d(c_size[0], c_size[1], 1, stride=2),
            nn.Conv2d(c_size[1], c_size[2], 1, stride=2),
        ])

        # Three stride-2 reductions shrink each spatial side by a factor of 8.
        flat_features = c_size[-1] * (NETWORK_SIZE[0] // 2**3) * (NETWORK_SIZE[1] // 2**3)
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 800),
            nn.LeakyReLU(),
            nn.BatchNorm1d(800, momentum=0.1),
            nn.Dropout(p=dropout[3]),
            nn.Linear(800, LABELS_CNT),
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        for block, pt_wise in zip(self.blocks, self.pt_wise_convs):
            x = block(x) + pt_wise(x)

        return self.head(x)


In [33]:
# Close the currently active W&B run, if any.
# NOTE(review): presumably kept for cleaning up after an interrupted training cell - confirm.
wandb.finish()

0,1
Learning rate,▁
Train Accuracy,▁
Train Loss,▁
Valid Accuracy,▁
Valid Loss,▁

0,1
Learning rate,0.005
Train Accuracy,1.81588
Train Loss,5.39696
Valid Accuracy,6.25
Valid Loss,4.83066


In [11]:
# Weights to resume from, mapped directly onto DEVICE. With weights_only=True
# only tensor data is unpickled, not arbitrary objects.
model_state = torch.load(
    '/kaggle/input/big-good/model-4.pt',
    map_location=DEVICE,
    weights_only=True,
)

In [None]:
# Continue training from the checkpoint held in `model_state`.
# chan_mult / layers / dropout define the ClfModel architecture, which must match
# the one the checkpoint was saved from for load_state_dict to succeed.
model = train_detector(
    train_gt, 
    '/kaggle/input/bhw-1-dl-2024-2025/bhw1/trainval',
    run_name='Good big 2',
    epoch_cnt=30, 
    lr=0.001,
    cos_restart=10, 
    batch_size=64,
    weight_decay=5e-4, 
    sgd_momentum=0.9,
    chan_mult=5, 
    layers=[4, 7, 5],
    dropout=[0.15, 0.25, 0.35, 0.6],
    model_state=model_state  # resume from the checkpoint loaded above
)

In [7]:
def calc_ans(model_filename, test_img_dir,
             chan_mult=5, layers=(4, 7, 5), dropout=(0.15, 0.25, 0.35, 0.6)):
    """Predict a class label for every image in ``test_img_dir``.

    Args:
        model_filename: path to a state dict saved from the ``nn.DataParallel``-wrapped
            ``ClfModel`` (the form ``train_detector`` writes).
        test_img_dir: directory whose entries are all image files.
        chan_mult, layers, dropout: architecture arguments for ``ClfModel``; they
            must match the architecture the checkpoint was saved from.

    Returns:
        dict mapping image file name (no directory) to the predicted integer label.
    """
    model = ClfModel(chan_mult, list(layers), list(dropout)).to(DEVICE)
    model = nn.DataParallel(model)
    model.load_state_dict(torch.load(model_filename, map_location=DEVICE, weights_only=True))
    model.eval()

    results = {}
    # Sorted so the output order does not depend on the file system's listing order.
    img_paths = sorted(glob.glob(f'{test_img_dir}/*'))
    with torch.no_grad():
        for img_path in tqdm(img_paths, leave=False):
            # Force three channels so the 3-channel Normalize in DEFAULT_TRANSFORM
            # cannot fail on grayscale or RGBA files.
            image = torchvision.io.read_image(img_path, mode=torchvision.io.ImageReadMode.RGB)
            image = DEFAULT_TRANSFORM(image).to(DEVICE)
            # image[None, ...] adds the batch dimension the model expects.
            label = model(image[None, ...])[0].argmax().item()
            results[os.path.basename(img_path)] = int(label)

    return results

In [8]:
# Run inference with the epoch-20 checkpoint and write the predictions as a
# two-column (Id, Category) CSV.
answer = calc_ans(
    '/kaggle/input/weights-new/model_checkpoint_Good big 2_e20.pt',
    '/kaggle/input/bhw-1-dl-2024-2025/bhw1/test',
)
submission = pd.DataFrame({
    "Id": list(answer.keys()),
    "Category": list(answer.values()),
})
submission.to_csv("submission.csv", index=False)

  0%|          | 0/10000 [00:00<?, ?it/s]