In [1]:
import os
import gc
import cv2
import copy
import time
import random
from PIL import Image

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, train_test_split

import timm

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2

# For colored terminal text
from colorama import Fore, Back, Style
g_ = Fore.GREEN
c_ = Fore.CYAN
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

  from .autonotebook import tqdm as notebook_tqdm


# Configure W&B 

In [2]:
import wandb

wandb.login()


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnma5214[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
ROOT_DIR = "../data/googleLandmarkRetrieval/"
TRAIN_DIR = "../data/googleLandmarkRetrieval/train/"
TEST_DIR = "../data/googleLandmarkRetrieval/test/"

# Training Configuration

In [59]:
CONFIG = dict(
    seed = 42, 
    model_name = "tf_mobilenetv3_small_100", 
    train_batch_size = 1, 
    valid_batch_size = 1, 
    img_size = 224, 
    epochs = 3, 
    learning_rate = 5e-4, 
    scheduler = None, 
    weight_decay = 1e-6, 
    n_accumulate = 1, 
    n_folds = 5, 
    num_classes = 81313, 
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu"), 
    competition = "GOOGL", 
    _wandb_kernel = "deb"
)

# Set Random Seeds 

In [60]:
def set_seed(seed=42): 
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    torch.backends.cudnn.deterministic = True 
    torch.backends.cudnn.benchmark = True 

    os.environ["PYTHONHASHSEED"] = str(seed)
set_seed(CONFIG['seed'])

In [61]:
def get_train_path(id):
    return f"{TRAIN_DIR}/{id[0]}/{id[1]}/{id[2]}/{id}.jpg"

# Read the Data

In [62]:
df = pd.read_csv(f"{ROOT_DIR}/train.csv")


In [63]:
le = LabelEncoder()
df.landmark_id = le.fit_transform(df.landmark_id)
joblib.dump(le, "label_encoder.pkl")

df['file_path'] = df['id'].apply(get_train_path)

In [64]:
df.head()

Unnamed: 0,id,landmark_id,file_path
0,17660ef415d37059,0,../data/googleLandmarkRetrieval/train//1/7/6/1...
1,92b6290d571448f6,0,../data/googleLandmarkRetrieval/train//9/2/b/9...
2,cd41bf948edc0340,0,../data/googleLandmarkRetrieval/train//c/d/4/c...
3,fb09f1e98c6d2f70,0,../data/googleLandmarkRetrieval/train//f/b/0/f...
4,25c9dfc7ea69838d,1,../data/googleLandmarkRetrieval/train//2/5/c/2...


# Visualize Images 

In [17]:
run = wandb.init(project='GLRet2021', 
                    config=CONFIG, 
                    job_type="Visualization", 
                    anonymous='must')

In [19]:
from re import L


preview_table = wandb.Table(columns=['Id', 'Image', 'Landmark ID'])
tmp_df = df.sample(3000, random_state=CONFIG['seed']).reset_index(drop=True)

for i in tqdm(range(len(tmp_df))):
    row = tmp_df.loc[i]
    img = Image.open(row.file_path)
    preview_table.add_data(row.id, wandb.Image(img), row.landmark_id)

wandb.log({"Visualization": preview_table})
run.finish()

100%|██████████| 3000/3000 [04:57<00:00, 10.08it/s]


# Split Data 

In [65]:
df_train, df_test = train_test_split(df, test_size=0.4, stratify=df.landmark_id, shuffle=True, random_state=CONFIG['seed'])

df_valid, df_test = train_test_split(df_test, test_size=0.5, shuffle=True, random_state=CONFIG['seed'])


In [66]:
df.shape, df_train.shape, df_valid.shape, df_test.shape

((1580470, 3), (948282, 3), (316094, 3), (316094, 3))

# Dataset Class 

In [67]:
class LandmarkDataset(Dataset):
    def __init__(self, root_dir, df, transforms=None) -> None:
        super().__init__()
        self.root_dir = root_dir
        self.df = df 
        self.file_names = df["file_path"].values
        self.labels = df['landmark_id'].values 
        self.transforms = transforms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
            img_path = self.file_names[index]
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            label = self.labels[index]

            if self.transforms:
                img = self.transforms(image=img)["image"]
            
            return img, label 

# Augmentations 

In [68]:
data_transforms = {
    "train": A.Compose([
        A.Resize(CONFIG["img_size"], CONFIG["img_size"]), 
        A.HorizontalFlip(p=.5), 
        A.CoarseDropout(p=.5), 
        A.Normalize(max_pixel_value=255.0, p=1.0), 
        ToTensorV2()
    ], p=1.), 

    "valid": A.Compose([
        A.Resize(CONFIG["img_size"], CONFIG["img_size"]), 
        A.Normalize(max_pixel_value=255.0, p=1.0), 
        ToTensorV2()
    ], p=1.)

}

# Create Model 

In [69]:
class LandmarkRetrievalModel(nn.Module):
    def __init__(self, model_name, pretrained=True) -> None:
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        self.n_features = self.model.classifier.in_features
        self.model.reset_classifier(0)
        self.fc = nn.Linear(self.n_features, CONFIG["num_classes"])

    def forward(self, x):
        features = self.model(x)
        output = self.fc(features)
        return output 

    def extract_features(self, x):
        features = self.model(x)
        return features

model = LandmarkRetrievalModel(CONFIG["model_name"])
model.to(CONFIG['device'])


RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 3.95 GiB total capacity; 2.56 GiB already allocated; 576.00 KiB free; 2.85 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# Loss Function

In [None]:
def criterion(outputs, targets):
    return nn.CrossEntropyLoss()(outputs, targets)

# Training Function

In [None]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    model.train()
    scaler = amp.GradScaler()

    dataset_size = 0
    running_loss = 0.0 

    bar = tqdm(enumerate(dataloader), total=len(dataloader))

    for step, (images, labels) in bar: 
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.long)

        batch_size = images.size(0)

        with amp.autocast(enabled=True):
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss = loss / CONFIG["n_accumulate"]

        scaler.scale(loss).backward() 

        if (step + 1) % CONFIG["n_accumulate"] == 0:
            scaler.step(optimizer)
            scaler.update()

            for p in model.parameters():
                p.grad = None 

            
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        bar.set_postfix(Epoch=epoch, Train_Loss=epoch_loss, LR=optimizer.param_groups[0]["lr"])
    
    gc.collect() 
    return epoch_loss

# Validation Function

In [70]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch):
    model.eval()

    dataset_size = 0 
    running_loss = 0.0 

    TARGETS = [] 
    PREDS = [] 

    bar = tqdm(enumerate(dataloader), total=len(dataloader))

    for step, (images, labels) in bar:
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.long)

        batch_size = images.size(0)

        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        PREDS.appends(preds.view(-1).cpu().detach().numpy())
        TARGETS.append(labels.view(-1).cpu().detach().numpy())

        bar.set_postfix(Epoch=epoch, Valid_Loss=epoch_loss)

        TARGETS = np.concatenate(TARGETS)
        PREDS = np.concatenate(PREDS)
        val_acc = accuracy_score(TARGETS, PREDS)
        gc.collect() 

        return epoch_loss, val_acc

# Run Training 

In [71]:
def run_training(model, optimizier, scheduelr,
                train_dataloader,
                val_dataloader,
                device,
                num_epochs, 
                run):

    wandb.watch(model, log_freq=100)

    if torch.cuda.is_available:
        print(f"[INFO] using GPU: {torch.cuda.get_device_name()}\n")

        start = time.time() 
        best_model_wts = copy.deepcopy(model.state_dict())

        best_epoch_acc = 0 
        history = defaultdict(list)

        for epoch in range(1, num_epochs+1):
            gc.collect() 
            train_epoch_loss = train_one_epoch(model, optimizier, scheduelr,
                                                dataloader=train_dataloader,
                                                device=CONFIG["device"],
                                                epoch=epoch )

            val_epoch_loss, val_epoch_acc = valid_one_epoch(model,
                                            dataloader=val_dataloader, 
                                            device=CONFIG["device"], 
                                            epoch=epoch)

            history["Train_Loss"].append(train_epoch_loss)
            history["Valid_Loss"].append(val_epoch_loss)
            history["Valid_acc"].append(val_epoch_acc)

            wandb.log({"Train Loss": train_epoch_loss})
            wandb.log({"Valid Loss": val_epoch_loss})
            wandb.log({"Valid Acc": val_epoch_acc})

            print(f"Valid Acc: {val_epoch_acc}")

            if val_epoch_acc >= best_epoch_acc: 
                print(f"{c_}Validation Acc Improved ({best_epoch_acc} ---> {val_epoch_acc})")
                best_epoch_acc = val_epoch_acc
                run.summary["Best Accuracy"] = best_epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                PATH = "ACC{:.4f}_epoch{:.0f}.bin".format(best_epoch_acc, epoch)
                torch.save(model.state_dict(), PATH)
                wandb.save(PATH)
                print(f"Model Save{sr_}")
            
            print()
    
    end = time.time()
    time_elapsed = end - start 

    print("Trainign Complete in {:.0f}h {:.f}m {:.0f}s".format(time_elapsed//3600, (time_elapsed%3600)//60, (time_elapsed%3600)%60))

    print("Best ACC: {:.4f}".format(best_epoch_acc))

    model.load_state_dict(best_model_wts)

    return model, history

    
        



In [72]:
def prepare_loaders():    
    train_dataset = LandmarkDataset(TRAIN_DIR, df_train, transforms=data_transforms['train'])
    valid_dataset = LandmarkDataset(TRAIN_DIR, df_valid, transforms=data_transforms['valid'])

    train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'], 
                              num_workers=4, shuffle=True, pin_memory=True)
    valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['valid_batch_size'], 
                              num_workers=4, shuffle=False, pin_memory=True)
    
    return train_loader, valid_loader

train_dataloder, valid_dataloader = prepare_loaders()

In [73]:
def fetch_scheduler(optimizer):
    if CONFIG['scheduler'] == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['T_max'], 
                                                   eta_min=CONFIG['min_lr'])
    elif CONFIG['scheduler'] == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CONFIG['T_0'], 
                                                             T_mult=1, eta_min=CONFIG['min_lr'])
    elif CONFIG['scheduler'] == None:
        return None
        
    return scheduler

In [74]:
optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])
scheduler = fetch_scheduler(optimizer)



In [75]:
run = wandb.init(project='GLRet2021', 
                 config=CONFIG,
                 job_type='Train',
                 anonymous='must')

In [76]:
model, history = run_training(model, optimizer, 
                            scheduler,
                            train_dataloder,
                            valid_dataloader, 
                            CONFIG["device"],
                            CONFIG["epochs"], 
                            run)
                            

[INFO] using GPU: <function get_device_name at 0x7f8b106c0d30>



  0%|          | 0/948282 [00:00<?, ?it/s]


RuntimeError: Input type (torch.cuda.HalfTensor) and weight type (torch.FloatTensor) should be the same