# Library

In [1]:
# import os
# import random
# import numpy as np
# import pandas as pd
# from PIL import Image
# from tqdm.notebook import tqdm
# from scipy import spatial
# from sklearn.model_selection import train_test_split
# import torch
# from torch import nn
# from torch.utils.data import Dataset, DataLoader
# from torch.optim.lr_scheduler import CosineAnnealingLR
# from torchvision import transforms
# import timm
# from timm.utils import AverageMeter
# import sys
# sys.path.append('../input/sentence-transformers-222/sentence-transformers')
# from sentence_transformers import SentenceTransformer
# import warnings
# warnings.filterwarnings('ignore')

import os
import random
import numpy as np
import pandas as pd
from PIL import Image
from tqdm.notebook import tqdm
from scipy import spatial
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import transforms, models
import sys
sys.path.append('../input/sentence-transformers-222/sentence-transformers')
from sentence_transformers import SentenceTransformer
import warnings
warnings.filterwarnings('ignore')

# Config

In [2]:
# class CFG:
#     model_name = 'vit_base_patch16_224'
#     input_size = 224
#     batch_size = 64
#     num_epochs = 3
#     lr = 1e-4
#     seed = 42

class CFG:
    """Hyper-parameters used by the training cells of this notebook."""

    # Reproducibility: handed to seed_everything() and train_test_split().
    seed = 42

    # Model / input. train() uses model_name to name the checkpoint file.
    model_name = 'resnet50'
    input_size = 224

    # Optimisation.
    num_epochs = 3
    batch_size = 64
    lr = 1e-4

In [3]:
# def seed_everything(seed):
#     os.environ['PYTHONHASHSEED'] = str(seed)
#     random.seed(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
    
#     if torch.cuda.is_available(): 
#         torch.cuda.manual_seed(seed)
#         torch.backends.cudnn.deterministic = True


# seed_everything(CFG.seed)

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Writes ``PYTHONHASHSEED`` into the process environment and seeds
    Python's ``random`` module, NumPy's global generator and PyTorch.
    When CUDA is available, the CUDA generator is seeded as well and
    cuDNN's ``deterministic`` flag is switched on.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The CPU-side seeders all take the seed as their single argument.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

# Fix all RNG seeds up front so the split and the training run are repeatable.
seed_everything(seed=CFG.seed)

# Dataset

In [4]:
class DiffusionDataset(Dataset):
    """Image/prompt pairs described by a metadata DataFrame.

    Args:
        df: DataFrame with one row per sample. Each row must provide a
            ``'filepath'`` entry (passed to ``Image.open``) and a
            ``'prompt'`` entry (returned unchanged).
        transform: Callable applied to the loaded PIL image; its return
            value is what ``__getitem__`` yields as the image.
    """

    def __init__(self, df, transform):
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            Tuple ``(image, prompt)`` where ``image`` is the transformed
            RGB image and ``prompt`` is the row's ``'prompt'`` value.
        """
        row = self.df.iloc[idx]
        # Open inside a context manager so the file handle is released as soon
        # as the pixels are decoded. Force three channels: the pipeline built
        # in get_dataloaders() normalises with 3-channel statistics, so
        # grayscale / palette / RGBA files would otherwise fail downstream.
        with Image.open(row['filepath']) as raw:
            image = raw.convert('RGB')
        image = self.transform(image)
        return image, row['prompt']
    
class AverageMeter:
    """Running weighted mean of a scalar such as a per-batch loss.

    Attributes maintained by ``update``:
        val:   the most recent value passed in.
        sum:   the accumulated ``val * n`` over all updates.
        count: the accumulated ``n`` over all updates.
        avg:   ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.count += n
        self.sum += val * n
        self.val = val
        self.avg = self.sum / self.count


class DiffusionCollator:
    """Collate function that replaces raw prompts with sentence embeddings.

    A ``SentenceTransformer`` is loaded once, on the CPU, from the local
    ``all-MiniLM-L6-v2`` directory and reused for every batch.
    """

    def __init__(self):
        encoder_dir = '/kaggle/input/sentence-transformers-222/all-MiniLM-L6-v2'
        self.st_model = SentenceTransformer(encoder_dir, device='cpu')

    def __call__(self, batch):
        """Merge a list of ``(image, prompt)`` samples into one batch.

        Returns:
            Tuple ``(images, prompt_embeddings)``: the image tensors stacked
            along a new leading dimension, and whatever the encoder returns
            for the prompts when asked for tensor output.
        """
        image_seq, prompt_seq = zip(*batch)
        encode_options = dict(show_progress_bar=False, convert_to_tensor=True)
        return (
            torch.stack(image_seq),
            self.st_model.encode(prompt_seq, **encode_options),
        )
    
    
def get_dataloaders(
    trn_df,
    val_df,
    input_size,
    batch_size
):
    """Build the training and validation DataLoaders.

    Both loaders share one preprocessing pipeline (resize, tensor
    conversion, channel-wise normalisation) and one ``DiffusionCollator``
    instance, so the sentence encoder is only loaded once.

    Args:
        trn_df: Metadata DataFrame for the training split.
        val_df: Metadata DataFrame for the validation split.
        input_size: Size handed to ``transforms.Resize``.
        batch_size: Number of samples per batch for both loaders.

    Returns:
        Dict with keys ``'train'`` (shuffled, incomplete last batch dropped)
        and ``'val'`` (sequential, every sample kept).
    """
    # NOTE(review): Resize with an int scales the shorter edge only, so batches
    # stack cleanly only if all images share one aspect ratio - presumably the
    # dataset is uniformly square; confirm.
    preprocess = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    collator = DiffusionCollator()

    def make_loader(frame, training):
        # Training shuffles and drops the ragged last batch; validation keeps
        # the original order and every sample.
        return DataLoader(
            dataset=DiffusionDataset(frame, preprocess),
            shuffle=training,
            batch_size=batch_size,
            pin_memory=True,
            num_workers=2,
            drop_last=training,
            collate_fn=collator
        )

    return {
        'train': make_loader(trn_df, training=True),
        'val': make_loader(val_df, training=False),
    }

# Train

In [5]:
def cosine_similarity(y_trues, y_preds):
    """Mean cosine similarity between paired rows of two collections.

    Args:
        y_trues: Iterable of 1-D vectors.
        y_preds: Iterable of 1-D vectors, paired with ``y_trues`` by position.

    Returns:
        The mean over all pairs of ``1 - cosine distance``.
    """
    per_pair = []
    for truth, guess in zip(y_trues, y_preds):
        per_pair.append(1 - spatial.distance.cosine(truth, guess))
    return np.mean(per_pair)

In [12]:
# def train(
#     trn_df,
#     val_df,
#     model_name,
#     input_size,
#     batch_size,
#     num_epochs,
#     lr
# ):
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#     dataloaders = get_dataloaders(
#         trn_df,
#         val_df,
#         input_size,
#         batch_size
#     )

#     model = timm.create_model(
#         model_name,
#         pretrained=True,
#         num_classes=384
#     )
#     model.set_grad_checkpointing()
#     model.to(device)
    
#     optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

#     ttl_iters = num_epochs * len(dataloaders['train'])
#     scheduler = CosineAnnealingLR(optimizer, T_max=ttl_iters, eta_min=1e-6)
#     criterion = nn.CosineEmbeddingLoss()
    
#     best_score = -1.0

#     for epoch in range(num_epochs):
#         train_meters = {
#             'loss': AverageMeter(),
#             'cos': AverageMeter(),
#         }
#         model.train()
#         for X, y in tqdm(dataloaders['train'], leave=False):
#             X, y = X.to(device), y.to(device)

#             optimizer.zero_grad()
#             X_out = model(X)
#             target = torch.ones(X.size(0)).to(device)
#             loss = criterion(X_out, y, target)
#             loss.backward()

#             optimizer.step()
#             scheduler.step()

#             trn_loss = loss.item()
#             trn_cos = cosine_similarity(
#                 X_out.detach().cpu().numpy(), 
#                 y.detach().cpu().numpy()
#             )

#             train_meters['loss'].update(trn_loss, n=X.size(0))
#             train_meters['cos'].update(trn_cos, n=X.size(0))

#         print('Epoch {:d} / trn/loss={:.4f}, trn/cos={:.4f}'.format(
#             epoch + 1,
#             train_meters['loss'].avg,
#             train_meters['cos'].avg))

#         val_meters = {
#             'loss': AverageMeter(),
#             'cos': AverageMeter(),
#         }
#         model.eval()
#         for X, y in tqdm(dataloaders['val'], leave=False):
#             X, y = X.to(device), y.to(device)

#             with torch.no_grad():
#                 X_out = model(X)
#                 target = torch.ones(X.size(0)).to(device)
#                 loss = criterion(X_out, y, target)

#                 val_loss = loss.item()
#                 val_cos = cosine_similarity(
#                     X_out.detach().cpu().numpy(), 
#                     y.detach().cpu().numpy()
#                 )

#             val_meters['loss'].update(val_loss, n=X.size(0))
#             val_meters['cos'].update(val_cos, n=X.size(0))

#         print('Epoch {:d} / val/loss={:.4f}, val/cos={:.4f}'.format(
#             epoch + 1,
#             val_meters['loss'].avg,
#             val_meters['cos'].avg))
        
#         if val_meters['cos'].avg > best_score:
#             best_score = val_meters['cos'].avg
#             torch.save(model.state_dict(), f'{model_name}.pth')




def train(
    trn_df,
    val_df,
    model_name,
    input_size,
    batch_size,
    num_epochs,
    lr
):
    """Fine-tune a pretrained torchvision ResNet-50 to predict prompt embeddings.

    The network's final fully connected layer is replaced by a 384-unit
    linear layer and optimised with ``nn.CosineEmbeddingLoss`` (every target
    is +1) against the embeddings delivered by the dataloaders. Each epoch
    ends with a validation pass; whenever the mean validation cosine
    similarity beats the best seen so far, the weights are written to
    ``'{model_name}.pth'``.

    Args:
        trn_df: Training metadata, forwarded to ``get_dataloaders``.
        val_df: Validation metadata, forwarded to ``get_dataloaders``.
        model_name: Only used to name the checkpoint file; the architecture
            built here is always ``models.resnet50``.
        input_size: Forwarded to ``get_dataloaders``.
        batch_size: Forwarded to ``get_dataloaders``.
        num_epochs: Number of passes over the training loader.
        lr: Learning rate given to AdamW.

    Returns:
        The model as it stands after the last epoch (which is not
        necessarily the state that was checkpointed as best).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    loaders = get_dataloaders(trn_df, val_df, input_size, batch_size)

    # Swap the stock classification head for a 384-dimensional output layer.
    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 384)
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)

    # The scheduler is stepped once per batch, so its horizon is the total
    # number of training iterations rather than the number of epochs.
    total_steps = num_epochs * len(loaders['train'])
    scheduler = CosineAnnealingLR(optimizer, T_max=total_steps, eta_min=1e-6)
    criterion = nn.CosineEmbeddingLoss()

    def new_meters():
        # One running average per tracked metric.
        return {metric: AverageMeter() for metric in ('loss', 'cos')}

    def batch_cosine(outputs, embeddings):
        # Metric is computed on CPU NumPy copies, detached from the graph.
        return cosine_similarity(
            outputs.detach().cpu().numpy(),
            embeddings.detach().cpu().numpy()
        )

    best_score = -1.0

    for epoch in range(num_epochs):
        epoch_no = epoch + 1

        # ---- training pass ----
        train_meters = new_meters()
        model.train()
        for images, embeddings in tqdm(loaders['train'], leave=False):
            images, embeddings = images.to(device), embeddings.to(device)
            n_samples = images.size(0)

            optimizer.zero_grad()
            outputs = model(images)
            # A target of +1 asks the loss to pull each pair together.
            positives = torch.ones(n_samples).to(device)
            loss = criterion(outputs, embeddings, positives)
            loss.backward()
            optimizer.step()
            scheduler.step()

            train_meters['loss'].update(loss.item(), n=n_samples)
            train_meters['cos'].update(batch_cosine(outputs, embeddings), n=n_samples)

        print('Epoch {:d} / trn/loss={:.4f}, trn/cos={:.4f}'.format(
            epoch_no,
            train_meters['loss'].avg,
            train_meters['cos'].avg))

        # ---- validation pass ----
        val_meters = new_meters()
        model.eval()
        for images, embeddings in tqdm(loaders['val'], leave=False):
            images, embeddings = images.to(device), embeddings.to(device)
            n_samples = images.size(0)

            with torch.no_grad():
                outputs = model(images)
                positives = torch.ones(n_samples).to(device)
                val_loss = criterion(outputs, embeddings, positives).item()
                val_cos = batch_cosine(outputs, embeddings)

            val_meters['loss'].update(val_loss, n=n_samples)
            val_meters['cos'].update(val_cos, n=n_samples)

        print('Epoch {:d} / val/loss={:.4f}, val/cos={:.4f}'.format(
            epoch_no,
            val_meters['loss'].avg,
            val_meters['cos'].avg))

        # Checkpoint only when validation similarity improves.
        epoch_score = val_meters['cos'].avg
        if epoch_score > best_score:
            best_score = epoch_score
            torch.save(model.state_dict(), f'{model_name}.pth')

    return model



In [13]:
# df = pd.read_csv('/kaggle/input/diffusiondb-data-cleansing/diffusiondb.csv')
# trn_df, val_df = train_test_split(df, test_size=0.1, random_state=CFG.seed)

# Read the prompt/filepath metadata and hold out 10% of the rows for validation.
df = pd.read_csv('/kaggle/input/diffusiondb-data-cleansing/diffusiondb.csv')
trn_df, val_df = train_test_split(
    df,
    test_size=0.1,
    random_state=CFG.seed,
)



In [15]:
# `model` is only assigned by the training cell further down, so on a fresh
# kernel a bare print(model) raises NameError and stops Run All. Look the name
# up defensively and print None when no model exists yet.
print(globals().get('model'))

None


In [16]:
# train(trn_df, val_df, CFG.model_name, CFG.input_size, CFG.batch_size, CFG.num_epochs, CFG.lr)

# Run the full training loop with the settings from CFG and keep the final model.
model = train(
    trn_df=trn_df,
    val_df=val_df,
    model_name=CFG.model_name,
    input_size=CFG.input_size,
    batch_size=CFG.batch_size,
    num_epochs=CFG.num_epochs,
    lr=CFG.lr,
)

  0%|          | 0/2170 [00:00<?, ?it/s]

Epoch 1 / trn/loss=0.5137, trn/cos=0.4863


  0%|          | 0/242 [00:00<?, ?it/s]

Epoch 1 / val/loss=0.4878, val/cos=0.5122


  0%|          | 0/2170 [00:00<?, ?it/s]

Epoch 2 / trn/loss=0.4644, trn/cos=0.5356


  0%|          | 0/242 [00:00<?, ?it/s]

Epoch 2 / val/loss=0.4697, val/cos=0.5303


  0%|          | 0/2170 [00:00<?, ?it/s]

Epoch 3 / trn/loss=0.4316, trn/cos=0.5684


  0%|          | 0/242 [00:00<?, ?it/s]

Epoch 3 / val/loss=0.4674, val/cos=0.5326


In [17]:
# Persist the weights of the model returned by train() (its last-epoch state).
model_path = '/kaggle/working/model.pth'
torch.save(obj=model.state_dict(), f=model_path)

In [18]:
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from tqdm.notebook import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import timm

In [19]:
class CFG:
    """Inference settings.

    Re-declaring ``CFG`` here replaces the training configuration defined
    earlier in the notebook, so only the fields below exist from this cell on.
    """

    model_name = 'resnet50'
    # Presumably the checkpoint written by train() as '{model_name}.pth' - confirm
    # the working directory is /kaggle/working.
    model_path = '/kaggle/working/resnet50.pth'
    batch_size = 64
    input_size = 224

In [20]:
class DiffusionTestDataset(Dataset):
    """Unlabelled images for inference.

    Args:
        images: Sequence of image locations, each accepted by ``Image.open``.
        transform: Callable applied to the loaded PIL image; its return
            value is what ``__getitem__`` yields.
    """

    def __init__(self, images, transform):
        self.images = images
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, convert to RGB and transform the image at index ``idx``."""
        # Close the file as soon as the pixels are decoded, and force three
        # channels: predict() normalises with 3-channel statistics, so
        # grayscale / palette / RGBA files would otherwise fail downstream.
        with Image.open(self.images[idx]) as raw:
            image = raw.convert('RGB')
        return self.transform(image)

In [21]:
def predict(
    images,
    model_path,
    model_name,
    input_size,
    batch_size
):
    """Run a saved model over ``images`` and return the flattened outputs.

    Args:
        images: Sequence of image locations handed to ``DiffusionTestDataset``.
        model_path: Path of the state-dict checkpoint to load.
        model_name: timm architecture name; the model is created with a
            384-unit output layer and without pretrained weights.
        input_size: Size handed to ``transforms.Resize``.
        batch_size: Number of images per forward pass.

    Returns:
        1-D NumPy array holding the model outputs of all images, in input
        order, concatenated row after row. Empty when ``images`` is empty.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    dataset = DiffusionTestDataset(images, transform)
    dataloader = DataLoader(
        dataset=dataset,
        shuffle=False,
        batch_size=batch_size,
        pin_memory=True,
        num_workers=2,
        drop_last=False
    )

    model = timm.create_model(
        model_name,
        pretrained=False,
        num_classes=384
    )
    # map_location remaps the stored tensors onto the device in use, so a
    # checkpoint written during a GPU run still loads on a CPU-only machine.
    # torch.load unpickles the file: only point model_path at trusted files.
    state_dict = torch.load(model_path, map_location=device)
    # Strict loading: the checkpoint's parameter names must match the timm model.
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    preds = []
    with torch.no_grad():
        for X in tqdm(dataloader, leave=False):
            X_out = model(X.to(device))
            preds.append(X_out.cpu().numpy())

    if not preds:
        # np.vstack raises ValueError on an empty list; no images means no outputs.
        return np.empty(0, dtype=np.float32)

    return np.vstack(preds).flatten()

In [22]:
# Collect the competition images; the file stem serves as the image id.
images = list(Path('/kaggle/input/stable-diffusion-image-to-prompts/images').glob('*.png'))
imgIds = [path.stem for path in images]

# One submission row per (image, embedding dimension), labelled '<imgId>_<dim>'
# in the same image-major order that predict() flattens its output.
EMBEDDING_LENGTH = 384
imgId_eId = [
    f'{img_id}_{dim}'
    for img_id in imgIds
    for dim in range(EMBEDDING_LENGTH)
]

prompt_embeddings = predict(
    images=images,
    model_path=CFG.model_path,
    model_name=CFG.model_name,
    input_size=CFG.input_size,
    batch_size=CFG.batch_size,
)

submission = pd.DataFrame(data=prompt_embeddings, index=imgId_eId, columns=['val'])
submission.index.name = 'imgId_eId'
submission.to_csv('submission.csv')

  0%|          | 0/1 [00:00<?, ?it/s]