In [1]:
import os
import torch
import random
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from io import BytesIO
from base64 import b64decode
import pandas as pd
import numpy as np
from timm import create_model
from sklearn.model_selection import train_test_split
import tqdm.notebook as tq
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [2]:
df = pd.read_csv("../../post2ctr_dataset.csv")

max_size = 224

transform = A.Compose([
    A.Resize(max_size, max_size),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
        
    A.RandomBrightnessContrast(p=0.75),
    A.ShiftScaleRotate(rotate_limit=360,shift_limit=0.15,scale_limit=0.1,p=0.75),
    A.OneOf([
        A.GaussNoise(var_limit=[10, 50]),
        A.GaussianBlur(),
        A.MotionBlur(),
    ], p=0.4),
    A.CoarseDropout(max_holes=2, max_width=int(max_size * 0.2), max_height=int(max_size * 0.2), mask_fill_value=0, p=0.5),
    
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(transpose_mask=True),
])

class TemplateDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        cell = self.df.iloc[idx]
        
        img = Image.open(BytesIO(b64decode(cell["photo"])))
        img = np.asarray(img.convert("RGB"))
        img = self.transform(image=img)["image"]
        
        target = cell["open_photo"] / cell["view"] if cell["view"] > 0 else 0

        return {
            "img" : img,
            "target" : target
        }

In [3]:
seed = 42
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_dataset = TemplateDataset(train_df, transform=transform)
test_dataset = TemplateDataset(test_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

In [4]:
class ConVNext(nn.Module):
    def __init__(self):
        super(ConVNext, self).__init__()
        convnext = create_model("convnext_tiny", pretrained=True)
        self.image_model = convnext
        self.image_model.head.fc = nn.Linear(in_features=768, out_features=1, bias=True)
        self.answer = nn.Sigmoid()

    def forward(self, image_input):
        return self.answer(self.image_model(image_input))

In [5]:
model = ConVNext()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCELoss()

In [6]:
def train_model(model, train_loader, optimizer):
    running_loss = 0.0
    model.train()
    loop = tq.tqdm(enumerate(train_loader), total=len(train_loader), leave=True, colour="steelblue")
    for batch_idx, data in loop:
        images = data["img"].to(device)
        targets = data["target"].to(device, dtype=torch.float).squeeze()
                
        outputs = model(images).squeeze()
        loss = criterion(outputs, targets)

        running_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

    return model, running_loss / len(train_loader)

In [7]:
def eval_model(model, test_loader):
    running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for batch_idx, data in enumerate(test_loader, 0):
            images = data["img"].to(device)
            targets = data["target"].to(device, dtype=torch.float).squeeze()
                    
            outputs = model(images).squeeze()
            loss = criterion(outputs, targets)
            running_loss += loss.item()

    return running_loss / len(test_loader)

In [8]:
epochs = 5
best_loss = float("inf")
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    model, train_loss = train_model(model, train_loader, optimizer)
    test_loss = eval_model(model, test_loader)

    print(f"Train Loss = {train_loss:.4f}, Test Loss = {test_loss:.4f}")
    if test_loss < best_loss:
        torch.save(model.state_dict(), "../../convnext_recsys.pth")

Epoch 1/5


  0%|          | 0/2353 [00:00<?, ?it/s]

Train Loss = 0.1697, Test Loss = 0.1615
Epoch 2/5


  0%|          | 0/2353 [00:00<?, ?it/s]

Train Loss = 0.1687, Test Loss = 0.1612
Epoch 3/5


  0%|          | 0/2353 [00:00<?, ?it/s]

Train Loss = 0.1685, Test Loss = 0.1612
Epoch 4/5


  0%|          | 0/2353 [00:00<?, ?it/s]

Train Loss = 0.1685, Test Loss = 0.1617
Epoch 5/5


  0%|          | 0/2353 [00:00<?, ?it/s]

Train Loss = 0.1692, Test Loss = 0.1616
