In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import timm
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset,  WeightedRandomSampler
from torchvision import transforms
from sklearn.model_selection import train_test_split


In [2]:

# Load the pre-processed tag table; the `tags_cat4` column holds the label strings.
df = pd.read_csv("../tags_processed_stages/dafre_tags_symbolsremoved_minlen2_minapp2_profsremoved_filledempty.csv")
# NOTE(review): this is not the cell's last expression, so the preview is never displayed.
df.head()
# Number of distinct label strings before any filtering.
print(df['tags_cat4'].nunique())

3381


In [None]:
# Print the names of every timm model that is listed as having pretrained weights.
print(timm.list_models(pretrained=True))

In [3]:
# Row count per raw label string (multi-tag rows still included).
class_counts = df['tags_cat4'].value_counts()
print(class_counts)

def contains_multiple_tags(label, separator=','):
    """Report whether ``label`` holds more than one tag.

    A label is treated as multi-tag as soon as ``separator`` occurs anywhere
    inside it.
    """
    if separator in label:
        return True
    return False
# Keep only the rows whose label string contains no comma, i.e. the rows
# that contains_multiple_tags does not flag.
filtered_df = df[~df['tags_cat4'].apply(contains_multiple_tags, separator=',')]

# Recount label frequencies on the remaining rows.
class_counts = filtered_df['tags_cat4'].value_counts()
print(class_counts)

['hatsune miku']                                         10644
['hakurei reimu']                                         6691
['rumia']                                                 3803
['kochiya sanae']                                         3472
['cirno']                                                 3470
                                                         ...  
['alice margatroid', 'cardboard box gundam']                 1
['ayesha altugle', 'puni (atelier)']                         1
['akagi (azur lane)', 'shoukaku (azur lane)']                1
['maria traydor', 'rinoa heartilly']                         1
['hirasawa yui', 'kotobuki tsumugi', 'tainaka ritsu']        1
Name: tags_cat4, Length: 3381, dtype: int64
['hatsune miku']                           10644
['hakurei reimu']                           6691
['rumia']                                   3803
['kochiya sanae']                           3472
['cirno']                                   3470
                  

In [4]:
# Work on an explicit copy so that adding a column does not write into a slice of `df`.
filtered_df = filtered_df.copy()
# Map every label string to an integer class id; the encoder is kept so that
# ids can be turned back into label strings later.
label_encoder = LabelEncoder()
filtered_df['encoded_labels'] = label_encoder.fit_transform(filtered_df['tags_cat4'])

# 80 / 10 / 10 train / validation / test split, stratified on the class id at
# both stages.
# NOTE: no random_state is passed, so the split differs from run to run.
train_df, temp_test_df = train_test_split(filtered_df, test_size=0.2, stratify=filtered_df['encoded_labels'])
val_df, test_df = train_test_split(temp_test_df, test_size=0.5, stratify=temp_test_df['encoded_labels'])
class_counts = filtered_df['encoded_labels'].value_counts()
print(class_counts)
# Number of distinct classes that ended up in each split.
print(train_df['encoded_labels'].nunique())
print(val_df['encoded_labels'].nunique())
print(test_df['encoded_labels'].nunique())

818     10644
769      6691
2354     3803
1363     3472
454      3470
        ...  
1978       19
2029       18
623        17
2134       14
1087       10
Name: encoded_labels, Length: 3258, dtype: int64
3258
3258
3258


In [None]:
# Preview the first rows of the training split.
train_df.head()

In [5]:
# Training pipeline: random 224x224 crop covering 80-100% of the image,
# TrivialAugmentWide, random horizontal flip, then tensor conversion and
# per-channel normalization with mean 0.5 / std 0.5.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop((224, 224), scale=(0.8, 1)),
    transforms.TrivialAugmentWide(),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
# Validation / test pipeline: no augmentation, only a plain resize to 224x224
# followed by the same tensor conversion and normalization.
val_test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [6]:
# Class-balancing sampler built from inverse class frequencies.
# NOTE(review): no later cell visible here reads `sampler`; PTDM builds its own.
# Training rows per class id, ordered by class id.
class_counts = train_df['encoded_labels'].value_counts().sort_index()
# Weight of a class = 1 / (number of training rows of that class).
class_weights = 1. / class_counts
# Look up each training row's weight through its class id.
sweights = train_df['encoded_labels'].map(class_weights).values
sweights = torch.tensor(sweights,  dtype=torch.double)
# Draw len(sweights) row indices per pass, with replacement, using those weights.
sampler = WeightedRandomSampler(weights=sweights, num_samples=len(sweights), replacement=True)

In [None]:
# Preview the first rows of the training split.
train_df.head()

In [7]:
import os
import pandas as pd
from PIL import Image

class CustomImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    Args:
        dataframe: Table with one row per image. The relative image path is read
            positionally from the second column (``iloc[idx, 1]``) and the class
            id from the ``encoded_labels`` column.
        transform: Optional callable applied to the loaded RGB PIL image.
        image_folder: Directory that the relative image paths are joined onto.
    """

    def __init__(self, dataframe, transform=None, image_folder="../fullMin256/"):
        self.dataframe = dataframe
        self.transform = transform
        self.image_folder = image_folder

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        ``label`` is a 0-dim ``torch.long`` tensor; ``image`` is whatever
        ``transform`` returns, or the RGB PIL image when no transform is set.
        """
        img_path = os.path.join(self.image_folder, str(self.dataframe.iloc[idx, 1]))
        # Close the file handle deterministically: convert() returns an
        # independent in-memory copy, so nothing needs the file afterwards.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        # Read the scalar straight from the label column instead of first
        # materialising the whole row as an object-dtype Series.
        label = torch.tensor(self.dataframe['encoded_labels'].iloc[idx], dtype=torch.long)
        if self.transform is not None:
            image = self.transform(image)
        return image, label
# One dataset per split; only the training split uses the augmenting transform.
train_ds = CustomImageDataset(dataframe=train_df, transform=train_transforms)
test_ds = CustomImageDataset(dataframe=test_df, transform=val_test_transforms)
val_ds = CustomImageDataset(dataframe=val_df, transform=val_test_transforms)

In [8]:

import pytorch_lightning as L
import numpy as np
from pytorch_lightning.utilities.types import EVAL_DATALOADERS
class PTDM(L.LightningDataModule):
    """LightningDataModule serving the notebook's train / val / test splits.

    The splits are read from the module-level ``train_df``, ``val_df`` and
    ``test_df`` DataFrames. Training batches are drawn with a
    ``WeightedRandomSampler`` whose per-sample weight is the inverse frequency
    of the sample's class in ``train_df``.

    Args:
        bs: Batch size used by all three loaders.
        w: Number of DataLoader worker processes.
        ms: Lower bound of the ``RandomResizedCrop`` scale range.
        bsa: Stored on the instance; not read anywhere in this class.
        pd: Stored on the instance; not read anywhere in this class. The name
            shadows the pandas alias inside ``__init__`` only.
        ep: Probability handed to ``RandomErasing``.
    """

    def __init__(self, bs, w, ms, bsa=True, pd="", ep=0.25):
        super().__init__()
        self.save_hyperparameters()
        self.bs = bs
        self.w = w
        self.ms = ms
        self.bsa = bsa
        self.pd = pd
        self.ep = ep
        # Training-time augmentation; RandomErasing runs last because it
        # operates on the already normalised tensor.
        self.t_transforms = transforms.Compose([
            transforms.RandomResizedCrop(
                (224, 224), scale=(self.ms, 1)
            ),
            transforms.RandomHorizontalFlip(),
            transforms.TrivialAugmentWide(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
            transforms.RandomErasing(p=self.ep),
        ])
        # Deterministic evaluation pipeline.
        self.v_transforms = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])
        self.sampler = self._build_sampler(train_df['encoded_labels'])

    @staticmethod
    def _build_sampler(labels):
        """Return a sampler that draws each row with weight 1 / (size of its class)."""
        labels = np.asarray(labels)
        # `inverse` maps every row to the position of its class in the sorted
        # unique list, so counts[inverse] is the class size per row. This stays
        # correct when the class ids present are not exactly 0..K-1, and it
        # avoids one full scan of the label column per class.
        _, inverse, counts = np.unique(labels, return_inverse=True, return_counts=True)
        samples_weight = torch.from_numpy(1.0 / counts[inverse.reshape(-1)]).double()
        return WeightedRandomSampler(samples_weight, len(samples_weight))

    def setup(self, stage=None):
        """Create the datasets required for ``stage``.

        ``"fit"`` builds the train and validation datasets, ``"validate"`` the
        validation dataset only, ``"test"`` the test dataset, and ``None``
        builds all three.
        """
        if stage in (None, "fit"):
            self.train_dataset = CustomImageDataset(train_df, self.t_transforms)
        if stage in (None, "fit", "validate"):
            self.val_dataset = CustomImageDataset(val_df, self.v_transforms)
        if stage in (None, "test"):
            self.test_dataset = CustomImageDataset(test_df, self.v_transforms)

    def train_dataloader(self):
        """Class-balanced training loader; ordering comes from the sampler, hence ``shuffle=False``."""
        return DataLoader(self.train_dataset, batch_size=self.bs, sampler=self.sampler, shuffle=False, num_workers=self.w, pin_memory=True, drop_last=True, persistent_workers=False)

    def val_dataloader(self):
        """Sequential loader over the validation split."""
        return DataLoader(self.val_dataset, batch_size=self.bs, shuffle=False, num_workers=self.w, pin_memory=True, persistent_workers=False)

    def test_dataloader(self):
        """Sequential loader over the test split."""
        return DataLoader(self.test_dataset, batch_size=self.bs, shuffle=False, num_workers=self.w, pin_memory=True, persistent_workers=False)
# Batch size 32, 8 loader workers, crop scale lower bound 0.8, RandomErasing probability 0.25.
dm = PTDM(bs=32, w=8, ms=0.8, bsa=True, ep=0.25)
# Build the train and validation datasets right away so the loaders can be used before a Trainer runs.
dm.setup(stage="fit")

In [None]:
# Fetch one training batch and show the image tensor, its shape and its dtype.
for x,y in dm.train_dataloader():
    print(x)
    print(x.size())
    print(x.dtype)
    break
# Fetch one validation batch and show the label tensor, its shape and its dtype.
for x,y in dm.val_dataloader():
    print(y)
    print(y.size())
    print(y.dtype)
    break

In [9]:
from transformers.optimization import get_cosine_schedule_with_warmup
from torchmetrics import Accuracy
import torch.nn.functional as F
from torch.nn.functional import log_softmax
from transformers.models.auto.modeling_auto import AutoModelForImageClassification
# credit Ross Wightman
class SoftTargetCrossEntropy(nn.Module):
    """Cross-entropy loss for targets given as per-class weights instead of class ids."""

    def __init__(self):
        super().__init__()

    def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Return the mean over samples of ``-sum(target * log_softmax(x))`` along the last dim."""
        log_probs = log_softmax(x, dim=-1)
        per_sample = torch.sum(-target * log_probs, dim=-1)
        return per_sample.mean()
    

class PTModel(pl.LightningModule):
    """timm BEiT-base image classifier wrapped as a LightningModule.

    Args:
        num_classes: Width of the classification head and class count of the
            accuracy metrics.
        model_name: Free-form label. It is only recorded through
            ``save_hyperparameters``; the backbone is always
            ``beit_base_patch16_224.in22k_ft_in22k``.
        checkpoint: Optional path to saved weights. Only backbone weights are
            taken from it; entries for the ``classifier`` / ``head`` layers
            are skipped.
        ft: When False, every parameter whose name contains neither
            ``"classifier"`` nor ``"head"`` is frozen, so only the head trains.
    """

    def __init__(self, num_classes=3258, model_name="LETS GOOOOOOO", checkpoint=None, ft=False):
        super().__init__()
        self.save_hyperparameters()
        self.model = timm.create_model("beit_base_patch16_224.in22k_ft_in22k", pretrained=True, num_classes=num_classes)
        self.acc = Accuracy(task="multiclass", num_classes=num_classes, top_k=1)
        self.acctop5 = Accuracy(task="multiclass", num_classes=num_classes, top_k=5)
        # Holds no parameters or buffers, so state-dict keys are unaffected.
        self.loss_fn = nn.CrossEntropyLoss()
        if not ft:
            for name, param in self.model.named_parameters():
                if 'classifier' not in name and "head" not in name:
                    param.requires_grad = False
        if checkpoint:
            self._load_backbone_weights(checkpoint)

        # Module.to() converts in place; rebinding `self` is unnecessary.
        self.to(memory_format=torch.channels_last)

    def _load_backbone_weights(self, checkpoint):
        """Copy backbone weights (everything except the head) from ``checkpoint`` into ``self.model``."""
        # NOTE: torch.load unpickles the file; only point this at trusted checkpoints.
        state = torch.load(checkpoint, map_location="cpu")
        # Lightning .ckpt files nest the weights under "state_dict", whereas a
        # file written with torch.save(module.state_dict()) is the mapping itself.
        if isinstance(state, dict) and "state_dict" in state:
            state = state["state_dict"]
        prefix = "model."
        skipped = (prefix + "classifier", prefix + "head")
        # Strip only the leading "model." so keys match self.model's own names.
        backbone = {
            key[len(prefix):]: value
            for key, value in state.items()
            if key.startswith(prefix) and not key.startswith(skipped)
        }
        self.model.load_state_dict(backbone, strict=False)

    def forward(self, x):
        """Return the raw class logits for an image batch."""
        return self.model(x)

    def step(self, batch, type):
        """Shared train / val / test step: compute the loss and log loss, top-1 and top-5 accuracy.

        Args:
            batch: ``(images, labels)`` with integer class-id labels.
            type: Stage prefix for the logged metric names (``"train"``,
                ``"val"`` or ``"test"``). The name shadows the builtin but is
                kept for interface compatibility.
        """
        images, labels = batch
        outputs = self(images)
        # Integer class ids are used in every stage. The cross-entropy value is
        # the same as with one-hot float targets, and the accuracy metrics
        # require integer targets of shape (N,).
        loss = self.loss_fn(outputs, labels)
        self.log(f"{type}_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log(f"{type}_acc", self.acc(outputs, labels), on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log(f"{type}_acctop5", self.acctop5(outputs, labels), on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, _):
        return self.step(batch, "train")

    def validation_step(self, batch, _):
        return self.step(batch, "val")

    def test_step(self, batch, _):
        return self.step(batch, "test")

    def configure_optimizers(self):
        """SGD with momentum, plus a per-step cosine schedule with 1000 warm-up steps."""
        optimizer = torch.optim.SGD(self.parameters(), lr=0.1, momentum=0.9, weight_decay=0)
        scheduler = get_cosine_schedule_with_warmup(
                optimizer,
                num_training_steps=int(self.trainer.estimated_stepping_batches),
                num_warmup_steps=1000,
            )
        return {
            "optimizer": optimizer,
            "lr_scheduler":  {
                "scheduler": scheduler,
                # Advance the schedule after every optimizer step, not every epoch.
                "interval": "step"
            }
        }


In [10]:
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping

# Stop once the epoch-level validation loss has failed to improve for 30
# consecutive validation runs.
early_stop_callback = EarlyStopping(
   monitor='val_loss',
   min_delta=0.00,
   patience=30,
   verbose=True,
   mode='min'
)
# Keeps the checkpoint with the highest val_acc plus the most recent one.
# NOTE: it is not passed to the Trainer in this cell; only the later
# fine-tuning cell uses it.
checkpoint = ModelCheckpoint(
    filename="{epoch}-{val_acc:.4f}",
    monitor="val_acc",
    mode="max",
    save_last=True,
)
# Default ft=False: the backbone is frozen and only the head is trained here.
rmodel = PTModel(num_classes=3258)
# "16-mixed" is the current spelling of the deprecated precision=16 (same AMP behaviour).
trainer = pl.Trainer(default_root_dir="models/", max_steps=100000, check_val_every_n_epoch=2, accelerator='gpu', devices=1, callbacks=[early_stop_callback], precision="16-mixed")
trainer.fit(rmodel, dm)



  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
/home/leogu/.local/lib/python3.10/site-packages/lightning_fabric/connector.py:558: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2024-02-16 14:27:25.374845: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to registe

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 7.421


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 2.017 >= min_delta = 0.0. New best score: 5.404


In [None]:
# Target file for the weights of the model trained with the default ft=False.
model_path = 'models/noft_model.pth'

# Save only the module's state dict (weights), not the module object or trainer state.
torch.save(rmodel.state_dict(), model_path)

# Echo the path as the cell's output.
model_path

In [None]:
# Second stage: train the whole network, starting from the weights stored in
# models/noft_model.pth. ft=True leaves every parameter trainable, which is
# what the previous freeze-then-unfreeze loop achieved.
rmodel = PTModel(num_classes=3258, ft=True)
rmodel.load_state_dict(torch.load("models/noft_model.pth"))
# Use a fresh EarlyStopping instance: the one from the first fit() still holds
# its best score and wait counter, which would leak into this run.
finetune_early_stop = EarlyStopping(
    monitor='val_loss',
    min_delta=0.00,
    patience=30,
    verbose=True,
    mode='min',
)
# Validation runs every 10000 training steps rather than on an epoch boundary.
# "16-mixed" is the current spelling of the deprecated precision=16.
trainer = pl.Trainer(max_steps=50000, val_check_interval=10000, check_val_every_n_epoch=None, accelerator='gpu', devices=1, callbacks=[checkpoint, finetune_early_stop], precision="16-mixed")
trainer.fit(rmodel, dm)

In [None]:
# Target file for the weights held by `rmodel` at this point in the notebook.
model_path = 'models/ft_model.pth'

# Save only the module's state dict (weights), not the module object or trainer state.
torch.save(rmodel.state_dict(), model_path)

# Echo the path as the cell's output.
model_path

In [None]:
'''
I am never doing any training loops again
'''
'''
def accuracy(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
    
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = PTModel(num_classes=3258)
model = model.to(device)
num_epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for epoch in range(num_epochs):
    model.train() 
    running_loss = 0.0
    running_corrects = 0
    running_top5_corrects = 0

    for images, labels in train_dataloader:
        images, labels = images.to(device), labels.to(device)
    
        outputs = model(images)
        loss = criterion(outputs, labels)
 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc1, acc5 = accuracy(outputs, labels, topk=(1, 5))
        running_loss += loss.item() * images.size(0)
        running_corrects += acc1.item()
        running_top5_corrects += acc5.item()

    epoch_loss = running_loss / len(train_dataloader.dataset)
    epoch_acc = running_corrects / len(train_dataloader)
    epoch_acc5 = running_top5_corrects / len(train_dataloader)
    model.eval()  
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.2f}%, Top-5 Acc: {epoch_acc5:.2f}%')
    val_running_loss = 0.0
    val_correct = 0
    val_total = 0
    val_top5_correct = 0
    
    with torch.no_grad():
        for images, labels in val_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            acc1, acc5 = accuracy(outputs, labels, topk=(1, 5))
            val_correct += acc1.item()
            val_top5_correct += acc5.item()
            val_total += labels.size(0)

    val_epoch_loss = val_running_loss / val_total
    val_epoch_acc = val_correct / val_total
    val_epoch_top5_acc = val_top5_correct / val_total

    print(f'Validation - Loss: {val_epoch_loss:.4f}, Acc: {val_epoch_acc:.2f}%, Top-5 Acc: {val_epoch_top5_acc:.2f}%')
    '''

In [None]:
# Target file for whichever weights `rmodel` currently holds in the kernel.
model_path = 'models/trained_model.pth'

# Save only the module's state dict (weights), not the module object or trainer state.
torch.save(rmodel.state_dict(), model_path)

# Echo the path as the cell's output.
model_path

In [None]:
# Rebuild a PTModel from a Lightning checkpoint file and put it in eval mode.
# NOTE(review): no cell visible here writes "best_model.ckpt" — confirm where this file comes from.
model = PTModel.load_from_checkpoint("best_model.ckpt")
model.eval()


In [None]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader,Dataset
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
def pre_image(image_path,model):
    """Classify one image file and return its five highest-scoring labels.

    Args:
        image_path: Path of the image file to classify.
        model: ``torch.nn.Module`` that returns per-class logits for an image batch.

    Returns:
        List of ``(class_name, probability)`` tuples sorted by descending
        probability. Class names are looked up in the module-level
        ``label_encoder``; the names are also printed.
    """
    # Same preprocessing as the notebook's validation pipeline (resize, then
    # tensor, then mean/std 0.5). Normalising with different statistics than
    # the ones used in training would shift every input the model sees.
    transform_norm = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    # Force three channels so grayscale / RGBA files do not break Normalize,
    # and close the file as soon as the pixels are loaded.
    with Image.open(image_path) as img:
        img_normalized = transform_norm(img.convert('RGB')).float().unsqueeze(0)
    # Run on the device the model lives on instead of assuming CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    img_normalized = img_normalized.to(device)
    model.eval()
    with torch.no_grad():
        output = model(img_normalized)
        # Turn logits into probabilities before taking the top 5; the ranking
        # is unchanged, but the returned scores now really are probabilities.
        probabilities, indices = torch.topk(torch.softmax(output.float(), dim=-1), 5)
    probabilities = probabilities.cpu().numpy()[0]
    indices = indices.cpu().numpy()[0]
    classes = list(label_encoder.classes_)
    class_names = [classes[ind] for ind in indices]
    print(class_names)
    class_prob_pairs = sorted(zip(class_names, probabilities), key=lambda x: x[1], reverse=True)
    return class_prob_pairs
# Try the predictor on a single image from the dataset folder.
pre_image("../fullMin256/0001/1034001.jpg", model)