In [None]:
# Import all necessary libraries
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
import gc         # garbage collect library


import timm
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform


from PIL import Image


from sklearn.decomposition import PCA


Below we set the device to the GPU if one is available, otherwise to the CPU.\
Note that there is a library named **cuML** which computes SVR faster (using GPU acceleration). While experimenting it only worked with the **Tesla T4 GPU**, hence the code below uses cuML's SVR if a Tesla T4 GPU is available; otherwise **sklearn** is used to compute SVR.

In [None]:
# Pick the compute device and remember the accelerator's reported name.
has_gpu = torch.cuda.is_available()
device = 'cuda' if has_gpu else 'cpu'
device_name = torch.cuda.get_device_name() if has_gpu else 'cpu'

print('device : ', device)
print('name   : ', device_name)

# cuML's SVR is only used on a Tesla T4; everything else gets sklearn's SVR.
use_cuml = (device == 'cuda') and (device_name == 'Tesla T4')
print('\n--- cuML ---' if use_cuml else '\n--- sklearn ---')
if use_cuml:
    import cuml
    from cuml.svm import SVR
else:
    from sklearn.svm import SVR

device :  cuda
name   :  Tesla T4

--- cuML ---


In [None]:
# Competition data (images + CSVs) and the folder holding pre-extracted features.
directory = "/kaggle/input/petfinder-pawpularity-score"
Extracted_Data_directory = '/kaggle/input/extracted-features-pawpularity'

train_df = pd.read_csv(os.path.join(directory, 'train.csv'))

test_df = pd.read_csv(os.path.join(directory, 'test.csv'))

# The second label previously also read "Train samples" although it reports the test set.
print('Train samples: ', len(train_df),
      '\nTest samples: ', len(test_df), '\n')


Train samples:  9912 
Train samples:  8 



# Specify Inputs (all models)

To save time while training, every model's output is saved in a {model_name}.npy file named after the respective model. If you need to extract the outputs from the models instead of loading the saved .npy files, make the necessary changes in the code below.

In [None]:
# Feature-source switches consumed by the *_Extract functions below.
# Exactly one of ExtractFromModel / ExtractFromSaved must be True, otherwise
# those functions raise "Please Select appropriate option".
ExtractFromModel = False   # compute train features by running the model
Save = False               # only honoured together with ExtractFromModel
ExtractFromSaved = True    # load train features from Extracted_Data_directory

# Order matters: the *_Extract functions unpack this list positionally.
ExtractFrom = [ExtractFromModel, Save, ExtractFromSaved]

# Feature Extractor for All Models

In [None]:
def ExtractModelFeature(Dataloader, model, Train_PCA=False):
    """Run every batch of ``Dataloader`` through ``model`` and stack the outputs.

    The model is dispatched on its class name:

    * ``EfficientNet``, ``VisionTransformer``, ``SwinTransformer``, ``Beit``:
      the batch is moved to ``device`` and passed to ``model(...)``.
    * ``CLIPModel``: the batch is moved to ``device`` and unpacked into
      ``model.get_image_features(**batch)``.
    * ``PCA``: batches are collected unchanged and the stacked array is
      passed through ``model.transform`` (after ``model.fit`` if ``Train_PCA``).

    Parameters
    ----------
    Dataloader : iterable
        Yields batches; each batch must support ``.to(device)`` (or, for the
        PCA case, ``.cpu().detach().numpy()``).
    model : object
        One of the supported model classes listed above.
    Train_PCA : bool, default False
        Only used for ``PCA``: fit the model on the stacked data first.

    Returns
    -------
    numpy.ndarray, or ``(numpy.ndarray, model)`` when ``model`` is a PCA and
    ``Train_PCA`` is true.

    Raises
    ------
    Exception
        If the model class is not one of the supported ones.
    """
    model_kind = model.__class__.__name__
    forward_kinds = ('EfficientNet', 'VisionTransformer', 'SwinTransformer', 'Beit')

    # Reject unsupported models before touching any data.
    if model_kind not in forward_kinds + ('CLIPModel', 'PCA'):
        raise Exception("Check if model is implimented !")

    # Feature extraction is inference: make sure dropout / stochastic-depth /
    # batch-norm layers are in evaluation mode even if the caller forgot.
    if isinstance(model, torch.nn.Module):
        model.eval()

    X = []
    with torch.no_grad():
        for img in tqdm(Dataloader):
            if model_kind in forward_kinds:
                x = model(img.to(device))
            elif model_kind == 'CLIPModel':
                x = model.get_image_features(**img.to(device))
            else:  # 'PCA': the batch itself is the feature block
                x = img

            X.append(x.cpu().detach().numpy())

    X = np.concatenate(X, axis=0)

    if model_kind == 'PCA':
        if Train_PCA:
            model.fit(X)
            return model.transform(X), model
        return model.transform(X)

    return X

In [None]:
# Feature stores filled by the *_Extract functions: feature-set name -> array.
# Un-flipped image features (plus 'meta').
Train_Features_dict = {}
Test_Features_dict = {}

# Features of the horizontally flipped / cropped images (plus 'meta').
Train_Features_dict_Flip = {}
Test_Features_dict_Flip = {}

# EfficientNet

In [None]:

class dataset_EfficientNet:
    """Map-style dataset returning transformed RGB images for EfficientNet.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Id`` column holding image file stems (no extension).
    directory : str
        Root folder containing the ``train`` and ``test`` image sub-folders.
    transform : callable
        Applied to the PIL image; its result is what ``__getitem__`` returns.
    doflip : bool, default False
        Mirror the image horizontally and trim its borders before transforming.
    test : bool, default False
        Read from ``<directory>/test`` instead of ``<directory>/train``.
    """

    def __init__(self, df, directory, transform, doflip=False, test=False):
        self.df = df
        self.directory = directory
        self.transform = transform
        self.doflip = doflip
        self.test = test

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the JPEG path of the row at position ``idx`` (0-based)."""
        split = 'test' if self.test else 'train'
        # Positional lookup: samplers hand out positions 0..len-1, which only
        # coincide with index labels when the frame has a default RangeIndex.
        filename = self.df['Id'].iloc[idx]
        return os.path.join(self.directory, split, filename + '.jpg')

    def __getitem__(self, idx):
        img = Image.open(self._image_path(idx)).convert('RGB')

        if self.doflip:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            width, height = img.size
            # Keep the left 98% of the width and the rows between 2% and 98% of the height.
            img = img.crop((0.0*width, 0.02*height, 0.98*width, 0.98 * height))

        # A single transformed sample is returned; no batch dimension is added here.
        return self.transform(img)


In [None]:
## EfficientNet_L2

# Build the network from the local checkpoint, switch it to evaluation mode
# and place it on the selected device in one go.
ckp_path = '/kaggle/input/tf-efficientnet/pytorch/tf-efficientnet-l2-ns-475/1/tf_efficientnet_l2_ns_475-bebbd00a.pth'
model_eff_2 = timm.create_model('tf_efficientnet_l2_ns_475', checkpoint_path=ckp_path)
model_eff_2 = model_eff_2.eval().to(device)

# Preprocessing pipeline derived from the model's own data configuration.
config = resolve_data_config({}, model=model_eff_2)
transform_2 = create_transform(**config)


  model = create_fn(


In [None]:
# Print a layer-by-layer parameter summary of the EfficientNet model.
from torchinfo import summary
print(summary(model_eff_2))

Layer (type:depth-idx)                        Param #
EfficientNet                                  --
├─Conv2dSame: 1-1                             3,672
├─BatchNormAct2d: 1-2                         272
│    └─Identity: 2-1                          --
│    └─SiLU: 2-2                              --
├─Sequential: 1-3                             --
│    └─Sequential: 2-3                        --
│    │    └─DepthwiseSeparableConv: 3-1       20,850
│    │    └─DepthwiseSeparableConv: 3-2       8,802
│    │    └─DepthwiseSeparableConv: 3-3       8,802
│    │    └─DepthwiseSeparableConv: 3-4       8,802
│    │    └─DepthwiseSeparableConv: 3-5       8,802
│    │    └─DepthwiseSeparableConv: 3-6       8,802
│    └─Sequential: 2-4                        --
│    │    └─InvertedResidual: 3-7             97,858
│    │    └─InvertedResidual: 3-8             171,210
│    │    └─InvertedResidual: 3-9             171,210
│    │    └─InvertedResidual: 3-10            171,210
│    │    └─InvertedRe

In [None]:

def EfficientNet_Extract(train_df, test_df, transform, model_eff, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict):
    """Collect EfficientNet train/test features and store them under ``Filename``'s stem.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames handed to ``dataset_EfficientNet``.
    transform : callable
        Image preprocessing passed to the dataset.
    model_eff : model
        Feature extractor passed to ``ExtractModelFeature``.
    ExtractFrom : sequence of 3 flags
        ``(ExtractFromModel, Save, ExtractFromSaved)``. Exactly one of the first
        and last must be true. ``Save`` is only honoured with ``ExtractFromModel``.
    doflip : bool
        Forwarded to the train AND test datasets, so both feature sets are
        computed from identically prepared images. (Previously the
        "from model" branch silently left the test images un-flipped while the
        "from saved" branch flipped them.)
    Filename : str
        File name of the saved features; the part before the first ``.`` is
        the key used in the two dictionaries.
    Train_Features_dict, Test_Features_dict : dict
        Updated in place with the train / test feature arrays.

    Raises
    ------
    ValueError
        If both or neither of ``ExtractFromModel`` / ``ExtractFromSaved`` are set.
    """
    name = Filename.split('.')[0]
    print('\n---Extracting features: ' + name + '---')
    ExtractFromModel, Save, ExtractFromSaved = ExtractFrom

    # Validate the flags before doing any expensive work.
    if bool(ExtractFromModel) == bool(ExtractFromSaved):
        raise ValueError("Please Select appropriate option")

    if ExtractFromModel:
        train_dataset = dataset_EfficientNet(train_df, directory, transform, doflip=doflip, test=False)
        train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=False)
        X = ExtractModelFeature(train_dataloader, model_eff)
    else:
        # Only the first array stored in the file (the train features) is read.
        FilePath = os.path.join(Extracted_Data_directory, Filename)
        with open(FilePath, 'rb') as f:
            X = np.load(f)

    # Test features are always recomputed with the model.
    test_dataset = dataset_EfficientNet(test_df, directory, transform, doflip=doflip, test=True)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    X_test = ExtractModelFeature(test_dataloader, model_eff)

    if ExtractFromModel and Save:
        with open(Filename, 'wb') as f:
            np.save(f, X)
            np.save(f, X_test)

    print('train: ', X.shape)
    print('test: ', X_test.shape)

    Train_Features_dict[name] = X
    Test_Features_dict[name] = X_test


In [None]:

# Un-flipped EfficientNet features go into the regular feature dictionaries.
doflip = False

## EfficientNet_l2_ns_475
Filename = 'EfficientNet_l2_ns_475.npy'
EfficientNet_Extract(train_df, test_df, transform_2, model_eff_2, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)


In [None]:
# Flipped EfficientNet features go into the *_Flip dictionaries.
doflip = True

## EfficientNet_l2_ns_475_Flip
Filename = 'EfficientNet_l2_ns_475_Flip.npy'
EfficientNet_Extract(train_df, test_df, transform_2, model_eff_2, ExtractFrom, doflip, Filename, Train_Features_dict_Flip, Test_Features_dict_Flip)


In [None]:
# Drop the EfficientNet reference first, then collect garbage and release
# cached CUDA memory so the next model has room.
del model_eff_2
gc.collect()
torch.cuda.empty_cache()

# ViT

In [None]:
# ViT base, patch 16, 224 px
MODEL_PATH_1 = "/kaggle/input/vit-base-models-pretrained-pytorch/jx_vit_base_p16_224-80ecf9dd.pth"
model_vit_1 = timm.create_model("vit_base_patch16_224", pretrained=False)
# Load onto the CPU first so the checkpoint also opens on machines without a GPU;
# the model is moved to `device` below.
model_vit_1.load_state_dict(torch.load(MODEL_PATH_1, map_location='cpu'))

config = resolve_data_config({}, model=model_vit_1)
transform_1 = create_transform(**config)

# Inference only: evaluation mode disables dropout / stochastic depth.
model_vit_1 = model_vit_1.to(device).eval()


# ViT base, patch 16, 384 px
MODEL_PATH_2 = "/kaggle/input/vit-base-models-pretrained-pytorch/jx_vit_base_p16_384-83fb41ba.pth"
model_vit_2 = timm.create_model("vit_base_patch16_384", pretrained=False)
model_vit_2.load_state_dict(torch.load(MODEL_PATH_2, map_location='cpu'))

config = resolve_data_config({}, model=model_vit_2)
# NOTE: rebinds `transform_2`, which the EfficientNet cells used earlier.
transform_2 = create_transform(**config)

model_vit_2 = model_vit_2.to(device).eval()

In [None]:
# ViT base, patch 32, 384 px
MODEL_PATH_3 = "/kaggle/input/vit-base-models-pretrained-pytorch/jx_vit_base_p32_384-830016f5.pth"
model_vit_3 = timm.create_model("vit_base_patch32_384", pretrained=False)
# Load onto the CPU first so the checkpoint also opens on machines without a GPU;
# the model is moved to `device` below.
model_vit_3.load_state_dict(torch.load(MODEL_PATH_3, map_location='cpu'))

config = resolve_data_config({}, model=model_vit_3)
transform_3 = create_transform(**config)

# Inference only: evaluation mode disables dropout / stochastic depth.
model_vit_3 = model_vit_3.to(device).eval()

In [None]:
class dataset_ViT:
    """Map-style dataset returning transformed RGB images for the ViT models.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Id`` column holding image file stems (no extension).
    directory : str
        Root folder containing the ``train`` and ``test`` image sub-folders.
    transform : callable
        Applied to the PIL image; its result is what ``__getitem__`` returns.
    doflip : bool, default False
        Mirror the image horizontally and trim its borders before transforming.
    test : bool, default False
        Read from ``<directory>/test`` instead of ``<directory>/train``.
    """

    def __init__(self, df, directory, transform, doflip=False, test=False):
        self.df = df
        self.directory = directory
        self.transform = transform
        self.doflip = doflip
        self.test = test

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the JPEG path of the row at position ``idx`` (0-based)."""
        split = 'test' if self.test else 'train'
        # Positional lookup: samplers hand out positions 0..len-1, which only
        # coincide with index labels when the frame has a default RangeIndex.
        filename = self.df['Id'].iloc[idx]
        return os.path.join(self.directory, split, filename + '.jpg')

    def __getitem__(self, idx):
        image = Image.open(self._image_path(idx)).convert('RGB')
        if self.doflip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            width, height = image.size
            # Keep the left 98% of the width and the rows between 2% and 98% of the height.
            image = image.crop((0.0*width, 0.02*height, 0.98*width, 0.98 * height))

        # A single transformed sample is returned; no batch dimension is added here.
        return self.transform(image)


In [None]:
def ViT_Extract(train_df, test_df, transform, model_vit, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict):
    """Collect ViT train/test features and store them under ``Filename``'s stem.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames handed to ``dataset_ViT``.
    transform : callable
        Image preprocessing passed to the dataset.
    model_vit : model
        Feature extractor passed to ``ExtractModelFeature``.
    ExtractFrom : sequence of 3 flags
        ``(ExtractFromModel, Save, ExtractFromSaved)``. Exactly one of the first
        and last must be true. ``Save`` is only honoured with ``ExtractFromModel``.
    doflip : bool
        Forwarded to the train AND test datasets, so both feature sets are
        computed from identically prepared images. (Previously the
        "from model" branch forced un-flipped test images while the
        "from saved" branch flipped them.)
    Filename : str
        File name of the saved features; the part before the first ``.`` is
        the key used in the two dictionaries.
    Train_Features_dict, Test_Features_dict : dict
        Updated in place with the train / test feature arrays.

    Raises
    ------
    ValueError
        If both or neither of ``ExtractFromModel`` / ``ExtractFromSaved`` are set.
    """
    name = Filename.split('.')[0]
    print('\n--- Extrcting features: ' + name + ' ---')
    ExtractFromModel, Save, ExtractFromSaved = ExtractFrom

    # Validate the flags before doing any expensive work.
    if bool(ExtractFromModel) == bool(ExtractFromSaved):
        raise ValueError("Please Select appropriate option")

    if ExtractFromModel:
        train_dataset = dataset_ViT(train_df, directory, transform, doflip=doflip, test=False)
        train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=False)
        X = ExtractModelFeature(train_dataloader, model_vit)
    else:
        # Only the first array stored in the file (the train features) is read.
        FilePath = os.path.join(Extracted_Data_directory, Filename)
        with open(FilePath, 'rb') as f:
            X = np.load(f)

    # Test features are always recomputed with the model.
    test_dataset = dataset_ViT(test_df, directory, transform, doflip=doflip, test=True)
    test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False)
    X_test = ExtractModelFeature(test_dataloader, model_vit)

    if ExtractFromModel and Save:
        with open(Filename, 'wb') as f:
            np.save(f, X)
            np.save(f, X_test)

    print('train: ', X.shape)
    print('test: ', X_test.shape)

    Train_Features_dict[name] = X
    Test_Features_dict[name] = X_test


In [None]:

# Un-flipped ViT features go into the regular feature dictionaries.
doflip = False

## ViT_p16_224
Filename = 'ViT_p16_224.npy'
ViT_Extract(train_df, test_df, transform_1, model_vit_1, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)
# The triple-quoted string below is a bare string literal, i.e. the
# ViT_p16_384 extraction it contains is disabled and never executed.
'''
## ViT_p16_384
Filename = 'ViT_p16_384.npy'
ViT_Extract(train_df, test_df, transform_2, model_vit_2, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)
'''
## ViT_p32_384
Filename = 'ViT_p32_384.npy'
ViT_Extract(train_df, test_df, transform_3, model_vit_3, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)


In [None]:

# Flipped ViT features go into the *_Flip dictionaries; the order of the calls
# fixes the insertion order of those dictionaries.
doflip = True
# The triple-quoted string below is a bare string literal, i.e. the
# ViT_p16_224_Flip extraction it contains is disabled and never executed.
'''
## ViT_p16_224_Flip
Filename = 'ViT_p16_224_Flip.npy'
ViT_Extract(train_df, test_df, transform_1, model_vit_1, ExtractFrom, doflip, Filename,  Train_Features_dict_Flip, Test_Features_dict_Flip)
'''

## ViT_p16_384_Flip
Filename = 'ViT_p16_384_Flip.npy'
ViT_Extract(train_df, test_df, transform_2, model_vit_2, ExtractFrom, doflip, Filename,  Train_Features_dict_Flip, Test_Features_dict_Flip)


## ViT_p32_384_Flip
Filename = 'ViT_p32_384_Flip.npy'
ViT_Extract(train_df, test_df, transform_3, model_vit_3, ExtractFrom, doflip, Filename,  Train_Features_dict_Flip, Test_Features_dict_Flip)


In [None]:
# Drop the ViT references first, then collect garbage and release cached CUDA memory.
del model_vit_1, model_vit_2, model_vit_3
gc.collect()
torch.cuda.empty_cache()

# Clip

In [None]:

# CLIP models and their matching image processors, loaded from local folders.
from transformers import CLIPModel, CLIPProcessor

# Adjust the paths to point to your extracted files
model_path_1 = "/kaggle/input/clip-vit/pytorch/b-32-laion2b-s34b-b79k/1"
model_path_2 = "/kaggle/input/clip-vit/pytorch/h-14-laion2b-s32b-b79k/1"
model_path_3 = "/kaggle/input/clip-vit/pytorch/l-14-datacomp-xl-s13b-b90k/1"

# Each model is moved to the selected device right after loading.
model_clip_1 = CLIPModel.from_pretrained(model_path_1).to(device)
processor_clip_1 = CLIPProcessor.from_pretrained(model_path_1)

model_clip_2 = CLIPModel.from_pretrained(model_path_2).to(device)
processor_clip_2 = CLIPProcessor.from_pretrained(model_path_2)

model_clip_3 = CLIPModel.from_pretrained(model_path_3).to(device)
processor_clip_3 = CLIPProcessor.from_pretrained(model_path_3)


  return self.fget.__get__(instance, owner)()
2024-08-20 08:00:05.372944: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-20 08:00:05.373085: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-20 08:00:05.545667: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:

class dataset_Clip:
    """Map-style dataset returning processor outputs for the CLIP models.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Id`` column holding image file stems (no extension).
    directory : str
        Root folder containing the ``train`` and ``test`` image sub-folders.
    processor : callable
        Called as ``processor(images=..., return_tensors="pt", padding=True)``;
        its (squeezed) mapping is what ``__getitem__`` returns.
    doflip : bool, default False
        Mirror the image horizontally and trim its borders before processing.
    test : bool, default False
        Read from ``<directory>/test`` instead of ``<directory>/train``.
    """

    def __init__(self, df, directory, processor, doflip=False, test=False):
        self.df = df
        self.directory = directory
        self.processor = processor
        self.doflip = doflip
        self.test = test

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the JPEG path of the row at position ``idx`` (0-based)."""
        split = 'test' if self.test else 'train'
        # Positional lookup: samplers hand out positions 0..len-1, which only
        # coincide with index labels when the frame has a default RangeIndex.
        filename = self.df['Id'].iloc[idx]
        return os.path.join(self.directory, split, filename + '.jpg')

    def __getitem__(self, idx):
        # No explicit RGB conversion here; the image is handed to the processor as opened.
        image = Image.open(self._image_path(idx))
        if self.doflip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
            width, height = image.size
            # Keep the left 98% of the width and the rows between 2% and 98% of the height.
            image = image.crop((0.0*width, 0.02*height, 0.98*width, 0.98 * height))
        image = self.processor(images=image, return_tensors="pt", padding=True)
        # Drop every size-1 dimension of each returned tensor (presumably the
        # leading batch axis added by the processor; TODO confirm).
        for key, val in image.items():
            image[key] = val.squeeze()

        return image


In [None]:
def Clip_Extract(train_df, test_df, processor_clip, model_clip, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict):
    """Collect CLIP train/test image features and store them under ``Filename``'s stem.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames handed to ``dataset_Clip``.
    processor_clip : callable
        Image processor passed to the dataset.
    model_clip : model
        Feature extractor passed to ``ExtractModelFeature``.
    ExtractFrom : sequence of 3 flags
        ``(ExtractFromModel, Save, ExtractFromSaved)``. Exactly one of the first
        and last must be true. ``Save`` is only honoured with ``ExtractFromModel``.
    doflip : bool
        Forwarded to the train AND test datasets, so both feature sets are
        computed from identically prepared images. (Previously the
        "from model" branch silently left the test images un-flipped while the
        "from saved" branch flipped them.)
    Filename : str
        File name of the saved features; the part before the first ``.`` is
        the key used in the two dictionaries.
    Train_Features_dict, Test_Features_dict : dict
        Updated in place with the train / test feature arrays.

    Raises
    ------
    ValueError
        If both or neither of ``ExtractFromModel`` / ``ExtractFromSaved`` are set.
    """
    name = Filename.split('.')[0]
    print('\n--- Extracting features: ' + name + ' ---')

    ExtractFromModel, Save, ExtractFromSaved = ExtractFrom

    # Validate the flags before doing any expensive work.
    if bool(ExtractFromModel) == bool(ExtractFromSaved):
        raise ValueError("Please Select appropriate option")

    if ExtractFromModel:
        train_dataset = dataset_Clip(train_df, directory, processor_clip, doflip=doflip, test=False)
        train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=False)
        X = ExtractModelFeature(train_dataloader, model_clip)
    else:
        # Only the first array stored in the file (the train features) is read.
        FilePath = os.path.join(Extracted_Data_directory, Filename)
        with open(FilePath, 'rb') as f:
            X = np.load(f)

    # Test features are always recomputed with the model.
    test_dataset = dataset_Clip(test_df, directory, processor_clip, doflip=doflip, test=True)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    X_test = ExtractModelFeature(test_dataloader, model_clip)

    if ExtractFromModel and Save:
        with open(Filename, 'wb') as f:
            np.save(f, X)
            np.save(f, X_test)

    print('train: ', X.shape)
    print('test: ', X_test.shape)

    Train_Features_dict[name] = X
    Test_Features_dict[name] = X_test



In [None]:

# Un-flipped CLIP features go into the regular feature dictionaries.
doflip = False

# Clip_b_32
Filename = 'Clip_b_32.npy'
Clip_Extract(train_df, test_df, processor_clip_1, model_clip_1, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)

# Clip_h_14
Filename = 'Clip_h_14.npy'
Clip_Extract(train_df, test_df, processor_clip_2, model_clip_2, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)

# Clip_l_14
Filename = 'Clip_l_14.npy'
Clip_Extract(train_df, test_df, processor_clip_3, model_clip_3, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)



In [None]:
# Flipped CLIP features go into the *_Flip dictionaries; the order of the calls
# fixes the insertion order of those dictionaries.
doflip = True

# Clip_b_32_Flip
Filename = 'Clip_b_32_Flip.npy'
Clip_Extract(train_df, test_df, processor_clip_1, model_clip_1, ExtractFrom, doflip, Filename, Train_Features_dict_Flip, Test_Features_dict_Flip)

# Clip_h_14_Flip
Filename = 'Clip_h_14_Flip.npy'
Clip_Extract(train_df, test_df, processor_clip_2, model_clip_2, ExtractFrom, doflip, Filename, Train_Features_dict_Flip, Test_Features_dict_Flip)

# Clip_l_14_Flip
Filename = 'Clip_l_14_Flip.npy'
Clip_Extract(train_df, test_df, processor_clip_3, model_clip_3, ExtractFrom, doflip, Filename, Train_Features_dict_Flip, Test_Features_dict_Flip)


In [None]:
# Drop the CLIP references first, then collect garbage and release cached CUDA memory.
del model_clip_1, model_clip_2, model_clip_3
gc.collect()
torch.cuda.empty_cache()

# Swin

In [None]:

class dataset_Swin:
    """Map-style dataset returning transformed RGB images for the Swin model.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Id`` column holding image file stems (no extension).
    directory : str
        Root folder containing the ``train`` and ``test`` image sub-folders.
    transform : callable
        Applied to the PIL image; its result is what ``__getitem__`` returns.
    doflip : bool, default False
        Mirror the image horizontally and trim its borders before transforming.
    test : bool, default False
        Read from ``<directory>/test`` instead of ``<directory>/train``.
    """

    def __init__(self, df, directory, transform, doflip=False, test=False):
        self.df = df
        self.directory = directory
        self.transform = transform
        self.doflip = doflip
        self.test = test

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the JPEG path of the row at position ``idx`` (0-based)."""
        split = 'test' if self.test else 'train'
        # Positional lookup: samplers hand out positions 0..len-1, which only
        # coincide with index labels when the frame has a default RangeIndex.
        filename = self.df['Id'].iloc[idx]
        return os.path.join(self.directory, split, filename + '.jpg')

    def __getitem__(self, idx):
        img = Image.open(self._image_path(idx)).convert('RGB')

        if self.doflip:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            width, height = img.size
            # Keep the left 98% of the width and the rows between 2% and 98% of the height.
            img = img.crop((0.0*width, 0.02*height, 0.98*width, 0.98 * height))

        # A single transformed sample is returned; no batch dimension is added here.
        return self.transform(img)


In [None]:
# Swin-L (patch 4, window 12, 384 px) built from the local checkpoint.
ckp_path = '/kaggle/input/pretrained-models-pawpularity/swin_large_patch4_window12_384.pth'
model_swin = timm.create_model('swin_large_patch4_window12_384', checkpoint_path=ckp_path)

config = resolve_data_config({}, model=model_swin)
transform = create_transform(**config)

# Inference only: a freshly created module is in training mode, so switch to
# evaluation mode to disable dropout / stochastic depth and get deterministic features.
model_swin = model_swin.to(device).eval()

In [None]:

def Swin_Extract(train_df, test_df, transform, model, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict):
    """Collect Swin train/test features and store them under ``Filename``'s stem.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames handed to ``dataset_Swin``.
    transform : callable
        Image preprocessing passed to the dataset.
    model : model
        Feature extractor passed to ``ExtractModelFeature``.
    ExtractFrom : sequence of 3 flags
        ``(ExtractFromModel, Save, ExtractFromSaved)``. Exactly one of the first
        and last must be true. ``Save`` is only honoured with ``ExtractFromModel``.
    doflip : bool
        Forwarded to the train AND test datasets, so both feature sets are
        computed from identically prepared images. (Previously the
        "from model" branch silently left the test images un-flipped while the
        "from saved" branch flipped them.)
    Filename : str
        File name of the saved features; the part before the first ``.`` is
        the key used in the two dictionaries.
    Train_Features_dict, Test_Features_dict : dict
        Updated in place with the train / test feature arrays.

    Raises
    ------
    ValueError
        If both or neither of ``ExtractFromModel`` / ``ExtractFromSaved`` are set.
    """
    name = Filename.split('.')[0]
    print('\n---Extracting features: ' + name + '---')
    ExtractFromModel, Save, ExtractFromSaved = ExtractFrom

    # Validate the flags before doing any expensive work.
    if bool(ExtractFromModel) == bool(ExtractFromSaved):
        raise ValueError("Please Select appropriate option")

    if ExtractFromModel:
        train_dataset = dataset_Swin(train_df, directory, transform, doflip=doflip, test=False)
        train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=False)
        X = ExtractModelFeature(train_dataloader, model)
    else:
        # Only the first array stored in the file (the train features) is read.
        FilePath = os.path.join(Extracted_Data_directory, Filename)
        with open(FilePath, 'rb') as f:
            X = np.load(f)

    # Test features are always recomputed with the model.
    test_dataset = dataset_Swin(test_df, directory, transform, doflip=doflip, test=True)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    X_test = ExtractModelFeature(test_dataloader, model)

    if ExtractFromModel and Save:
        with open(Filename, 'wb') as f:
            np.save(f, X)
            np.save(f, X_test)

    print('train: ', X.shape)
    print('test: ', X_test.shape)

    Train_Features_dict[name] = X
    Test_Features_dict[name] = X_test


In [None]:
# Un-flipped Swin features go into the regular feature dictionaries.
doflip = False
Filename = 'SwinL_12_3.npy'

Swin_Extract(train_df, test_df, transform, model_swin, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)

In [None]:
# Drop the Swin reference first, then collect garbage and release cached CUDA memory.
del model_swin
gc.collect()
torch.cuda.empty_cache()

# Beit

In [None]:

class dataset_Beit:
    """Map-style dataset returning transformed RGB images for the BEiT model.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Id`` column holding image file stems (no extension).
    directory : str
        Root folder containing the ``train`` and ``test`` image sub-folders.
    transform : callable
        Applied to the PIL image; its result is what ``__getitem__`` returns.
    doflip : bool, default False
        Mirror the image horizontally and trim its borders before transforming.
    test : bool, default False
        Read from ``<directory>/test`` instead of ``<directory>/train``.
    """

    def __init__(self, df, directory, transform, doflip=False, test=False):
        self.df = df
        self.directory = directory
        self.transform = transform
        self.doflip = doflip
        self.test = test

    def __len__(self):
        return len(self.df)

    def _image_path(self, idx):
        """Return the JPEG path of the row at position ``idx`` (0-based)."""
        split = 'test' if self.test else 'train'
        # Positional lookup: samplers hand out positions 0..len-1, which only
        # coincide with index labels when the frame has a default RangeIndex.
        filename = self.df['Id'].iloc[idx]
        return os.path.join(self.directory, split, filename + '.jpg')

    def __getitem__(self, idx):
        img = Image.open(self._image_path(idx)).convert('RGB')

        if self.doflip:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            width, height = img.size
            # Keep the left 98% of the width and the rows between 2% and 98% of the height.
            img = img.crop((0.0*width, 0.02*height, 0.98*width, 0.98 * height))

        # A single transformed sample is returned; no batch dimension is added here.
        return self.transform(img)


In [None]:
# BEiT-L (patch 16, 384 px) built from the local checkpoint.
ckp_path = '/kaggle/input/pretrained-models-pawpularity/beit_large_patch16_384.pth'
model_beit = timm.create_model('beit_large_patch16_384', checkpoint_path=ckp_path)

config = resolve_data_config({}, model=model_beit)
transform = create_transform(**config)

# Inference only: a freshly created module is in training mode, so switch to
# evaluation mode to disable dropout / stochastic depth and get deterministic features.
model_beit = model_beit.to(device).eval()

In [None]:

def Beit_Extract(train_df, test_df, transform, model, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict):
    """Collect BEiT train/test features and store them under ``Filename``'s stem.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames handed to ``dataset_Beit``.
    transform : callable
        Image preprocessing passed to the dataset.
    model : model
        Feature extractor passed to ``ExtractModelFeature``.
    ExtractFrom : sequence of 3 flags
        ``(ExtractFromModel, Save, ExtractFromSaved)``. Exactly one of the first
        and last must be true. ``Save`` is only honoured with ``ExtractFromModel``.
    doflip : bool
        Forwarded to the train AND test datasets, so both feature sets are
        computed from identically prepared images. (Previously the
        "from model" branch silently left the test images un-flipped while the
        "from saved" branch flipped them.)
    Filename : str
        File name of the saved features; the part before the first ``.`` is
        the key used in the two dictionaries.
    Train_Features_dict, Test_Features_dict : dict
        Updated in place with the train / test feature arrays.

    Raises
    ------
    ValueError
        If both or neither of ``ExtractFromModel`` / ``ExtractFromSaved`` are set.
    """
    name = Filename.split('.')[0]
    print('\n---Extracting features: ' + name + '---')
    ExtractFromModel, Save, ExtractFromSaved = ExtractFrom

    # Validate the flags before doing any expensive work.
    if bool(ExtractFromModel) == bool(ExtractFromSaved):
        raise ValueError("Please Select appropriate option")

    if ExtractFromModel:
        train_dataset = dataset_Beit(train_df, directory, transform, doflip=doflip, test=False)
        train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=False)
        X = ExtractModelFeature(train_dataloader, model)
    else:
        # Only the first array stored in the file (the train features) is read.
        FilePath = os.path.join(Extracted_Data_directory, Filename)
        with open(FilePath, 'rb') as f:
            X = np.load(f)

    # Test features are always recomputed with the model.
    test_dataset = dataset_Beit(test_df, directory, transform, doflip=doflip, test=True)
    test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)
    X_test = ExtractModelFeature(test_dataloader, model)

    if ExtractFromModel and Save:
        with open(Filename, 'wb') as f:
            np.save(f, X)
            np.save(f, X_test)

    print('train: ', X.shape)
    print('test: ', X_test.shape)

    Train_Features_dict[name] = X
    Test_Features_dict[name] = X_test


In [None]:
# Un-flipped BEiT features go into the regular feature dictionaries.
doflip = False
Filename = 'BeitL_16_3.npy'

Beit_Extract(train_df, test_df, transform, model_beit, ExtractFrom, doflip, Filename, Train_Features_dict, Test_Features_dict)

In [None]:
# Drop the BEiT reference first, then collect garbage and release cached CUDA memory.
del model_beit
gc.collect()
torch.cuda.empty_cache()

In [None]:
def Meta_Extract(train_df, test_df, Train_Features_dict, Test_Features_dict):
    """Store columns 1..12 (by position) of each frame under the key 'meta'.

    The train slice is written to ``Train_Features_dict`` and the test slice
    to ``Test_Features_dict``; both dictionaries are modified in place.
    """
    meta_columns = slice(1, 13)
    targets = ((train_df, Train_Features_dict), (test_df, Test_Features_dict))
    for frame, store in targets:
        store['meta'] = frame.iloc[:, meta_columns].values

In [None]:
# Add the 'meta' block to the un-flipped feature dictionaries.
Meta_Extract(train_df, test_df, Train_Features_dict, Test_Features_dict)

In [None]:
# Add the same 'meta' block to the *_Flip feature dictionaries.
Meta_Extract(train_df, test_df, Train_Features_dict_Flip, Test_Features_dict_Flip)

# Stack all data

In [None]:
# One line per un-flipped feature set: its name and array shape.
for model_name, feats in Train_Features_dict.items():
    print(model_name, '\t', feats.shape)

In [None]:
# One line per flipped feature set: its name and array shape.
for model_name, feats in Train_Features_dict_Flip.items():
    print(model_name, '\t', feats.shape)

In [None]:
# Names of every feature set this pipeline knows about. A comma was missing after
# 'BeitL_16_3', which silently fused it with 'meta' into 'BeitL_16_3meta'.
# NOTE(review): not every name listed here appears to be extracted in the visible cells.
All_model_list = ['EfficientNet_b6', 'EfficientNet_l2_ns_475',
                  'ViT_p16_224', 'ViT_p16_384', 'ViT_p32_384',
                  'Clip_b_32', 'Clip_h_14', 'Clip_l_14',
                  'SwinL_12_3', 'BeitL_16_3',
                  'meta']
# Feature sets stacked for SVR model A and SVR model B.
model_list_A = ['EfficientNet_l2_ns_475', 'ViT_p16_224', 'ViT_p32_384',
                'Clip_b_32', 'Clip_h_14', 'Clip_l_14', 'meta']
model_list_B = ['SwinL_12_3', 'BeitL_16_3', 'Clip_h_14', 'meta']

#ALL_model_list_flip = ['EfficientNet_b6_Flip', 'EfficientNet_l2_ns_475_Flip',
#              'ViT_p16_224_Flip', 'ViT_p16_384_Flip', 'ViT_p32_384_Flip',
#              'Clip_b_32_Flip', 'Clip_h_14_Flip', 'Clip_l_14_Flip',
#              'meta']

# Feature sets computed from the flipped images.
model_list_Flip = ['EfficientNet_l2_ns_475_Flip', 'ViT_p16_384_Flip', 'ViT_p32_384_Flip',
                  'Clip_b_32_Flip', 'Clip_h_14_Flip', 'Clip_l_14_Flip', 'meta']

In [None]:

# Stack the feature sets of model list A side by side (one column block per set).
train_blocks_A = [Train_Features_dict[name] for name in model_list_A]
test_blocks_A = [Test_Features_dict[name] for name in model_list_A]

X = np.concatenate(train_blocks_A, axis=1)
X_test = np.concatenate(test_blocks_A, axis=1)

print('X shape: ', X.shape)
print('X_test shape: ', X_test.shape)

In [None]:

# Stack the feature sets of model list B side by side (one column block per set).
train_blocks_B = [Train_Features_dict[name] for name in model_list_B]
test_blocks_B = [Test_Features_dict[name] for name in model_list_B]

X_B = np.concatenate(train_blocks_B, axis=1)
X_test_B = np.concatenate(test_blocks_B, axis=1)

print('X shape: ', X_B.shape)
print('X_test shape: ', X_test_B.shape)


In [None]:

# Stack the flipped feature sets in the order declared in `model_list_Flip`.
# Iterating the dictionaries' own keys (as before) made the column order depend
# on the order in which the extraction cells happened to run, and would pull in
# any extra key present in the dictionaries; an explicit list keeps train and
# test columns aligned and matches how sets A and B are built.
X_Flip = np.concatenate([Train_Features_dict_Flip[model] for model in model_list_Flip], axis=1)
X_test_Flip = np.concatenate([Test_Features_dict_Flip[model] for model in model_list_Flip], axis=1)

print('X shape: ', X_Flip.shape)
print('X_test shape: ', X_test_Flip.shape)

In [None]:
# Regression target: the Pawpularity column of the training frame as an array.
y = train_df.Pawpularity.values
print(y)

In [None]:
# Disabled cleanup: the whole cell body is a string literal, so none of the
# statements inside it are executed.
'''
del Train_Features_dict, Test_Features_dict, Train_Features_dict_Flip, Test_Features_dict_Flip
#del my_object
torch.cuda.empty_cache()
gc.collect()
'''

# Preprocess: transforms

In [None]:
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler


def Normalizer_StandardScaler(X, X_test):
    """Apply ``Normalizer`` and then ``StandardScaler`` to train and test features.

    The ``StandardScaler`` is fitted on the train and test rows stacked
    together, then applied to each part separately. Both shapes are printed.

    Returns
    -------
    tuple
        ``(X, X_test)`` after both transformations.
    """
    # Per the original note, fitting the Normalizer does nothing; the call is kept as-is.
    row_normalizer = Normalizer().fit(np.vstack((X, X_test)))
    X_unit = row_normalizer.transform(X)
    X_test_unit = row_normalizer.transform(X_test)

    standardizer = StandardScaler()
    standardizer.fit(np.vstack((X_unit, X_test_unit)))
    X_scaled = standardizer.transform(X_unit)
    X_test_scaled = standardizer.transform(X_test_unit)

    print('X(Shape): ', X_scaled.shape)
    print('X_test(Shape): ', X_test_scaled.shape)
    return X_scaled, X_test_scaled

def StandardScaler_(X, X_test):
    """Standardize train and test features with one ``StandardScaler``.

    The scaler is fitted on the train and test rows stacked together and then
    applied to each part separately. Both shapes are printed.

    Returns
    -------
    tuple
        ``(X, X_test)`` after scaling.
    """
    standardizer = StandardScaler()
    standardizer.fit(np.vstack((X, X_test)))
    X_scaled, X_test_scaled = standardizer.transform(X), standardizer.transform(X_test)

    print('X(Shape): ', X_scaled.shape)
    print('X_test(Shape): ', X_test_scaled.shape)
    return X_scaled, X_test_scaled

In [None]:
# NOTE: overwrites X / X_test with their scaled versions, so re-running this
# cell scales already-scaled data; re-run the stacking cell first.
X, X_test = Normalizer_StandardScaler(X, X_test)

In [None]:
# NOTE: overwrites X_B / X_test_B with their scaled versions, so re-running this
# cell scales already-scaled data; re-run the stacking cell first.
X_B, X_test_B = Normalizer_StandardScaler(X_B, X_test_B)

In [None]:
# NOTE: overwrites X_Flip / X_test_Flip with their scaled versions, so re-running
# this cell scales already-scaled data; re-run the stacking cell first.
X_Flip, X_test_Flip = Normalizer_StandardScaler(X_Flip, X_test_Flip)

# SVR

In [None]:
def RMSE(y, y_pred):
    """Return the root-mean-square error between ``y`` and ``y_pred``.

    Both arguments may be NumPy arrays or any array-like (e.g. plain lists);
    they are converted to float arrays before subtracting.
    """
    residual = np.asarray(y, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean(residual ** 2))

In [None]:
def svr_fit_predict(X, X_test, y, device, epsilon=5.6):
    """Fit an RBF-kernel SVR on ``(X, y)`` and predict for ``X`` and ``X_test``.

    Parameters
    ----------
    X, X_test : array-like
        Train and test feature matrices.
    y : array-like
        Training targets.
    device : str
        Kept for backward compatibility. The backend-specific arguments are
        chosen from the ``SVR`` class that was actually imported, because the
        import depends on the GPU model and not just on ``device``: on a CUDA
        GPU other than the one that enables cuML, ``SVR`` is sklearn's and
        would reject the cuML-only ``output_type`` argument.
    epsilon : float, default 5.6
        Epsilon passed to the SVR.

    Returns
    -------
    tuple
        ``(y_pred_train, y_pred_test)``. The training RMSE is printed.
    """
    use_cuml = SVR.__module__.split('.')[0] == 'cuml'

    if use_cuml:
        print('--- cuML ---')
        reg = SVR(C=16.0, kernel='rbf', degree=3, epsilon=epsilon, gamma='auto', max_iter=400000, output_type='numpy')
    else:
        print('--- sklearn ---')
        reg = SVR(C=16, kernel="rbf", degree=3, epsilon=epsilon, gamma='auto')
    reg.fit(X, y)

    y_pred_train = reg.predict(X)
    y_pred_test = reg.predict(X_test)

    print('RMSE_train: ', RMSE(y, y_pred_train))

    return y_pred_train, y_pred_test

In [None]:
# SVR on the stacked (and scaled) feature set A.
y_pred_train_A, y_pred_test_A = svr_fit_predict(X, X_test, y, device)

In [None]:
# SVR on the stacked (and scaled) feature set B.
y_pred_train_B, y_pred_test_B = svr_fit_predict(X_B, X_test_B, y, device)

In [None]:
# SVR on the stacked (and scaled) flipped feature set.
y_pred_train_Flip, y_pred_test_Flip = svr_fit_predict(X_Flip, X_test_Flip, y, device)

In [None]:

from scipy.optimize import minimize

def min_func(K):
    """Training RMSE of the blend weighted by ``K`` (weights for A, B and Flip)."""
    w_a, w_b, w_flip = K[0], K[1], K[2]
    blended = w_a*y_pred_train_A + w_b*y_pred_train_B + w_flip*y_pred_train_Flip
    return RMSE(y, blended)

# Start from equal weights and search for the weights minimising min_func.
initial_weights = [1/3]*3
res = minimize(min_func, initial_weights, method='Nelder-Mead', tol=1e-1)
K = res.x
res


In [None]:
# Blend the three models' predictions with the optimised weights K.
y_pred_train = K[0]*y_pred_train_A + K[1]*y_pred_train_B + K[2]*y_pred_train_Flip
y_pred_test = K[0]*y_pred_test_A + K[1]*y_pred_test_B + K[2]*y_pred_test_Flip

# Weights and the blended RMSE on the training data.
print(K, RMSE(y, y_pred_train))

In [None]:
# Sample of the training predictions next to the true values, as a sanity check.
print("Predicted values:", y_pred_train[:10])
print("True values: ", y[:10])


In [None]:
# Blended predictions for the test set.
print("Predicted values:", y_pred_test)

In [None]:
# Build the submission frame in one step: test ids next to the blended predictions.
submit = pd.DataFrame({'Id': test_df['Id'], 'Pawpularity': y_pred_test})

# Write it without the index column.
submit.to_csv('submission.csv', index=False)

In [None]:
# Display the submission frame as the cell output.
submit