In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import models, datasets, transforms
from tqdm.notebook import tqdm_notebook as tqdm
from PIL import Image
import io, os
from sklearn.model_selection import train_test_split
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

import warnings
warnings.filterwarnings("ignore")

# Pick the GPU only when CUDA is actually usable. `is_available` has to be
# called: the bare function object is always truthy, which forced 'cuda'.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration

In [None]:
# Read the caption table; the file is decoded as ISO-8859-1.
train_df = pd.read_csv(
    "/kaggle/input/image-vaptioning/simple.csv",
    encoding="ISO-8859-1",
)

In [None]:
# Preview the first rows of the caption table.
train_df.head()

In [None]:
# Root folder that is listed below to obtain one sub-folder per company.
base_path = "/kaggle/input/company-images-data"

In [None]:
# Entries directly under base_path. os.listdir returns them in arbitrary
# (filesystem) order, and the index-based validation split below depends on
# that order.
companies = os.listdir(base_path)

In [None]:
# Fix the global NumPy seed so the same positions are drawn on every run.
np.random.seed(42)
# Draw 20 distinct positions from 0..218; they select the validation companies.
candidate_positions = np.arange(219)
val_indices = np.random.choice(candidate_positions, size=20, replace=False)

val_indices

In [None]:
# Translate the sampled positions into company names; every other position
# in 0..218 belongs to the training side.
held_out_positions = {int(pos) for pos in val_indices}
val_companies = [companies[pos] for pos in val_indices]
train_companies = [companies[pos] for pos in range(219) if pos not in held_out_positions]

In [None]:
# len(train_companies), len(val_companies)

In [None]:
# Load every image into memory, routing each company folder to either the
# training or the validation lists depending on membership in `val_companies`.
# NOTE: `val_indices` is rebound here, from the sampled company positions to
# the list of validation image ids parsed from the file names.
train_images = []
val_images = []
train_indices = []
val_indices = []
for company in os.listdir(base_path):
    company_path = os.path.join(base_path, company)
    if company in val_companies:
        image_bucket, index_bucket = val_images, val_indices
    else:
        image_bucket, index_bucket = train_images, train_indices
    for img_path in os.listdir(company_path):
        # The context manager closes the underlying file as soon as the pixels
        # are read; convert() hands back an independent in-memory image.
        with Image.open(os.path.join(company_path, img_path)) as img:
            image_bucket.append(img.convert('RGB'))
        # The part of the file name before the first '.' is parsed as the image id.
        index_bucket.append(int(img_path.split('.')[0]))

In [None]:
# Resize to 224x224 and convert to a tensor; the same pipeline serves both splits.
train_transform = A.Compose([A.Resize(224, 224), ToTensorV2()])

# NOTE(review): `dataset` is not defined in the visible part of the notebook;
# confirm it is defined before this cell runs.
train_data = dataset(train_df, train_images, train_indices, train_transform, True)
val_data = dataset(train_df, val_images, val_indices, train_transform, True)

# Both loaders keep the dataset order (no shuffling) and use os.cpu_count() workers.
loader_options = dict(batch_size=32, shuffle=False, num_workers=os.cpu_count())
train_load = DataLoader(train_data, **loader_options)
val_load = DataLoader(val_data, **loader_options)

In [None]:
# Open one sample image from the company image folder and display it.
image = Image.open("/kaggle/input/company-images-data/buffalo wild wings/10142.jpg")
plt.imshow(image)

In [None]:
# Load the pretrained BLIP captioning checkpoint (processor + model).
blip_checkpoint = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(blip_checkpoint)
model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint)

# Caption the single sample image held in `image` and print the result.
inputs = processor(images=image, return_tensors="pt")
output = model.generate(**inputs)
caption = processor.decode(output[0], skip_special_tokens=True)
print("Generated Caption:", caption)

In [None]:
# Caption the training loader batch by batch; cap_list receives one list of
# decoded strings per batch, in loader order.
cap_list = []
model = model.to(device)
for batch_images, _ in tqdm(train_load):
    inputs = processor(images=list(batch_images), return_tensors="pt", padding=True).to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs).cpu()
    captions = [processor.decode(token_ids, skip_special_tokens=True) for token_ids in outputs]
    cap_list.append(captions)

In [None]:
# Flatten the per-batch caption lists into one list, preserving order.
cp = [caption for batch_captions in cap_list for caption in batch_captions]

# Store the captions as a single-column CSV (the default index is written too).
simple = pd.DataFrame({'image_captioning': cp})
simple.to_csv('/kaggle/working/simple.csv')

In [None]:
# Keep only the first element of each of the first 10 validation samples
# (presumably the image; `dataset` is not visible here, so confirm).
new_val_data = [val_data[sample_idx][0] for sample_idx in range(10)]

In [None]:
# Serve the 10 selected validation items one at a time, in their original order.
new_val_load = DataLoader(new_val_data, batch_size=1, shuffle=False, num_workers=os.cpu_count())

In [None]:
# new_val_load

In [None]:
# Run `model` over the small validation loader in inference mode and collect
# the raw outputs.
# NOTE(review): at this point `model` is still the BLIP captioning model that
# was moved to `device` earlier; confirm that calling it directly on an image
# batch is what is intended here.
preds = []
with torch.inference_mode():
    for img in new_val_load:
        img = img.to(torch.float32).to(device)
        y_pred = model(img)
        preds.append(y_pred)

In [None]:
# preds

In [None]:
# np.mean(train_df.iloc[train_indices, 2]), np.mean(train_df.iloc[val_indices, 2])

In [None]:
from transformers import AutoModel, AutoTokenizer

# Load BERTweet as the text encoder; `model` is rebound from the captioning model.
bertweet_checkpoint = "vinai/bertweet-base"
model = AutoModel.from_pretrained(bertweet_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(bertweet_checkpoint, use_fast=False)
model.eval()

# Tokenize every caption in one call, with padding and truncation enabled.
simp = list(train_df['image_captioning'])
inputs = tokenizer(simp, padding=True, truncation=True, return_tensors='pt')


In [None]:
import gc
def process_in_chunks(model, inputs, batch_size=32):
    """Encode tokenized inputs batch by batch and return first-token embeddings.

    Args:
        model: Module called as ``model(**batch)`` whose output exposes
            ``'last_hidden_state'``. It is moved to the global ``device`` and
            switched to eval mode.
        inputs: Mapping of name -> tensor that includes ``'input_ids'``; every
            tensor is sliced along its first dimension.
        batch_size: Number of rows sent through the model per forward pass.

    Returns:
        A CPU tensor with one row per input row, holding
        ``last_hidden_state[:, 0, :]`` for that row.
    """
    model = model.to(device)
    model.eval()
    all_cls_embeddings = []

    num_rows = len(inputs['input_ids'])
    # Ceiling division. The previous `// batch_size + 1` scheduled an extra,
    # empty batch whenever num_rows was an exact multiple of batch_size.
    # max(1, ...) keeps the earlier behaviour for empty input (a single empty
    # forward pass) so torch.cat below still receives one tensor.
    num_batches = max(1, (num_rows + batch_size - 1) // batch_size)

    with torch.no_grad():
        for batch_idx in range(num_batches):
            start = batch_idx * batch_size
            batch_inputs = {k: v[start:start + batch_size].to(device)
                            for k, v in inputs.items()}
            outputs = model(**batch_inputs)
            cls_embeddings = outputs['last_hidden_state'][:, 0, :]
            # Move results off the device immediately so only one batch lives there.
            all_cls_embeddings.append(cls_embeddings.cpu())
            del batch_inputs, outputs, cls_embeddings
            gc.collect()

    return torch.cat(all_cls_embeddings, dim=0)
# Encode all tokenized captions in batches of 32 and show the resulting shape.
cls_embeddings = process_in_chunks(model, inputs, batch_size=32)
print(cls_embeddings.shape)

In [None]:
# Put the embeddings into a DataFrame with one named column per dimension.
train_embeddings_cls_np = cls_embeddings.detach().cpu().numpy()
embedding_dim = train_embeddings_cls_np.shape[1]
embedding_columns = [f"cls_embedding_{i}" for i in range(embedding_dim)]
embeddings_df = pd.DataFrame(data=train_embeddings_cls_np, columns=embedding_columns)

In [None]:
from catboost import CatBoostRegressor

In [None]:
# Show how many image ids were collected for the training and validation sides.
len(train_indices), len(val_indices)

In [None]:
# Read the training CSV (decoded as ISO-8859-1); its column at position 2 is
# used as the regression target further down.
df_train = pd.read_csv("/kaggle/input/train-data/behaviour_simulation_train_csv.csv", encoding='ISO-8859-1')

In [None]:
# Preview the first rows of the training table.
df_train.head()

In [None]:
# Targets are taken positionally from the column at position 2 of df_train.
# NOTE(review): the row positions are the integer file-name stems collected
# while loading images; confirm they are valid row positions in df_train.
y_train = df_train.iloc[train_indices, 2]
y_val = df_train.iloc[val_indices, 2]

In [None]:
# Shuffled 80/20 split of the caption embeddings and their targets (fixed seed).
# NOTE: `train_data` is rebound here from the image dataset to the embedding frame.
train_data, test_data, y_t, y_v = train_test_split(
    embeddings_df,
    y_train,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
# Check that the feature and target training splits have matching lengths.
train_data.shape, y_t.shape

In [None]:
# Gradient-boosted regressor on the caption embeddings, configured for GPU training.
catboost_params = dict(
    n_estimators=1000,
    learning_rate=0.01,
    l2_leaf_reg=0.05,
    max_depth=7,
    loss_function='RMSE',
    eval_metric='RMSE',
    task_type='GPU',
    random_seed=42,
    verbose=100,
)
ctb = CatBoostRegressor(**catboost_params)

# Fit on the training split and evaluate on the held-out split during training.
ctb.fit(
    train_data,
    y_t,
    eval_set=(np.array(test_data), np.array(y_v)),
    verbose_eval=100,
)

In [None]:
# Preview the first rows of the training feature split.
train_data.head()

In [None]:
class neural_net(nn.Module):
    """Small MLP: two Linear -> BatchNorm -> ReLU stages, then a linear head.

    Two dropout modules are registered as attributes but are not applied in
    ``forward``.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Stage 1: input_size -> 256
        self.linear1 = nn.Linear(input_size, 256, bias=True)
        self.bn1 = nn.BatchNorm1d(256)
        self.relu1 = nn.ReLU()
        self.dr1 = nn.Dropout1d()
        # Stage 2: 256 -> 128
        self.linear2 = nn.Linear(256, 128, bias=True)
        self.bn2 = nn.BatchNorm1d(128)
        self.relu2 = nn.ReLU()
        self.dr2 = nn.Dropout1d()
        # Output head: 128 -> output_size
        self.linear3 = nn.Linear(128, output_size, bias=True)

    def forward(self, x):
        """Return the head's output for a batch ``x`` of input features."""
        hidden = self.relu1(self.bn1(self.linear1(x)))
        hidden = self.relu2(self.bn2(self.linear2(hidden)))
        return self.linear3(hidden)
# Build the MLP for 768 input features and one output, placed on `device`.
nn_model = neural_net(input_size = 768, output_size = 1).to(device)

In [None]:
class nn_dataset(Dataset):
    """Pairs each feature row with its target, addressed by position."""

    def __init__(self, X, y):
        super().__init__()
        # Discard the incoming index labels so both objects are indexed 0..n-1.
        self.x = X.reset_index(drop=True)
        self.y = y.reset_index(drop=True)

    def __len__(self):
        """Number of feature rows."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(features, target)`` tensors for the row at position ``idx``."""
        features = torch.tensor(self.x.iloc[idx])
        target = torch.tensor(self.y.iloc[idx])
        return features, target

In [None]:
# Wrap both embedding splits as datasets.
nn_train_data = nn_dataset(train_data, y_t)
nn_val_data = nn_dataset(test_data, y_v)

# Identical loader settings for both: batches of 32, original order, os.cpu_count() workers.
nn_loader_options = dict(batch_size=32, shuffle=False, num_workers=os.cpu_count())
nn_train_dataloader = DataLoader(nn_train_data, **nn_loader_options)
nn_val_dataloader = DataLoader(nn_val_data, **nn_loader_options)

In [None]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: the square root of ``nn.MSELoss``."""

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return ``sqrt(MSE(yhat, y))`` as a scalar tensor."""
        mean_squared_error = self.mse(yhat, y)
        return torch.sqrt(mean_squared_error)

In [None]:
# Train the MLP for 5 epochs with plain SGD and, after each epoch, report the
# mean of the per-batch RMSE on the training and validation loaders.
optimizer_nn = torch.optim.SGD(params = nn_model.parameters(), lr = 1e-3)
loss_fn = RMSELoss()
for epoch in range(5):
    nn_model.train()
    net_loss = 0.0
    for X, y in tqdm(nn_train_dataloader):
        X = X.to(torch.float32).to(device)
        y = y.to(torch.float32).to(device)
        optimizer_nn.zero_grad()
        y_pred = nn_model(X)
        # Predictions are (batch, 1) while the collated targets are (batch,).
        # Without aligning the shapes, MSE silently broadcasts to a
        # (batch, batch) grid and the loss compares every prediction with
        # every target.
        loss = loss_fn(y_pred, y.reshape(y_pred.shape))
        net_loss += (loss.item()) / len(nn_train_dataloader)
        loss.backward()
        optimizer_nn.step()
        del X,y
    print(f"epoch {epoch+1} | Train Loss: {net_loss}")

    nn_model.eval()
    net_loss_val = 0.0
    with torch.inference_mode():
        for X, y in tqdm(nn_val_dataloader):
            X = X.to(torch.float32).to(device)
            y = y.to(torch.float32).to(device)
            y_val_pred = nn_model(X)
            # Same shape alignment as in the training step.
            val_loss = loss_fn(y_val_pred, y.reshape(y_val_pred.shape))
            net_loss_val += (val_loss.item()) / len(nn_val_dataloader)
            del X,y
        print(f"epoch {epoch+1} | Val Loss: {net_loss_val}")