### Importing Libraries

In [2]:
import json
import os
import random
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from transformers import BertTokenizer, BertModel
from torchtext.data.metrics import bleu_score
from nltk.translate.meteor_score import meteor_score
import pandas as pd

### Defining file paths

In [3]:
train_val_json_path = "C:/raj/ML/meme challenge/memes-trainval.json"
test_json_path = "C:/raj/ML/meme challenge/mlr_captioning_TEST.json"
image_folder_path = "C:/raj/ML/meme challenge/sirf aur sirf memes"

#### Load datasets

In [4]:
def load_json(file_path):
    with open(file_path, 'r') as f:
        return json.load(f)

In [30]:
# Load training, validation, and test datasets
train_val_data = load_json(train_val_json_path)
test_data = load_json(test_json_path)
train_val_data

[{'category': 'memes',
  'img_captions': ['Person in Spider Man outfit gives a lecture on stage.',
   'Person dressed as spider man stands in front of crowd with notes'],
  'meme_captions': ['Meme poster is frustrated about the format of the website and is making a suggestion for improvement.'],
  'title': 'For real though',
  'url': 'https://i.redd.it/m16dhaqyply21.jpg',
  'img_fname': 'memes_bpet7l.png',
  'metaphors': [{'metaphor': 'Spider Man outfit', 'meaning': 'Meme poster'},
   {'metaphor': 'a lecture', 'meaning': 'complaint'},
   {'metaphor': 'spider man', 'meaning': 'Meme poster'},
   {'metaphor': 'crowd', 'meaning': 'meme readers'}],
  'post_id': 'bpet7l'},
 {'category': 'memes',
  'img_captions': ['Two dogs carry a white flag',
   'Identical chubby animated dogs carry a white banner.'],
  'meme_captions': ['Meme poster is saying that searching Google plus the term you want to search on reddit is better than searching reddit itself.',
   'Meme poster is telling everyone that 

#### Split the data into training and validation sets

In [6]:
def split_data(data, train_ratio=0.8):
    random.shuffle(data)
    split_idx = int(len(data) * train_ratio)
    train_data = data[:split_idx]
    val_data = data[split_idx:]
    return train_data, val_data

train_data, val_data = split_data(train_val_data)

### Dataset class

In [49]:
class MemeDataset(Dataset):
    def _init_(self, data, img_dir, transform = None):
        self.data = data
        self.img_dir = img_dir
        self.transform = transform
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        img_path = os.path.join(self.img_dir, item['img_fname'])
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        
        captions = item['img_captions'][0]  # Use the first caption for simplicity
        tokenized_captions = self.tokenizer(captions, padding = 'max_length', truncation = True, return_tensors = 'pt')
        
        return image, tokenized_captions, item['meme_captions']


### Define transformations

In [47]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

### DataLoader

In [50]:
train_dataset = MemeDataset()
train_dataset.data = train_data
train_dataset.img_dir = image_folder_path
train_dataset.transform = transform

val_dataset = MemeDataset()
val_dataset.data = val_data
val_dataset.img_dir = image_folder_path
val_dataset.transform = transform

train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = False)
val_loader = DataLoader(val_dataset, batch_size = 32, shuffle = False)

### Define the model

In [54]:
class MemeCaptioningModel(nn.Module):
    def _init_(self):
        super(MemeCaptioningModel, self)._init_()
        self.cnn = models.resnet50(pretrained = True)
        self.cnn.fc = nn.Identity()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.fc = nn.Linear(self.cnn.fc.in_features + self.bert.config.hidden_size, 512)
        self.decoder = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(d_model = 512, nhead = 8), num_layers = 6)
        self.fc_out = nn.Linear(512, self.bert.config.vocab_size)
        
    def forward(self, images, captions):
        cnn_features = self.cnn(images)
        
        bert_output = self.bert(**captions)
        bert_features = bert_output.last_hidden_state.mean(dim = 1)
        
        combined_features = torch.cat((cnn_features, bert_features), dim = 1)
        combined_features = self.fc(combined_features).unsqueeze(0)
        
        tgt = torch.zeros((captions['input_ids'].size(0), 1, 512), device=combined_features.device)
        output = self.decoder(tgt, combined_features)
        
        return self.fc_out(output.squeeze(0))


### Instantiate and train the model

In [57]:
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MemeCaptioningModel()
optimizer = torch.optim.Adam(model.parameters(), lr = 5e-5)
criterion = nn.CrossEntropyLoss()

ValueError: optimizer got an empty parameter list

### Training Loop

In [45]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for images, captions, meme_captions in train_loader:
        images, captions = images.to(device), {k: v.to(device) for k, v in captions.items()}
        optimizer.zero_grad()
        outputs = model(images, captions)
        loss = criterion(outputs.transpose(1, 2), captions['input_ids'])
        loss.backward()
        optimizer.step()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")

TypeError: object of type 'MemeDataset' has no len()

### Evaluation function

In [41]:
def evaluate_model(model, data_loader):
    model.eval()
    actuals, predictions = [], []
    with torch.no_grad():
        for images, captions, meme_captions in data_loader:
            images, captions = images.to(device), {k: v.to(device) for k, v in captions.items()}
            outputs = model(images, captions)
            predicted_ids = outputs.argmax(dim=2)
            predicted_captions = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)
            
            actuals.extend(meme_captions)
            predictions.extend(predicted_captions)
    
    bleu = bleu_score(predictions, [[a] for a in actuals])
    meteor = sum(meteor_score([a], p) for a, p in zip(actuals, predictions)) / len(actuals)
    return bleu, meteor

### Evaluate on validation set and save the model

In [42]:
bleu, meteor = evaluate_model(model, val_loader)
print(f"BLEU Score: {bleu}, METEOR Score: {meteor}")

torch.save(model.state_dict(), 'meme_captioning_model.pth')

TypeError: object of type 'MemeDataset' has no len()