In [None]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer, ViTFeatureExtractor, ViTModel
from torch.utils.data import Dataset, DataLoader
from PIL import Image 
import torch.nn.functional as F
from sklearn.metrics import f1_score
import numpy as np
from tqdm import tqdm
import pandas as pd
import os
print('imported')

In [None]:
# Load the Malayalam train / dev / test CSVs into separate frames for inspection.
df_mal_train = pd.read_csv('/kaggle/input/misogyny/misogyny/misogyny/malayalam/train/train.csv')
df_mal_dev = pd.read_csv('/kaggle/input/misogyny/misogyny/misogyny/malayalam/dev/dev.csv')
df_mal_test = pd.read_csv('/kaggle/input/misogyny/misogyny/misogyny/malayalam/test/test.csv')

In [None]:
# Display the Malayalam training frame as the cell output.
df_mal_train

In [None]:
# Count missing values per column in the Malayalam training frame.
df_mal_train.isnull().sum()

In [None]:
# Load the Tamil train / dev / test CSVs into separate frames for inspection.
df_tam_train = pd.read_csv('/kaggle/input/misogyny/misogyny/misogyny/tamil/train/train.csv')
df_tam_dev = pd.read_csv('/kaggle/input/misogyny/misogyny/misogyny/tamil/dev/dev.csv')
df_tam_test = pd.read_csv('/kaggle/input/misogyny/misogyny/misogyny/tamil/test/test.csv')

In [None]:
# Display the Tamil training frame as the cell output.
df_tam_train

In [None]:
# Count missing values per column in the Tamil training frame.
df_tam_train.isnull().sum()

In [None]:
def load_data(text_path, image_dir, is_test=False):
    """Read one split's CSV and collect its texts, image paths, labels and ids.

    Args:
        text_path: Path to a CSV with ``image_id`` and ``transcriptions``
            columns, plus a ``labels`` column unless ``is_test`` is true.
        image_dir: Directory that is joined with ``<image_id>.jpg`` to build
            each row's image path.
        is_test: When true the ``labels`` column is not read and every label
            is the placeholder ``0``.

    Returns:
        dict with the keys ``texts``, ``images``, ``labels`` and ``image_ids``,
        each a list with one entry per CSV row.
    """
    df = pd.read_csv(text_path)

    image_ids = df['image_id'].tolist()
    image_paths = [os.path.join(image_dir, f"{img_id}.jpg") for img_id in image_ids]

    # Empty transcription cells are read as NaN. The dataset later applies
    # str() to every text, which would feed the literal word "nan" to the
    # tokenizer, so replace missing values with an empty string here.
    texts = df['transcriptions'].fillna('').tolist()

    if is_test:
        labels = [0] * len(df)
    else:
        labels = df['labels'].tolist()

    return {
        'texts': texts,
        'images': image_paths,
        'labels': labels,
        'image_ids': image_ids
    }

In [None]:
def load_and_preprocess_data(text_data, image_paths, labels, tokenizer, feature_extractor, batch_size=16, is_train=True):
    """Wrap parallel text / image-path / label lists in a batched DataLoader.

    Args:
        text_data: Sequence of texts; each item is passed through ``str()``.
        image_paths: Sequence of image file paths, aligned with ``text_data``.
        labels: Sequence of integer labels, aligned with ``text_data``.
        tokenizer: Callable invoked with the text and
            ``padding='max_length', max_length=128, truncation=True,
            return_tensors='pt'``; its result is indexed with ``input_ids``
            and ``attention_mask``.
        feature_extractor: Callable invoked with ``images=...`` and
            ``return_tensors='pt'``; its result is indexed with
            ``pixel_values``.
        batch_size: Number of samples per batch.
        is_train: Whether the loader shuffles the samples.

    Returns:
        A ``DataLoader`` whose samples are dicts with the keys ``input_ids``,
        ``attention_mask``, ``pixel_values`` and ``label``.
    """

    class MemeDataset(Dataset):
        """Per-sample tokenisation and image preprocessing."""

        def __init__(self, texts, image_paths, labels, tokenizer, feature_extractor):
            self.texts, self.image_paths, self.labels = texts, image_paths, labels
            self.tokenizer, self.feature_extractor = tokenizer, feature_extractor

        def __len__(self):
            return len(self.texts)

        def _extract(self, picture):
            # Single place for the feature-extractor call used by both the
            # normal path and the fallback path.
            return self.feature_extractor(images=picture, return_tensors='pt')

        def __getitem__(self, idx):
            tokens = self.tokenizer(
                str(self.texts[idx]),
                padding='max_length',
                max_length=128,
                truncation=True,
                return_tensors='pt'
            )

            try:
                image_features = self._extract(
                    Image.open(self.image_paths[idx]).convert('RGB')
                )
            except Exception as e:
                # Best-effort: report the failure and substitute a black
                # 224x224 image so the sample is still produced.
                print(f"Error loading image {self.image_paths[idx]}: {str(e)}")
                image_features = self._extract(
                    Image.new('RGB', (224, 224), color='black')
                )

            sample = {
                'input_ids': tokens['input_ids'].squeeze(),
                'attention_mask': tokens['attention_mask'].squeeze(),
                'pixel_values': image_features['pixel_values'].squeeze(),
                'label': torch.tensor(self.labels[idx], dtype=torch.long)
            }
            return sample

    return DataLoader(
        MemeDataset(text_data, image_paths, labels, tokenizer, feature_extractor),
        batch_size=batch_size,
        shuffle=is_train,
    )

In [None]:
def create_model(device):
    """Build the two pretrained encoders and the fusion classifier head.

    Args:
        device: Device every returned module is moved to.

    Returns:
        Tuple ``(text_model, image_model, fusion_model)``: the
        ``ai4bharat/indic-bert`` encoder, the ``google/vit-base-patch16-224``
        encoder, and a head mapping a ``768 * 2``-wide input to one logit.
    """
    text_encoder = AutoModel.from_pretrained("ai4bharat/indic-bert").to(device)
    vision_encoder = ViTModel.from_pretrained("google/vit-base-patch16-224").to(device)

    # Input width 768 * 2: presumably the two encoders' 768-wide embeddings
    # concatenated (verify against the encoders' hidden sizes).
    head_layers = [
        nn.Linear(768 * 2, 512),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(512, 1),
    ]
    fusion_head = nn.Sequential(*head_layers).to(device)

    return text_encoder, vision_encoder, fusion_head

In [None]:
def forward_pass(batch, text_model, image_model, fusion_model, device):
    """Run one batch through both encoders and the fusion head.

    Args:
        batch: Mapping with ``input_ids``, ``attention_mask`` and
            ``pixel_values`` tensors; each is moved to ``device``.
        text_model: Callable taking ``input_ids`` / ``attention_mask`` keywords
            and returning an object with ``last_hidden_state``.
        image_model: Callable taking the pixel tensor and returning an object
            with ``last_hidden_state``.
        fusion_model: Module applied to the concatenated embeddings.
        device: Device the batch tensors are moved to.

    Returns:
        Tensor of logits with shape ``(batch_size,)`` — one logit per sample,
        including when the batch holds a single sample.
    """
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    pixel_values = batch['pixel_values'].to(device)

    # Use the hidden state at sequence position 0 as each modality's embedding.
    text_outputs = text_model(input_ids=input_ids, attention_mask=attention_mask)
    text_embeddings = text_outputs.last_hidden_state[:, 0, :]

    image_outputs = image_model(pixel_values)
    image_embeddings = image_outputs.last_hidden_state[:, 0, :]

    fused = torch.cat([text_embeddings, image_embeddings], dim=1)
    output = fusion_model(fused)

    # Drop only the trailing logit dimension. A bare squeeze() would also drop
    # the batch dimension when the batch has one sample (e.g. a final partial
    # batch), yielding a 0-d tensor that mismatches the (1,) label tensor in
    # the loss and cannot be iterated when predictions are collected.
    return output.squeeze(-1)

In [None]:
def train_epoch(text_model, image_model, fusion_model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``train_loader``.

    Puts all three modules in training mode, then for every batch computes the
    logits via ``forward_pass``, back-propagates ``criterion`` and steps
    ``optimizer``.

    Returns:
        Tuple ``(avg_loss, f1)``: the mean per-batch loss and the F1 score
        (``f1_score`` defaults) of the logits thresholded at a sigmoid
        probability of 0.5.
    """
    for module in (text_model, image_model, fusion_model):
        module.train()

    running_loss = 0
    predicted, targets = [], []

    for batch in tqdm(train_loader, desc="Training"):
        labels = batch['label'].to(device)

        optimizer.zero_grad()
        logits = forward_pass(batch, text_model, image_model, fusion_model, device)
        # The criterion is fed float targets; labels are stored as long.
        batch_loss = criterion(logits, labels.float())
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

        is_positive = torch.sigmoid(logits) > 0.5
        predicted.extend(is_positive.int().cpu().numpy())
        targets.extend(labels.cpu().numpy())

    return running_loss / len(train_loader), f1_score(targets, predicted)

In [None]:
def evaluate(text_model, image_model, fusion_model, val_loader, criterion, device):
    """Score the models on ``val_loader`` without updating any weights.

    Puts all three modules in eval mode and runs every batch through
    ``forward_pass`` under ``torch.no_grad()``.

    Returns:
        Tuple ``(avg_loss, f1)``: the mean per-batch loss and the F1 score
        (``f1_score`` defaults) of the logits thresholded at a sigmoid
        probability of 0.5.
    """
    for module in (text_model, image_model, fusion_model):
        module.eval()

    loss_sum = 0
    predicted, targets = [], []

    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Evaluating"):
            labels = batch['label'].to(device)
            logits = forward_pass(batch, text_model, image_model, fusion_model, device)

            # The criterion is fed float targets; labels are stored as long.
            loss_sum += criterion(logits, labels.float()).item()

            is_positive = torch.sigmoid(logits) > 0.5
            predicted.extend(is_positive.int().cpu().numpy())
            targets.extend(labels.cpu().numpy())

    mean_loss = loss_sum / len(val_loader)
    return mean_loss, f1_score(targets, predicted)

In [None]:
def get_predictions(text_model, image_model, fusion_model, test_loader, device):
    """Predict a hard label and a probability for every sample in ``test_loader``.

    Puts all three modules in eval mode and runs every batch through
    ``forward_pass`` under ``torch.no_grad()``. Labels in the batches are
    not read.

    Returns:
        Tuple ``(all_preds, all_probs)``: two lists with one entry per sample,
        the 0/1 prediction (probability > 0.5) and the sigmoid probability.
    """
    for module in (text_model, image_model, fusion_model):
        module.eval()

    hard_labels, scores = [], []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Generating predictions"):
            logits = forward_pass(batch, text_model, image_model, fusion_model, device)
            probs = torch.sigmoid(logits)
            decided = (probs > 0.5).int()

            scores.extend(probs.cpu().numpy())
            hard_labels.extend(decided.cpu().numpy())

    return hard_labels, scores

In [None]:
def train_model(train_text_path, train_image_dir, 
                val_text_path, val_image_dir,
                test_text_path, test_image_dir,language,
                num_epochs=5, batch_size=16, learning_rate=2e-5):
    """Fine-tune the text+image classifier for one language and save test predictions.

    Loads the three splits, jointly trains the text encoder, image encoder and
    fusion head with AdamW and ``BCEWithLogitsLoss``, checkpoints the weights
    from the epoch with the best validation F1 to ``best_model.pth``, reloads
    that checkpoint and writes ``predictions_<language>.csv``.

    Args:
        train_text_path, train_image_dir: CSV path and image directory of the
            training split.
        val_text_path, val_image_dir: Same for the validation split.
        test_text_path, test_image_dir: Same for the test split, which is
            loaded with placeholder labels.
        language: Suffix used in the predictions file name.
        num_epochs: Number of passes over the training split.
        batch_size: Batch size for all three loaders.
        learning_rate: AdamW learning rate, shared by all parameters.

    Returns:
        None. Side effects: writes ``best_model.pth`` and
        ``predictions_<language>.csv`` in the working directory and prints
        progress.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    print("Loading data...")
    train_data = load_data(train_text_path, train_image_dir, is_test=False)
    val_data = load_data(val_text_path, val_image_dir, is_test=False)
    # The test split is loaded with is_test=True, so its labels are placeholders.
    test_data = load_data(test_text_path, test_image_dir, is_test=True)
    
    print("Initializing models...")
    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")
    feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
    
    train_loader = load_and_preprocess_data(
        train_data['texts'], train_data['images'], train_data['labels'],
        tokenizer, feature_extractor, batch_size, is_train=True
    )
    val_loader = load_and_preprocess_data(
        val_data['texts'], val_data['images'], val_data['labels'],
        tokenizer, feature_extractor, batch_size, is_train=False
    )
    test_loader = load_and_preprocess_data(
        test_data['texts'], test_data['images'], test_data['labels'],
        tokenizer, feature_extractor, batch_size, is_train=False
    )
    
    text_model, image_model, fusion_model = create_model(device)
    
    # One optimizer over all three modules, so both encoders are fine-tuned
    # together with the fusion head.
    params = list(text_model.parameters()) + list(image_model.parameters()) + list(fusion_model.parameters())
    optimizer = torch.optim.AdamW(params, lr=learning_rate)
    criterion = nn.BCEWithLogitsLoss()
    
    checkpoint_path = 'best_model.pth'
    # Start below any attainable F1 so the first epoch always writes a fresh
    # checkpoint. Starting at 0 with a strict ">" meant a run whose validation
    # F1 stayed at 0 never saved, and the reload below then failed or picked up
    # a stale checkpoint left by an earlier run (e.g. the other language).
    best_val_f1 = float('-inf')
    checkpoint_saved = False
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        
        train_loss, train_f1 = train_epoch(
            text_model, image_model, fusion_model,
            train_loader, optimizer, criterion, device
        )
        
        val_loss, val_f1 = evaluate(
            text_model, image_model, fusion_model,
            val_loader, criterion, device
        )
        
        print(f"Train Loss: {train_loss:.4f}, Train F1: {train_f1:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}")
        
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save({
                'text_model_state_dict': text_model.state_dict(),
                'image_model_state_dict': image_model.state_dict(),
                'fusion_model_state_dict': fusion_model.state_dict(),
            }, checkpoint_path)
            checkpoint_saved = True
    
    print("\nGenerating test predictions...")
    if checkpoint_saved:
        # map_location keeps the reload valid whichever device is in use.
        checkpoint = torch.load(checkpoint_path, map_location=device)
        text_model.load_state_dict(checkpoint['text_model_state_dict'])
        image_model.load_state_dict(checkpoint['image_model_state_dict'])
        fusion_model.load_state_dict(checkpoint['fusion_model_state_dict'])
    else:
        # Only reachable when no epoch ran; never load a file this run did not write.
        print("No checkpoint was saved during this run; predicting with the current weights.")
    
    predictions, probabilities = get_predictions(
        text_model, image_model, fusion_model,
        test_loader, device
    )
    
    predictions_df = pd.DataFrame({
        'image_id': test_data['image_ids'],
        'predicted_label': predictions,
        'probability': probabilities
    })
    
    predictions_df.to_csv(f'predictions_{language}.csv', index=False)
    print(f"\nPredictions saved to 'predictions_{language}.csv'")

if __name__ == "__main__":
    # Root of the Kaggle dataset; each language folder holds train/dev/test
    # sub-folders with a CSV and a "memes" image directory.
    DATA_ROOT = '/kaggle/input/misogyny/misogyny/misogyny'

    # Train and predict once per language. The two runs differ only in the
    # language folder, so the paths are derived instead of copy-pasted.
    for language in ('malayalam', 'tamil'):
        TRAIN_TEXT_PATH = f'{DATA_ROOT}/{language}/train/train.csv'
        TRAIN_IMAGE_DIR = f'{DATA_ROOT}/{language}/train/memes'

        VAL_TEXT_PATH = f'{DATA_ROOT}/{language}/dev/dev.csv'
        VAL_IMAGE_DIR = f'{DATA_ROOT}/{language}/dev/memes'

        TEST_TEXT_PATH = f'{DATA_ROOT}/{language}/test/test.csv'
        TEST_IMAGE_DIR = f'{DATA_ROOT}/{language}/test/memes'

        train_model(
            TRAIN_TEXT_PATH, TRAIN_IMAGE_DIR,
            VAL_TEXT_PATH, VAL_IMAGE_DIR,
            TEST_TEXT_PATH, TEST_IMAGE_DIR,
            language
        )
    

In [None]:
# Reload the saved Tamil predictions and display them.
# Presumably the file written by train_model(..., 'tamil'), assuming the
# notebook's working directory is /kaggle/working — confirm.
tamil_pred=pd.read_csv('/kaggle/working/predictions_tamil.csv')
tamil_pred

In [None]:
# Reload the saved Malayalam predictions and display them.
# Presumably the file written by train_model(..., 'malayalam'), assuming the
# notebook's working directory is /kaggle/working — confirm.
malayalam_pred=pd.read_csv('/kaggle/working/predictions_malayalam.csv')
malayalam_pred

In [None]:
import zipfile

# Submission naming: one header-less CSV per language, bundled in a zip named
# after the team.
team_name = "CUET-NLP_MP"
zip_file_path = f"{team_name}.zip"
tamil_csv_path = 'tamil.csv'
malayalam_csv_path = 'malayalam.csv'

# Keep only the id and hard-label columns of each predictions frame.
submission_columns = ['image_id', 'predicted_label']
tamil_filtered = tamil_pred[submission_columns]
malayalam_filtered = malayalam_pred[submission_columns]

tamil_filtered.to_csv(tamil_csv_path, index=False, header=False)
malayalam_filtered.to_csv(malayalam_csv_path, index=False, header=False)

# Each CSV is stored in the archive under its team-prefixed run name.
archive_members = [
    (tamil_csv_path, f"{team_name}_tamil_run1.csv"),
    (malayalam_csv_path, f"{team_name}_malayalam_run1.csv"),
]
with zipfile.ZipFile(zip_file_path, 'w') as zipf:
    for csv_path, member_name in archive_members:
        zipf.write(csv_path, arcname=member_name)

zip_file_path

In [None]:
# Malayalam: macro-averaged F1 of the saved predictions against the labelled test file.
# NOTE(review): labels and predictions are compared by row position — this assumes
# test_with_labels.csv lists rows in the same order as the test.csv used for
# prediction; confirm.
mal_labels = pd.read_csv('/kaggle/input/misogyny/test_with_labels_malayalam/test_with_labels.csv')
f1 = f1_score(mal_labels['labels'], malayalam_pred['predicted_label'], average='macro') 
print(f"F1 Score: {f1:.4f}")


In [None]:
# Tamil: macro-averaged F1 of the saved predictions against the labelled test file.
# NOTE(review): labels and predictions are compared by row position — this assumes
# test_with_labels.csv lists rows in the same order as the test.csv used for
# prediction; confirm.
tam_labels = pd.read_csv('/kaggle/input/misogyny/test_with_labels_tamil/test_with_labels.csv')
f1 = f1_score(tam_labels['labels'], tamil_pred['predicted_label'], average='macro') 
print(f"F1 Score: {f1:.4f}")