In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every .csv and .pt file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    wanted = (name for name in names if name.endswith((".csv", ".pt")))
    for name in wanted:
        print(os.path.join(folder, name))

/kaggle/input/fe-misogyny-meme-detection/val_text_embeddings.pt
/kaggle/input/fe-misogyny-meme-detection/test_text_embeddings.pt
/kaggle/input/fe-misogyny-meme-detection/train_image_embeddings.pt
/kaggle/input/fe-misogyny-meme-detection/val_image_embeddings.pt
/kaggle/input/fe-misogyny-meme-detection/test_image_embeddings.pt
/kaggle/input/fe-misogyny-meme-detection/train_text_embeddings.pt
/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/dev-20250101T183837Z-001/dev/dev.csv
/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/train-20250101T183816Z-001/train/train.csv
/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/test-20250101T183840Z-001/test/test.csv
/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Malayalam/train-20250101T182940Z-001/train/train.csv
/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Malayalam/dev-20250101T182941Z-001/dev/dev.csv
/kaggle/input/3-misogyny-meme-detection

In [None]:
# Read the Tamil training split's CSV; later cells use its `image_id` and `labels` columns.
train_df=pd.read_csv("/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/train-20250101T183816Z-001/train/train.csv")
# Last expression of the cell: renders a preview of the first rows.
train_df.head()

In [None]:
# Read the Tamil dev split's CSV; it is used as the validation set in later cells.
val_df=pd.read_csv("/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/dev-20250101T183837Z-001/dev/dev.csv")
# Last expression of the cell: renders a preview of the first rows.
val_df.head()

In [None]:
# Read the Tamil test split's CSV; later cells read only its `image_id` column
# (the test features are combined with has_labels=False).
test_df=pd.read_csv("/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/test-20250101T183840Z-001/test/test.csv")
# Last expression of the cell: renders a preview of the first rows.
test_df.head()

In [5]:
import os
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from transformers import AutoModel, AutoTokenizer, AutoProcessor
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

In [6]:
# Paths
# Directories of the Tamil train / dev / test splits. These are the same folders
# the split CSVs are read from. None of the three constants is referenced again
# in the visible cells, which work from precomputed embedding files instead.
TRAIN_IMAGE_FOLDER = "/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/train-20250101T183816Z-001/train"
VAL_IMAGE_FOLDER = "/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/dev-20250101T183837Z-001/dev"
TEST_IMAGE_FOLDER = "/kaggle/input/3-misogyny-meme-detection/3 Misogyny Meme Detection/Tamil/test-20250101T183840Z-001/test"

In [7]:
# Parameters
# NOTE(review): the two model names are not used by any visible cell; presumably
# they record which encoders produced the precomputed .pt embeddings — confirm.
IMAGE_MODEL_NAME = "openai/clip-vit-base-patch32"  # Example vision model
TEXT_MODEL_NAME = "Hate-speech-CNERG/tamil-codemixed-abusive-MuRIL"  # Example Tamil BERT model
# NOTE(review): the DataLoaders further down use the lowercase `batch_size`
# hyperparameter, not this constant.
BATCH_SIZE = 8
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
def load_embeddings(embedding_path):
    """Load a precomputed embeddings object that was written with ``torch.save``.

    Args:
        embedding_path: Path of the ``.pt`` file to read.

    Returns:
        The object stored in the file, with any tensors placed in CPU memory.

    Raises:
        FileNotFoundError: If ``embedding_path`` does not exist.
    """
    if not os.path.exists(embedding_path):
        raise FileNotFoundError(f"Embeddings file not found at {embedding_path}")

    print(f"Loading embeddings from {embedding_path}")
    # map_location="cpu" lets a file that was saved from a GPU session load on a
    # CPU-only runtime; batches are moved to the training device later on.
    # NOTE(review): torch.load unpickles the file, so only load files you trust.
    return torch.load(embedding_path, map_location="cpu")

In [9]:
# Read the six precomputed embedding files: image embeddings first, then text
# embeddings, each in train / val / test order.
train_image_embeddings, val_image_embeddings, test_image_embeddings = (
    load_embeddings(path)
    for path in (
        "/kaggle/input/fe-misogyny-meme-detection/train_image_embeddings.pt",
        "/kaggle/input/fe-misogyny-meme-detection/val_image_embeddings.pt",
        "/kaggle/input/fe-misogyny-meme-detection/test_image_embeddings.pt",
    )
)

train_text_embeddings, val_text_embeddings, test_text_embeddings = (
    load_embeddings(path)
    for path in (
        "/kaggle/input/fe-misogyny-meme-detection/train_text_embeddings.pt",
        "/kaggle/input/fe-misogyny-meme-detection/val_text_embeddings.pt",
        "/kaggle/input/fe-misogyny-meme-detection/test_text_embeddings.pt",
    )
)

Loading embeddings from /kaggle/input/fe-misogyny-meme-detection/train_image_embeddings.pt
Loading embeddings from /kaggle/input/fe-misogyny-meme-detection/val_image_embeddings.pt
Loading embeddings from /kaggle/input/fe-misogyny-meme-detection/test_image_embeddings.pt
Loading embeddings from /kaggle/input/fe-misogyny-meme-detection/train_text_embeddings.pt
Loading embeddings from /kaggle/input/fe-misogyny-meme-detection/val_text_embeddings.pt
Loading embeddings from /kaggle/input/fe-misogyny-meme-detection/test_text_embeddings.pt


  return torch.load(embedding_path)


In [30]:
def combine_embeddings(image_embeddings, text_embeddings, df, has_labels=True):
    """Fuse per-meme image and text embeddings into one feature matrix.

    For every row of ``df`` whose ``image_id`` is a key of both embedding
    mappings, the two embeddings are squeezed and concatenated along the last
    dimension. Rows missing either embedding are skipped; because the result
    then no longer lines up row-for-row with ``df``, a warning reports how
    many rows were dropped.

    Args:
        image_embeddings: Mapping from image id to an image embedding tensor.
        text_embeddings: Mapping from image id to a text embedding tensor.
        df: DataFrame with an ``image_id`` column and, when ``has_labels`` is
            true, a ``labels`` column.
        has_labels: Whether to also collect the ``labels`` column.

    Returns:
        ``(features, labels)`` when ``has_labels`` is true, otherwise only
        ``features``. ``features`` stacks one fused vector per kept row and
        ``labels`` is a tensor built from the kept rows' labels.

    Raises:
        ValueError: If no row of ``df`` has both an image and a text embedding.
    """
    import warnings

    # Read the columns directly instead of using iterrows(), which converts
    # each row to a single dtype and can silently change id/label types.
    image_ids = df["image_id"].tolist()
    row_labels = df["labels"].tolist() if has_labels else [None] * len(image_ids)

    combined_embeddings = []
    labels = [] if has_labels else None
    skipped = 0

    for image_id, label in zip(image_ids, row_labels):
        if image_id not in image_embeddings or image_id not in text_embeddings:
            skipped += 1
            continue

        # Squeeze to remove size-1 dimensions before concatenating.
        image_embedding = image_embeddings[image_id].squeeze()
        text_embedding = text_embeddings[image_id].squeeze()
        combined_embeddings.append(torch.cat([image_embedding, text_embedding], dim=-1))

        if has_labels:
            labels.append(label)

    if skipped:
        warnings.warn(
            f"combine_embeddings skipped {skipped} of {len(image_ids)} rows with a "
            "missing image or text embedding; the output is no longer aligned with df."
        )

    if not combined_embeddings:
        # torch.stack on an empty list fails with an opaque message; be explicit.
        raise ValueError("No row of df has both an image and a text embedding.")

    features = torch.stack(combined_embeddings)
    if has_labels:
        return features, torch.tensor(labels)
    return features

In [31]:
# Fuse the image and text embeddings of each split; only train and val carry labels.
X_train, y_train = combine_embeddings(
    image_embeddings=train_image_embeddings,
    text_embeddings=train_text_embeddings,
    df=train_df,
    has_labels=True,
)
X_val, y_val = combine_embeddings(
    image_embeddings=val_image_embeddings,
    text_embeddings=val_text_embeddings,
    df=val_df,
    has_labels=True,
)
X_test = combine_embeddings(
    image_embeddings=test_image_embeddings,
    text_embeddings=test_text_embeddings,
    df=test_df,
    has_labels=False,
)

# Report the resulting tensor shapes as a quick sanity check.
print(f"Training data shape: {X_train.shape}, Labels: {y_train.shape}")
print(f"Validation data shape: {X_val.shape}, Labels: {y_val.shape}")
print(f"Test data shape: {X_test.shape}")


Training data shape: torch.Size([640, 1280]), Labels: torch.Size([640])
Validation data shape: torch.Size([160, 1280]), Labels: torch.Size([160])
Test data shape: torch.Size([200, 1280])


In [32]:
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [45]:
# Define the MLP model
class MLPClassifier(nn.Module):
    """Feed-forward classifier: a Linear/ReLU hidden stack, then Linear + Sigmoid.

    Args:
        input_size: Number of features in each input row.
        hidden_sizes: Widths of the hidden layers, in order. Any number of
            entries is accepted; an empty sequence yields a single
            Linear + Sigmoid. With two entries the layer layout (and therefore
            the ``state_dict`` keys) is Linear, ReLU, Linear, ReLU, Linear,
            Sigmoid.
        output_size: Number of output units.

    The final Sigmoid is part of the model, so ``forward`` returns values in
    [0, 1] that can be passed to ``nn.BCELoss`` or thresholded directly.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        layers = []
        in_features = input_size
        # One Linear + ReLU pair per requested hidden width.
        for width in hidden_sizes:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, output_size))
        layers.append(nn.Sigmoid())  # Use Sigmoid for binary classification
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid outputs for a batch ``x`` whose last dim is ``input_size``."""
        return self.model(x)

In [34]:
# Hyperparameters
input_size = X_train.shape[1]  # width of one fused (image + text) feature row
# Widths of the two hidden layers.
# NOTE(review): 786 may be a typo for 768 — confirm it is intended.
hidden_sizes = [786, 512]
output_size = 1  # a single sigmoid unit for the binary decision
batch_size = 8  # used by the train / val / test DataLoaders
num_epochs = 10
learning_rate = 0.001  # passed to Adam

In [46]:
# Wrap the fused feature tensors as datasets; the test split has features only.
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test)

# Only the training batches are reshuffled; val and test keep their row order.
train_loader, val_loader, test_loader = (
    DataLoader(dataset, batch_size=batch_size, shuffle=reshuffle)
    for dataset, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

In [47]:
# Build the classifier on the selected device, with its loss and optimizer.
model = MLPClassifier(
    input_size=input_size, hidden_sizes=hidden_sizes, output_size=output_size
)
model = model.to(device)
criterion = nn.BCELoss()  # binary cross entropy on the model's sigmoid outputs
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [48]:
import pandas as pd

def _run_epoch(model, loader, criterion, optimizer=None):
    """Make one pass over ``loader``, updating weights only when an optimizer is given.

    Returns:
        ``(mean_batch_loss, predictions, labels)``: predictions are 0/1 ints
        obtained by thresholding the model output at 0.5, labels are the float
        targets, both as flat Python lists.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()
    running_loss = 0.0
    all_preds, all_labels = [], []

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device).float()
            # squeeze(-1) rather than squeeze(): a batch holding a single sample
            # must stay 1-D, otherwise it no longer matches the (1,) label
            # tensor and tolist() returns a bare number that extend() rejects.
            outputs = model(inputs).squeeze(-1)

            loss = criterion(outputs, labels)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            running_loss += loss.item()

            # Collect predictions and labels for metrics
            preds = (outputs > 0.5).int()
            all_preds.extend(preds.tolist())
            all_labels.extend(labels.tolist())

    return running_loss / len(loader), all_preds, all_labels


def train_and_save_best_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, save_dir):
    """Train ``model`` and checkpoint it whenever the validation macro-F1 improves.

    Each epoch runs one training pass and one validation pass, prints loss,
    accuracy and macro-averaged precision / recall / F1 for both, and writes
    the model's ``state_dict`` to a new file in ``save_dir`` when the
    validation F1 exceeds the best value seen so far.

    Args:
        model: Module whose output, once its last dimension is squeezed, holds
            one probability per sample.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of epochs to run.
        save_dir: Directory for checkpoint files; created if it does not exist.

    Returns:
        Path of the most recently saved (best) checkpoint, or ``None`` if no
        epoch improved on the initial best F1.
    """
    best_f1 = -float('inf')  # Initialize to a very low value to track the best model
    best_model_path = None  # Path to save the best model
    os.makedirs(save_dir, exist_ok=True)

    for epoch in range(num_epochs):
        # Training phase
        train_loss, train_preds, train_labels = _run_epoch(model, train_loader, criterion, optimizer)
        train_accuracy = accuracy_score(train_labels, train_preds)
        train_precision, train_recall, train_f1, _ = precision_recall_fscore_support(
            train_labels, train_preds, average="macro"
        )

        # Validation phase (no optimizer, so no weight updates and no gradients)
        val_loss, val_preds, val_labels = _run_epoch(model, val_loader, criterion)
        val_accuracy = accuracy_score(val_labels, val_preds)
        val_precision, val_recall, val_f1, _ = precision_recall_fscore_support(
            val_labels, val_preds, average="macro"
        )

        # Print metrics for the current epoch
        print(
            f"Epoch {epoch + 1}/{num_epochs}: "
            f"Train Loss: {train_loss:.4f}, "
            f"Train Acc: {train_accuracy:.4f}, Prec: {train_precision:.4f}, Rec: {train_recall:.4f}, F1: {train_f1:.4f} | "
            f"Val Loss: {val_loss:.4f}, "
            f"Val Acc: {val_accuracy:.4f}, Prec: {val_precision:.4f}, Rec: {val_recall:.4f}, F1: {val_f1:.4f}"
        )

        # Save the model if it has the best F1 score on validation
        if val_f1 > best_f1:
            best_f1 = val_f1
            best_model_path = f"{save_dir}/best_model_epoch_{epoch + 1}_f1_{val_f1:.4f}.pth"
            torch.save(model.state_dict(), best_model_path)
            print(f"Best model saved with F1: {val_f1:.4f} at epoch {epoch + 1}")

    return best_model_path

In [51]:
import os

# Checkpoints are written below ./saved_models; create the folder if it is missing.
save_dir = "./saved_models"
os.makedirs(save_dir, exist_ok=True)

# Train for num_epochs and remember where the best checkpoint was written.
best_model_path = train_and_save_best_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs, save_dir
)

Epoch 1/10: Train Loss: 0.2611, Train Acc: 0.8938, Prec: 0.8924, Rec: 0.8860, F1: 0.8889 | Val Loss: 0.3448, Val Acc: 0.8625, Prec: 0.8986, Rec: 0.8282, F1: 0.8448
Best model saved with F1: 0.8448 at epoch 1
Epoch 2/10: Train Loss: 0.2344, Train Acc: 0.9094, Prec: 0.9132, Rec: 0.8985, F1: 0.9044 | Val Loss: 0.2611, Val Acc: 0.8938, Prec: 0.9016, Rec: 0.8762, F1: 0.8855
Best model saved with F1: 0.8855 at epoch 2
Epoch 3/10: Train Loss: 0.2001, Train Acc: 0.9219, Prec: 0.9235, Rec: 0.9140, F1: 0.9181 | Val Loss: 0.2870, Val Acc: 0.8812, Prec: 0.8741, Rec: 0.8798, F1: 0.8766
Epoch 4/10: Train Loss: 0.2016, Train Acc: 0.9266, Prec: 0.9261, Rec: 0.9210, F1: 0.9234 | Val Loss: 0.3050, Val Acc: 0.8625, Prec: 0.8730, Rec: 0.8393, F1: 0.8501
Epoch 5/10: Train Loss: 0.2011, Train Acc: 0.9328, Prec: 0.9340, Rec: 0.9263, F1: 0.9297 | Val Loss: 0.2697, Val Acc: 0.9125, Prec: 0.9084, Rec: 0.9084, F1: 0.9084
Best model saved with F1: 0.9084 at epoch 5
Epoch 6/10: Train Loss: 0.1524, Train Acc: 0.948

In [63]:
def predict_and_generate_submission(test_loader, best_model_path, submission_file_path):
    """Predict binary labels for the test set and write them to a CSV file.

    A fresh ``MLPClassifier`` is built from the notebook-level ``input_size``,
    ``hidden_sizes`` and ``output_size``, loaded with the checkpoint at
    ``best_model_path`` and run over ``test_loader``. Predictions are paired,
    in order, with the notebook-level ``test_df['image_id']``.

    Args:
        test_loader: Iterable of test batches; each batch is either a feature
            tensor or a list/tuple whose first element is the feature tensor.
        best_model_path: Path of the saved ``state_dict`` to load.
        submission_file_path: Destination path of the CSV file.

    Returns:
        DataFrame with columns ``id`` and ``predictions`` (also written to disk).

    Raises:
        ValueError: If the number of predictions differs from the number of
            rows in ``test_df``.
    """
    model = MLPClassifier(input_size, hidden_sizes, output_size).to(device)
    # weights_only=True restricts unpickling to tensors/state dicts, and
    # map_location lets a checkpoint saved on a GPU load on a CPU-only runtime.
    model.load_state_dict(
        torch.load(best_model_path, map_location=device, weights_only=True)
    )
    model.eval()  # Set the model to evaluation mode

    test_predictions = []
    with torch.no_grad():
        for batch in test_loader:
            # A DataLoader over TensorDataset(X) yields a one-element list per
            # batch; take the feature tensor out instead of stacking the list
            # into an extra leading dimension.
            inputs = batch[0] if isinstance(batch, (list, tuple)) else batch
            inputs = torch.as_tensor(inputs).to(device)

            # squeeze(-1) keeps a one-sample batch 1-D so tolist() returns a list.
            outputs = model(inputs).squeeze(-1)

            # Predict binary labels
            preds = (outputs > 0.5).int()
            test_predictions.extend(preds.tolist())

    image_ids = list(test_df['image_id'])
    if len(image_ids) != len(test_predictions):
        # Happens when rows were dropped while the test features were built.
        raise ValueError(
            f"Got {len(test_predictions)} predictions for {len(image_ids)} rows of "
            "test_df; predictions cannot be matched to image ids."
        )

    # Prepare the submission DataFrame
    submission_df = pd.DataFrame({
        'id': image_ids,
        'predictions': test_predictions
    })

    # Save the predictions to a CSV file
    submission_df.to_csv(submission_file_path, index=False)
    print(f"Submission file saved to {submission_file_path}")

    return submission_df

In [64]:
# Score the test split with the best checkpoint and write the predictions to a CSV file.
submission_file_path = "submission.csv"
submission_df = predict_and_generate_submission(
    test_loader, best_model_path, submission_file_path
)

Submission file saved to submission.csv


In [None]:
# Preview the first rows of the submission (columns `id` and `predictions`).
submission_df.head()