<a href="https://colab.research.google.com/github/Eshan133/Hate-Speech-Detection/blob/main/Clip_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. Mounting the Drive

In [None]:
# Mount Google Drive (Colab only) so the dataset CSVs referenced later under
# /content/drive/MyDrive are readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install transformers datasets accelerate torchvision ftfy regex
!pip install git+https://github.com/openai/CLIP.git

In [None]:
import torch
import pandas as pd
from torchvision import transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from transformers import CLIPProcessor, CLIPModel
from torch.optim import AdamW
import os
from sklearn.metrics import accuracy_score, classification_report


---
## 2. Dataset Preparation

In [None]:
class MemeDataset(Dataset):
    """Image/text pairs read from a CSV file, one sample per row.

    The CSV must provide a ``name`` column (path of the image file) and a
    ``text`` column. A ``label`` column is required unless ``is_test`` is set.

    Args:
        csv_file: Path of the CSV file, loaded with ``pandas.read_csv``.
        processor: Stored on the instance; the dataset itself does not call it.
        is_test: When True, samples are returned without a label.
    """

    def __init__(self, csv_file, processor, is_test=False):
        self.data = pd.read_csv(csv_file)
        self.processor = processor
        self.is_test = is_test

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(item, label)`` for labelled data, or ``item`` alone for test data.

        ``item`` is a dict holding the RGB ``image``, the ``text`` string and
        the sample's ``index``.
        """
        row = self.data.iloc[idx]

        # convert() returns a new, fully loaded image, so the source file can
        # be closed right away instead of waiting for garbage collection.
        with Image.open(row['name']) as raw_image:
            image = raw_image.convert('RGB')

        # pandas reads an empty text cell as NaN (a float), which a tokenizer
        # cannot handle; treat it as an empty caption.
        text = row['text']
        text = "" if pd.isna(text) else str(text)

        # NOTE(review): uses the CSV's 'index' column when there is one and the
        # row position otherwise — confirm which id the submission expects.
        if 'index' in self.data.columns:
            sample_index = row['index']
            if hasattr(sample_index, 'item'):
                # numpy scalar -> native Python value (JSON-serialisable)
                sample_index = sample_index.item()
        else:
            sample_index = int(idx)

        item = {
            "image": image,
            "text": text,
            "index": sample_index,
        }

        if self.is_test:
            return item
        return item, int(row['label'])



### 2.1 Collate Function

In [None]:
from transformers import CLIPProcessor

# Module-level CLIP processor; collate_fn below calls it on each batch's texts and images.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def collate_fn(batch):
    """Encode a list of dataset samples into one model-ready batch.

    Handles both sample layouts produced by the dataset:

    * labelled samples ``(item, label)`` -> returns ``(encoded_inputs, labels)``
      where ``labels`` is a tensor;
    * unlabelled (test) samples ``item`` -> returns ``(encoded_inputs, meta)``
      where ``meta`` is ``{"index": [...]}``; an entry is ``None`` when the
      item carries no ``"index"`` key.

    ``encoded_inputs`` is whatever the module-level ``processor`` returns for
    the batch's texts and images (padded and truncated, PyTorch tensors).
    """
    # Labelled samples arrive as (item, label) tuples, test samples as bare dicts.
    has_labels = isinstance(batch[0], tuple)
    items = [sample[0] for sample in batch] if has_labels else list(batch)

    # Gather texts and images separately
    texts = [item["text"] for item in items]
    images = [item["image"] for item in items]

    # Use processor.__call__ with batching + padding + truncation
    encoded_inputs = processor(
        text=texts,
        images=images,
        return_tensors="pt",
        padding=True,
        truncation=True
    )

    if has_labels:
        labels = torch.tensor([sample[1] for sample in batch])
        return encoded_inputs, labels

    meta = {"index": [item.get("index") for item in items]}
    return encoded_inputs, meta


### 2.2 Dataset Paths

In [None]:
# The three Task A splits sit in one Drive folder; derive each CSV path from it.
_TASK_A_DIR = '/content/drive/MyDrive/Hate Speech Competition/Task_A'
train_csv = f'{_TASK_A_DIR}/train_data.csv'
test_csv = f'{_TASK_A_DIR}/test_data.csv'
val_csv = f'{_TASK_A_DIR}/val_data.csv'

----
## 3. Defining the CLIP Model

In [None]:
from torch import nn

class MemeCLIPClassifier(nn.Module):
    """CLIP backbone with a small MLP head on the joined image/text embeddings.

    Args:
        model_name: Checkpoint name passed to ``CLIPModel.from_pretrained``.
        num_labels: Number of output logits produced by the head.
    """

    def __init__(self, model_name="openai/clip-vit-base-patch32", num_labels=2):
        super().__init__()
        self.clip = CLIPModel.from_pretrained(model_name)

        # The head sees the image and text embeddings side by side, hence 2x.
        embed_dim = self.clip.config.projection_dim
        head_layers = [
            nn.Linear(embed_dim * 2, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_labels),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, input_ids, pixel_values, attention_mask):
        """Return the classifier logits for a batch of encoded image/text pairs."""
        clip_out = self.clip(
            input_ids=input_ids,
            pixel_values=pixel_values,
            attention_mask=attention_mask,
            return_dict=True,
        )

        # Concatenate along the feature axis: [image_embeds | text_embeds].
        fused = torch.cat([clip_out.image_embeds, clip_out.text_embeds], dim=1)
        return self.classifier(fused)


### 3.1 Freezing layers

In [None]:
def freeze_clip_layers(
    model,
    trainable_layers=(
        "vision_model.encoder.layers.11.",
        "text_model.encoder.layers.11.",
    ),
):
    """Freeze every CLIP parameter except those in the selected layers.

    Hugging Face's ``CLIPModel`` names its towers ``vision_model`` and
    ``text_model``; the former pattern ``"visual.transformer.layers.11"``
    matched no parameter, so the last vision block was frozen unintentionally.

    Args:
        model: Object whose ``clip`` attribute exposes ``named_parameters()``.
        trainable_layers: Substrings of parameter names that stay trainable.
            The trailing dot keeps ``layers.11.`` from matching e.g. ``layers.110``.
            The defaults select block 11 of each tower, i.e. the last block
            of the 12-layer base checkpoint.

    Parameters outside ``model.clip`` (such as the classifier head) are not touched.
    """
    for name, param in model.clip.named_parameters():
        # Setting the flag both ways makes re-running the cell idempotent.
        param.requires_grad = any(layer in name for layer in trainable_layers)


### 3.2 Initializing

In [None]:
# Seed PyTorch's global RNG so the classifier-head initialisation, dropout and
# the shuffled training order start from the same state on every full re-run.
SEED = 42
torch.manual_seed(SEED)

BATCH_SIZE = 16

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

train_dataset = MemeDataset(train_csv, processor)
val_dataset = MemeDataset(val_csv, processor)
test_dataset = MemeDataset(test_csv, processor, is_test=True)

# Only the training split is shuffled; validation and test keep file order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MemeCLIPClassifier()
freeze_clip_layers(model)
# Assigning the result keeps the cell from printing the whole module repr.
model = model.to(device)

# Hand the optimizer only the parameters that are still trainable after freezing.
optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=5e-5)
criterion = nn.CrossEntropyLoss()


---
## 4. Training and Evaluation Loop

### 4.1 Training Loop

In [None]:
from tqdm.notebook import tqdm

def train(model, dataloader, optimizer, criterion):
    """Run one optimisation pass over ``dataloader``.

    Each batch is moved to the module-level ``device``, passed to the model as
    keyword arguments, and used for one optimizer step.

    Returns:
        The sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(dataloader, desc="Training", leave=False)

    for batch_inputs, batch_labels in progress_bar:
        batch_inputs = {name: tensor.to(device) for name, tensor in batch_inputs.items()}
        batch_labels = batch_labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(**batch_inputs)
        loss = criterion(logits, batch_labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        progress_bar.set_postfix(loss=batch_loss)

    return running_loss / len(dataloader)


In [None]:
import torchmetrics
from sklearn.metrics import accuracy_score, f1_score, classification_report, roc_auc_score

### 4.2 Evaluation Loop

In [None]:
def evaluate(model, dataloader):
    """Score ``model`` on a labelled dataloader without computing gradients.

    Returns:
        ``(acc, f1, auc, report)``: accuracy, macro-averaged F1, ROC AUC based
        on the class-1 probability (``nan`` when ``roc_auc_score`` raises
        ``ValueError``), and the text classification report.
    """
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating", leave=False):
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)

            outputs = model(**inputs)  # logits
            probs = torch.softmax(outputs, dim=1)  # shape: [batch_size, 2]
            preds = torch.argmax(probs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs[:, 1].cpu().numpy())  # probs for class 1 (hate)

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    try:
        auc = roc_auc_score(all_labels, all_probs)
    except ValueError:
        auc = float('nan')

    # Pin the label set: without it, classification_report raises when only one
    # class occurs in labels/predictions, because two target_names are given.
    report = classification_report(
        all_labels,
        all_preds,
        labels=[0, 1],
        target_names=['non-hate', 'hate'],
    )

    return acc, f1, auc, report


---
## 5. Training the model

In [None]:
import torch
import os

# Early-stopping state: best validation F1 seen so far, how many epochs without
# improvement are tolerated, the running count of such epochs, and where the
# best weights are written.
best_val_f1 = 0
patience = 3
counter = 0
save_path = "best_model.pt"

# mode='max' because the monitored value (validation F1) should increase; the
# learning rate is halved after more than `patience=1` epochs without improvement.
# The `verbose` argument is deprecated in recent PyTorch releases, so it is not
# passed; read optimizer.param_groups[0]['lr'] to inspect the current rate.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=1, factor=0.5)


In [None]:
# Train for at most 10 epochs. After each epoch: validate, let the scheduler see
# the validation F1, checkpoint on improvement, and stop once `patience`
# consecutive epochs bring no improvement.
for epoch in range(10):  # or any number
    print(f"\nEpoch: {epoch + 1}")

    train_loss = train(model, train_loader, optimizer, criterion)
    val_acc, val_f1, val_auc, val_report = evaluate(model, val_loader)

    print(
        f"Train Loss: {train_loss:.4f} | Val Accuracy: {val_acc:.4f} | "
        f"F1: {val_f1:.4f} | AUROC: {val_auc:.4f}"
    )
    print(val_report)

    # The plateau scheduler monitors validation F1 (created with mode='max').
    scheduler.step(val_f1)

    if not (val_f1 > best_val_f1):
        # No new best this epoch: count it towards early stopping.
        counter += 1
        print(f"⏳ No improvement. Early stopping counter: {counter}/{patience}")
    else:
        # New best: remember it, reset the counter and checkpoint the weights.
        best_val_f1 = val_f1
        counter = 0
        torch.save(model.state_dict(), save_path)
        print(f"✅ Saved new best model with F1: {best_val_f1:.4f}")

    if counter >= patience:
        print("⛔ Early stopping triggered.")
        break


------

---
## 6. Prediction

In [None]:
import json

def predict_on_test(model, test_loader):
    """Predict a class for every sample yielded by ``test_loader``.

    Each batch must be a pair ``(inputs, meta)``. ``inputs`` is a mapping of
    tensors passed to the model as keyword arguments. When ``meta`` is a dict
    whose ``"index"`` list has no ``None`` entries, those values identify the
    samples; otherwise samples are numbered by their running position in the
    loader.

    Returns:
        A list of ``{"index": ..., "label": int}`` dicts in loader order, with
        native Python values so the result can be passed to ``json.dump``.
    """
    model.eval()
    predictions = []
    next_position = 0  # running sample count, used as the fallback index

    with torch.no_grad():
        for inputs, meta in tqdm(test_loader, desc="Predicting on test set"):
            inputs = {k: v.to(device) for k, v in inputs.items()}

            outputs = model(**inputs)
            preds = torch.argmax(torch.softmax(outputs, dim=1), dim=1).cpu().tolist()

            indices = meta.get("index") if isinstance(meta, dict) else None
            if indices is None or any(idx is None for idx in indices):
                indices = range(next_position, next_position + len(preds))
            next_position += len(preds)

            for idx, pred in zip(indices, preds):
                # numpy / tensor scalars are not JSON-serialisable; unwrap them.
                if hasattr(idx, "item"):
                    idx = idx.item()
                predictions.append({"index": idx, "label": int(pred)})

    return predictions


In [None]:
# Restore the best checkpoint written during training. Without this, the
# predictions come from the last epoch's weights, which can be worse than the
# best epoch (early stopping only fires after epochs without improvement).
if os.path.exists(save_path):
    model.load_state_dict(torch.load(save_path, map_location=device))

predictions = predict_on_test(model, test_loader)
# Order the submission by sample index.
predictions_sorted = sorted(predictions, key=lambda x: x['index'])

---
## 7. JSON for Submission

In [None]:
# Write the sorted predictions to submission.json (pretty-printed, 4-space indent).
with open("submission.json", "w") as f:
    json.dump(predictions_sorted, f, indent=4)

# -j stores submission.json in the archive without any directory path.
!zip -j ref.zip submission.json
