In [1]:
!pip install transformers torch torchvision pandas scikit-learn matplotlib timm textblob
!python -m textblob.download_corpora

Finished.


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\Startklar\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Startklar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Startklar\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Startklar\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to
[nltk_data]     C:\Users\Startklar\AppData\Roaming\nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Startklar\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_review

In [2]:
#  Imports
import os, json
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from transformers import CLIPProcessor, CLIPModel
from textblob import TextBlob
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from torch.nn.utils.rnn import pad_sequence
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
input_mode = "both"  # or "image", or "text"
dataset_dir = "./data"  # Update path to your dataset

In [4]:
def load_jsonl(file_path):
    with open(file_path, 'r') as f:
        return [json.loads(line) for line in f]

train_data = load_jsonl(os.path.join(dataset_dir, "train.jsonl"))
dev_data   = load_jsonl(os.path.join(dataset_dir, "dev.jsonl"))

df_all = pd.DataFrame(train_data + dev_data)
df_all = df_all.sample(2000, random_state=42)

In [5]:
high_harm_keywords = ["kill", "rape", "nazi", "exterminate", "shoot", "burn", "die", "blood", "bomb"]

def classify_harm_level(row):
    if row["label"] == 0:
        return 0  # Low
    text = row["text"].lower()
    polarity = TextBlob(text).sentiment.polarity
    if any(word in text for word in high_harm_keywords) or polarity < -0.5:
        return 2  # High
    return 1  # Medium

df_all["harm_label"] = df_all.apply(classify_harm_level, axis=1)

In [6]:
print("Class Distribution:\n", df_all["harm_label"].value_counts())

Class Distribution:
 harm_label
0    1268
1     630
2     102
Name: count, dtype: int64


In [7]:
# Load CLIP
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
clip_model.to(device)


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'



CLIPModel(
  (text_model): CLIPTextTransformer(
    (embeddings): CLIPTextEmbeddings(
      (token_embedding): Embedding(49408, 512)
      (position_embedding): Embedding(77, 512)
    )
    (encoder): CLIPEncoder(
      (layers): ModuleList(
        (0-11): 12 x CLIPEncoderLayer(
          (self_attn): CLIPSdpaAttention(
            (k_proj): Linear(in_features=512, out_features=512, bias=True)
            (v_proj): Linear(in_features=512, out_features=512, bias=True)
            (q_proj): Linear(in_features=512, out_features=512, bias=True)
            (out_proj): Linear(in_features=512, out_features=512, bias=True)
          )
          (layer_norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (mlp): CLIPMLP(
            (activation_fn): QuickGELUActivation()
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
          )
          (layer_norm2): LayerNorm((512,), eps=1e

In [8]:
class HatefulMemeDataset(Dataset):
    def __init__(self, dataframe, root_img_dir, processor, mode="both"):
        self.data = dataframe.reset_index(drop=True)
        self.root_img_dir = root_img_dir
        self.processor = processor
        self.mode = mode

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data.iloc[idx]
        text = item["text"]
        label = int(item["harm_label"])

        image = None
        if self.mode in ["both", "image"]:
            image_file = item["img"].replace("img/", "")
            image_path = os.path.join(self.root_img_dir, image_file)
            image = Image.open(image_path).convert("RGB")

        if self.mode == "both":
            inputs = self.processor(
                text=[text], images=image, return_tensors="pt", padding="max_length",
                truncation=True, max_length=77, return_attention_mask=True
            )
        elif self.mode == "image":
            inputs = self.processor(images=image, return_tensors="pt")
        elif self.mode == "text":
            inputs = self.processor(
                text=[text], return_tensors="pt", padding="max_length",
                truncation=True, max_length=77, return_attention_mask=True
            )
        else:
            raise ValueError("Invalid input_mode")

        inputs = {k: v.squeeze(0) for k, v in inputs.items()}
        return inputs, torch.tensor(label)


In [9]:
def clip_collate_fn(batch):
    inputs_batch, labels = zip(*batch)
    keys = inputs_batch[0].keys()
    collated = {}

    for key in keys:
        tensors = [inputs[key] for inputs in inputs_batch]
        if key in ["input_ids", "attention_mask"]:
            collated[key] = pad_sequence(tensors, batch_first=True, padding_value=0)
        else:
            collated[key] = torch.stack(tensors)
    return collated, torch.tensor(labels)

In [10]:
class FlexibleCLIPClassifier(nn.Module):
    def __init__(self, clip_model, mode="both"):
        super().__init__()
        self.clip = clip_model
        self.mode = mode
        self.classifier = nn.Sequential(
            nn.Linear(self.clip.config.projection_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 3)  # 3 classes: Low, Medium, High
        )

    def forward(self, **kwargs):
        if self.mode == "both":
            text_features = self.clip.get_text_features(
                input_ids=kwargs["input_ids"], attention_mask=kwargs["attention_mask"]
            )
            image_features = self.clip.get_image_features(pixel_values=kwargs["pixel_values"])
            embedding = (text_features + image_features) / 2
        elif self.mode == "image":
            embedding = self.clip.get_image_features(pixel_values=kwargs["pixel_values"])
        elif self.mode == "text":
            embedding = self.clip.get_text_features(
                input_ids=kwargs["input_ids"], attention_mask=kwargs["attention_mask"]
            )
        else:
            raise ValueError("Invalid mode.")
        return self.classifier(embedding)

In [12]:
#  Split & Load Data
df_train, df_val = train_test_split(df_all, test_size=0.1, random_state=42)

train_dataset = HatefulMemeDataset(df_train, os.path.join(dataset_dir, "img"), clip_processor, mode=input_mode)
val_dataset = HatefulMemeDataset(df_val, os.path.join(dataset_dir, "img"), clip_processor, mode=input_mode)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=clip_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, collate_fn=clip_collate_fn)

#  Load Model + Weighted Loss
model = FlexibleCLIPClassifier(clip_model, mode=input_mode).to(device)



In [14]:
import numpy as np  

# Compute class weights (FIXED)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.array([0, 1, 2]),
    y=df_all["harm_label"]
)
weights = torch.tensor(class_weights, dtype=torch.float).to(device)
criterion = nn.CrossEntropyLoss(weight=weights)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)


In [15]:
#  Training
def train_model(model, dataloader, epochs=3):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        print(f"\n🌀 Starting Epoch {epoch+1}/{epochs}")
        for i, (batch) in enumerate(dataloader):
            inputs, labels = batch
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)

            outputs = model(**inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            if (i+1) % 10 == 0:
                print(f"  Batch {i+1} | Loss: {loss.item():.4f}")

        print(f" Epoch {epoch+1} Complete - Total Loss: {total_loss:.4f}")

train_model(model, train_loader, epochs=3)


🌀 Starting Epoch 1/3
  Batch 10 | Loss: 1.2326
  Batch 20 | Loss: 1.1274
  Batch 30 | Loss: 0.8664
  Batch 40 | Loss: 0.7557
  Batch 50 | Loss: 0.9154
  Batch 60 | Loss: 1.8435
  Batch 70 | Loss: 1.4652
  Batch 80 | Loss: 0.9020
  Batch 90 | Loss: 0.8894
  Batch 100 | Loss: 0.7776
  Batch 110 | Loss: 1.9605
  Batch 120 | Loss: 0.8285
  Batch 130 | Loss: 1.0702
  Batch 140 | Loss: 0.7007
  Batch 150 | Loss: 0.7042
  Batch 160 | Loss: 0.8022
  Batch 170 | Loss: 0.8982
  Batch 180 | Loss: 0.6535
  Batch 190 | Loss: 1.0750
  Batch 200 | Loss: 0.7981
  Batch 210 | Loss: 1.2212
  Batch 220 | Loss: 0.6875
  Batch 230 | Loss: 0.8044
  Batch 240 | Loss: 0.7472
  Batch 250 | Loss: 0.6844
  Batch 260 | Loss: 0.8814
  Batch 270 | Loss: 1.1361
  Batch 280 | Loss: 0.8956
  Batch 290 | Loss: 0.7338
  Batch 300 | Loss: 1.2684
  Batch 310 | Loss: 0.9462
  Batch 320 | Loss: 0.7824
  Batch 330 | Loss: 0.7941
  Batch 340 | Loss: 0.9402
  Batch 350 | Loss: 0.9022
  Batch 360 | Loss: 0.9097
  Batch 370 | L

In [16]:
def evaluate_model(model, dataloader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in dataloader:
            inputs, labels = batch
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)

            outputs = model(**inputs)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    print("\n📊 Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=["Low", "Medium", "High"]))

evaluate_model(model, val_loader)


📊 Classification Report:
              precision    recall  f1-score   support

         Low       0.71      0.78      0.74       124
      Medium       0.53      0.44      0.48        64
        High       0.50      0.42      0.45        12

    accuracy                           0.65       200
   macro avg       0.58      0.55      0.56       200
weighted avg       0.64      0.65      0.64       200



In [17]:
torch.save(model.state_dict(), "full_new_model.pt")
print("✅ Model saved to full_model.pt")

✅ Model saved to full_model.pt
