In [3]:
# ==================================================================================
#  üöÄ HATEFUL MEME DETECTION - KAGGLE ENTERPRISE PIPELINE (FINAL FIX)
#  Features: Dual GPU | Mixed Precision | Focal Loss | Auto-GloVe
# ==================================================================================

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms, models
from PIL import Image, ImageFile
import pandas as pd
import numpy as np
import json
import collections
import nltk
import random
import glob
from nltk.tokenize import word_tokenize
from tqdm.notebook import tqdm
from torch.cuda.amp import GradScaler, autocast

# Fix truncated images error
ImageFile.LOAD_TRUNCATED_IMAGES = True

# ==========================================
# 1. UTILITIES & CLASSES
# ==========================================
class Vocabulary:
    def __init__(self, freq_threshold=2):
        self.itos = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.stoi = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
        self.freq_threshold = freq_threshold
    def __len__(self): return len(self.itos)
    def build_vocabulary(self, sentence_list):
        frequencies = collections.Counter()
        idx = 4
        for sentence in sentence_list:
            for word in word_tokenize(str(sentence).lower()):
                frequencies[word] += 1
        for word, count in frequencies.items():
            if count >= self.freq_threshold:
                self.stoi[word] = idx; self.itos[idx] = word; idx += 1
    def numericalize(self, text):
        return [self.stoi.get(t, 3) for t in word_tokenize(str(text).lower())]

def load_glove_embeddings(vocab, glove_path):
    print(f"üîå Loading GloVe from: {glove_path}")
    embeddings_index = {}
    with open(glove_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')
    matrix = np.zeros((len(vocab), 300))
    hits = 0
    for word, i in vocab.stoi.items():
        v = embeddings_index.get(word)
        if v is not None: matrix[i] = v; hits += 1
    print(f"   ‚úÖ GloVe Loaded: {hits} words matched.")
    return torch.tensor(matrix, dtype=torch.float32)

class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
    def forward(self, inputs, targets):
        bce_loss = self.bce(inputs, targets)
        pt = torch.exp(-bce_loss)
        focal_loss = self.alpha * (1-pt)**self.gamma * bce_loss
        return focal_loss.mean()

class MMHSDataset(Dataset):
    def __init__(self, json_path, img_dir, vocab, transform=None, limit=None):
        self.img_dir = img_dir
        self.vocab = vocab
        self.transform = transform
        self.data = []
        if json_path and os.path.exists(json_path):
            with open(json_path, 'r') as f: raw_data = json.load(f)
            all_items = list(raw_data.items())
            if limit:
                random.shuffle(all_items)
                all_items = all_items[:limit]
            for k, v in all_items:
                labels = v.get('labels', [])
                if not labels: continue
                label = 1 if sum(labels) >= 2 else 0
                img_name = f"{k}.jpg"
                if os.path.exists(os.path.join(img_dir, img_name)):
                    self.data.append((img_name, v.get('tweet_text', ""), label))
    def __len__(self): return len(self.data)
    def __getitem__(self, idx):
        img_name, text, label = self.data[idx]
        img_path = os.path.join(self.img_dir, img_name)
        try: image = Image.open(img_path).convert("RGB")
        except: image = Image.new('RGB', (224, 224))
        if self.transform: image = self.transform(image)
        tokens = self.vocab.numericalize(text)
        tokens = (tokens + [0]*60)[:60]
        return image, torch.tensor(tokens, dtype=torch.long), torch.tensor(label, dtype=torch.float32)

class FacebookDataset(Dataset):
    def __init__(self, json_path, img_dir, vocab, transform=None):
        self.df = pd.read_json(json_path, lines=True)
        self.img_dir = img_dir
        self.vocab = vocab
        self.transform = transform
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['img'])
        try: image = Image.open(img_path).convert("RGB")
        except: image = Image.new('RGB', (224, 224))
        if self.transform: image = self.transform(image)
        tokens = self.vocab.numericalize(row['text'])
        tokens = (tokens + [0]*60)[:60]
        return image, torch.tensor(tokens, dtype=torch.long), torch.tensor(row['label'], dtype=torch.float32)

# ==========================================
# 2. MODEL
# ==========================================
class VisualEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        full_layers = list(resnet.children())[:-1] 
        self.backbone = nn.Sequential(*full_layers)
        count = 0
        for param in self.backbone.parameters():
            if count < 100: param.requires_grad = False
            else: param.requires_grad = True
            count += 1
        self.fc = nn.Sequential(nn.Flatten(), nn.Linear(2048, 512), nn.BatchNorm1d(512), nn.ReLU())
    def forward(self, x): return self.fc(self.backbone(x))

class TextEncoder(nn.Module):
    def __init__(self, vocab_size, embed_weights):
        super().__init__()
        if embed_weights is not None:
            self.embedding = nn.Embedding.from_pretrained(embed_weights, freeze=False, padding_idx=0)
        else:
            self.embedding = nn.Embedding(vocab_size, 300, padding_idx=0)
        self.lstm = nn.LSTM(300, 256, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(512, 512)
        self.dropout = nn.Dropout(0.3)
    def forward(self, x):
        _, (h, _) = self.lstm(self.embedding(x))
        return self.dropout(self.fc(torch.cat((h[-2], h[-1]), dim=1)))

class TrojanModel(nn.Module):
    def __init__(self, vocab_size, embed_weights):
        super().__init__()
        self.vis = VisualEncoder()
        self.txt = TextEncoder(vocab_size, embed_weights)
        self.head = nn.Sequential(
            nn.Linear(1024, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(0.4),
            nn.Linear(256, 1)
        )
    def forward(self, img, txt):
        return self.head(torch.cat((self.vis(img), self.txt(txt)), dim=1))

# ==========================================
# 3. MAIN PIPELINE
# ==========================================
def run_kaggle_training():
    print("üöÄ INITIALIZING TRAINING PIPELINE...")
    
    # --- AUTO-DETECT PATHS INSIDE FUNCTION ---
    def find_file(filename, search_path):
        for root, dirs, files in os.walk(search_path):
            if filename in files: return os.path.join(root, filename)
        return None

    DATA_ROOT = '/kaggle/input'
    
    # 1. FIND DATASETS
    FB_TRAIN = find_file("train.jsonl", DATA_ROOT)
    if not FB_TRAIN: raise FileNotFoundError("‚ùå Could not find train.jsonl")
    FB_ROOT = os.path.dirname(FB_TRAIN)
    FB_IMG_DIR = os.path.join(FB_ROOT, 'img')
    if not os.path.exists(FB_IMG_DIR):
        sample = find_file("01235.png", FB_ROOT)
        if sample: FB_IMG_DIR = os.path.dirname(sample)

    MMHS_GT = find_file("MMHS150K_GT.json", DATA_ROOT)
    MMHS_IMG_DIR = None
    if MMHS_GT:
        MMHS_ROOT = os.path.dirname(MMHS_GT)
        if os.path.exists(os.path.join(MMHS_ROOT, 'img_resized')):
            MMHS_IMG_DIR = os.path.join(MMHS_ROOT, 'img_resized')
        else:
            MMHS_IMG_DIR = os.path.join(MMHS_ROOT, 'img')

    # 2. FIND GLOVE (THE FIX IS HERE)
    GLOVE_PATH = None
    possible_glove = [
        '/kaggle/input/glove-global-vectors-for-word-representation/glove.6B.300d.txt',
        '/kaggle/input/glove6b300dtxt/glove.6B.300d.txt',
        'glove.6B.300d.txt'
    ]
    for p in possible_glove:
        if os.path.exists(p): GLOVE_PATH = p; break
    
    if not GLOVE_PATH:
        print("‚¨áÔ∏è Downloading GloVe (Fallback)...")
        os.system("wget -q http://nlp.stanford.edu/data/glove.6B.zip")
        os.system("unzip -q -o glove.6B.zip")
        GLOVE_PATH = 'glove.6B.300d.txt'

    print(f"   ‚úÖ Data Found: {FB_TRAIN}")
    print(f"   ‚úÖ GloVe Found: {GLOVE_PATH}")

    # CONFIG
    BATCH_SIZE = 128
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"   üöÄ Accelerator: {DEVICE} (GPUs: {torch.cuda.device_count()})")

    nltk.download('punkt', quiet=True)

    # --- BUILD ---
    transform = transforms.Compose([
        transforms.Resize((224, 224)), transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
    print("üöß Building Vocabulary...")
    df_fb = pd.read_json(FB_TRAIN, lines=True)
    vocab = Vocabulary()
    vocab.build_vocabulary(df_fb['text'].tolist())
    
    glove = load_glove_embeddings(vocab, GLOVE_PATH)
    model = TrojanModel(len(vocab), glove)
    
    if torch.cuda.device_count() > 1:
        print("‚ö° Dual GPU Activated.")
        model = nn.DataParallel(model)
        
    model = model.to(DEVICE)
    criterion = FocalLoss()
    scaler = GradScaler()
    
    # --- STAGE 1 ---
    if MMHS_GT and MMHS_IMG_DIR:
        print("\n=== STAGE 1: PRE-TRAINING (30k) ===")
        mmhs_ds = MMHSDataset(MMHS_GT, MMHS_IMG_DIR, vocab, transform, limit=30000)
        mmhs_loader = DataLoader(mmhs_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
        opt = optim.Adam(model.parameters(), lr=1e-4)
        
        model.train()
        loop = tqdm(mmhs_loader)
        for img, txt, lbl in loop:
            img, txt, lbl = img.to(DEVICE), txt.to(DEVICE), lbl.to(DEVICE)
            opt.zero_grad()
            with autocast():
                loss = criterion(model(img, txt).squeeze(), lbl)
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
            loop.set_postfix(loss=loss.item())

    # --- STAGE 2 ---
    print("\n=== STAGE 2: FINE-TUNING ===")
    fb_train = FacebookDataset(FB_TRAIN, FB_IMG_DIR, vocab, transform)
    dev_path = find_file("dev_seen.jsonl", DATA_ROOT) or find_file("dev.jsonl", DATA_ROOT)
    fb_dev = FacebookDataset(dev_path, FB_IMG_DIR, vocab, transform)
    
    # Balancing
    targets = fb_train.df['label'].values
    weights = [1./len(targets[targets==0]), 1./len(targets[targets==1])]
    samples_weight = [weights[int(t)] for t in targets]
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    
    train_loader = DataLoader(fb_train, batch_size=BATCH_SIZE, sampler=sampler, num_workers=4, pin_memory=True)
    val_loader = DataLoader(fb_dev, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)
    
    opt = optim.AdamW(model.parameters(), lr=5e-5, weight_decay=1e-4)
    # FIX: verbose removed
    sched = optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.1, patience=1)
    
    best_acc = 0.0
    for epoch in range(10):
        model.train()
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
        for img, txt, lbl in loop:
            img, txt, lbl = img.to(DEVICE), txt.to(DEVICE), lbl.to(DEVICE)
            opt.zero_grad()
            with autocast():
                loss = criterion(model(img, txt).squeeze(), lbl)
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
            loop.set_postfix(loss=loss.item())
            
        # Eval
        model.eval()
        correct = 0; total = 0; val_loss = 0
        with torch.no_grad():
            for img, txt, lbl in val_loader:
                img, txt, lbl = img.to(DEVICE), txt.to(DEVICE), lbl.to(DEVICE)
                with autocast():
                    out = model(img, txt).squeeze()
                    val_loss += criterion(out, lbl).item()
                preds = (torch.sigmoid(out) > 0.5).float()
                correct += (preds == lbl).sum().item()
                total += lbl.size(0)
        
        acc = 100 * correct / total
        avg_val = val_loss/len(val_loader)
        sched.step(avg_val)
        print(f"   Val Acc: {acc:.2f}% | Loss: {avg_val:.4f}")
        
        if acc > best_acc:
            best_acc = acc
            torch.save(model.module.state_dict() if hasattr(model, 'module') else model.state_dict(), 'model_best.pth')
            print(f"   üíæ Saved: {acc:.2f}%")

if __name__ == "__main__":
    run_kaggle_training()

üöÄ INITIALIZING TRAINING PIPELINE...
   ‚úÖ Data Found: /kaggle/input/hatefulmemesproject/facebook/data/train.jsonl
   ‚úÖ GloVe Found: glove.6B.300d.txt
   üöÄ Accelerator: cuda (GPUs: 2)
üöß Building Vocabulary...
üîå Loading GloVe from: glove.6B.300d.txt
   ‚úÖ GloVe Loaded: 4793 words matched.
‚ö° Dual GPU Activated.

=== STAGE 1: PRE-TRAINING (30k) ===


  scaler = GradScaler()


  0%|          | 0/235 [00:00<?, ?it/s]

  with autocast():



=== STAGE 2: FINE-TUNING ===


Epoch 1:   0%|          | 0/67 [00:00<?, ?it/s]

  with autocast():
  with autocast():


   Val Acc: 50.00% | Loss: 23.7376
   üíæ Saved: 50.00%


Epoch 2:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 50.00% | Loss: 25.7989


Epoch 3:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 50.00% | Loss: 11.9520


Epoch 4:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 50.00% | Loss: 0.9381


Epoch 5:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 50.00% | Loss: 16.0829


Epoch 6:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 50.00% | Loss: 6.0472


Epoch 7:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 54.60% | Loss: 0.1966
   üíæ Saved: 54.60%


Epoch 8:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 50.00% | Loss: 0.8020


Epoch 9:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 50.00% | Loss: 0.7947


Epoch 10:   0%|          | 0/67 [00:00<?, ?it/s]

   Val Acc: 51.60% | Loss: 0.2308


In [4]:
# ==================================================================================
#  üöÄ HATEFUL MEME DETECTION - THE "NUCLEAR OPTION" (OpenAI CLIP)
#  Architecture: ViT-B/32 (Vision Transformer) | Target: >70% Accuracy Start
# ==================================================================================

import os
import sys
import subprocess

# 1. AUTO-INSTALL DEPENDENCIES (Magic Fix)
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

try:
    import clip
    print("‚úÖ CLIP is already installed.")
except ImportError:
    print("‚¨áÔ∏è  Installing OpenAI CLIP & Dependencies (Requires Internet ON)...")
    install("ftfy")
    install("regex")
    install("tqdm")
    install("git+https://github.com/openai/CLIP.git")
    import clip

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image, ImageFile
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Fix truncated images error
ImageFile.LOAD_TRUNCATED_IMAGES = True

# ==========================================
# 2. CONFIGURATION
# ==========================================
CONFIG = {
    'BATCH_SIZE': 64,       # Large batch for stable gradients
    'EPOCHS': 10,           
    'LR': 1e-4,             # Higher LR since we are only training the head
    'DEVICE': "cuda" if torch.cuda.is_available() else "cpu",
    'MODEL_TYPE': "ViT-B/32", # The standard CLIP model
    'POS_WEIGHT': 2.0       # Penalty for missing Hateful memes (Imbalance Fix)
}

print(f"üöÄ SYSTEM ONLINE: Running on {CONFIG['DEVICE']}")

# ==========================================
# 3. ROBUST DATA LOCATOR
# ==========================================
def find_file(filename, search_path):
    for root, dirs, files in os.walk(search_path):
        if filename in files:
            return os.path.join(root, filename)
    return None

print("üîç Scanning for Datasets...")
DATA_ROOT = '/kaggle/input'

# Find Train File
FB_TRAIN = find_file("train.jsonl", DATA_ROOT)
if not FB_TRAIN:
    raise FileNotFoundError("‚ùå CRITICAL: Could not find 'train.jsonl'. Did you add the dataset?")

# Find Image Directory (Smart Search)
FB_ROOT = os.path.dirname(FB_TRAIN)
FB_IMG_DIR = os.path.join(FB_ROOT, 'img')
if not os.path.exists(FB_IMG_DIR):
    # Fallback: Look for a known image
    sample = find_file("01235.png", FB_ROOT) # Common file in dataset
    if sample: 
        FB_IMG_DIR = os.path.dirname(sample)
    else:
        # Fallback 2: Look in the parent directory
        parent = os.path.dirname(FB_ROOT)
        FB_IMG_DIR = os.path.join(parent, 'img')

print(f"   ‚úÖ Found Train Data: {FB_TRAIN}")
print(f"   ‚úÖ Found Image Dir: {FB_IMG_DIR}")

# ==========================================
# 4. DATASET CLASS (CLIP SPECIALIZED)
# ==========================================
class CLIPMemesDataset(Dataset):
    def __init__(self, json_path, img_dir, preprocess):
        self.df = pd.read_json(json_path, lines=True)
        self.img_dir = img_dir
        self.preprocess = preprocess # CLIP's internal image cleaner

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        
        # 1. Image
        img_path = os.path.join(self.img_dir, row['img'])
        try:
            image = Image.open(img_path).convert("RGB")
            image = self.preprocess(image) # Returns tensor (3, 224, 224)
        except:
            # Fallback for corrupt images
            image = Image.new('RGB', (224, 224))
            image = self.preprocess(image)

        # 2. Text (Tokenized by CLIP)
        # Truncate to 77 tokens (CLIP limit)
        text = clip.tokenize(str(row['text']), truncate=True).squeeze()
        
        # 3. Label
        label = torch.tensor(row['label'], dtype=torch.float32)
        
        return image, text, label

# ==========================================
# 5. THE MODEL (FROZEN BACKBONE)
# ==========================================
class HatefulCLIPClassifier(nn.Module):
    def __init__(self, model_type, device):
        super().__init__()
        print(f"üß† Loading CLIP {model_type}...")
        self.clip_model, self.preprocess = clip.load(model_type, device=device, jit=False)
        
        # Convert to float32 (CLIP defaults to float16 which can cause NaN in training)
        self.clip_model = self.clip_model.float()
        
        # FREEZE CLIP BACKBONE (Crucial for Stage 1)
        for param in self.clip_model.parameters():
            param.requires_grad = False
            
        # The Classifier Head (Trainable)
        # Input = 512 (Image) + 512 (Text) = 1024
        self.classifier = nn.Sequential(
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1) # Logits out
        )

    def forward(self, image, text):
        with torch.no_grad():
            img_features = self.clip_model.encode_image(image)
            txt_features = self.clip_model.encode_text(text)
        
        # Concatenate features
        combined = torch.cat((img_features, txt_features), dim=1)
        return self.classifier(combined.float())

# ==========================================
# 6. MAIN TRAINING LOOP
# ==========================================
def run_training():
    # Setup
    model_wrapper = HatefulCLIPClassifier(CONFIG['MODEL_TYPE'], CONFIG['DEVICE'])
    model = model_wrapper.to(CONFIG['DEVICE'])
    preprocess = model_wrapper.preprocess
    
    # Data Loaders
    print("üì¶ Loading Datasets...")
    train_ds = CLIPMemesDataset(FB_TRAIN, FB_IMG_DIR, preprocess)
    
    # Find Dev/Val set
    dev_path = find_file("dev_seen.jsonl", DATA_ROOT) or find_file("dev.jsonl", DATA_ROOT)
    val_ds = CLIPMemesDataset(dev_path, FB_IMG_DIR, preprocess)
    
    train_loader = DataLoader(train_ds, batch_size=CONFIG['BATCH_SIZE'], shuffle=True, num_workers=2)
    val_loader = DataLoader(val_ds, batch_size=CONFIG['BATCH_SIZE'], shuffle=False, num_workers=2)
    
    # Weighted Loss (To fight the 64% Safe bias)
    pos_weight = torch.tensor([CONFIG['POS_WEIGHT']]).to(CONFIG['DEVICE'])
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    
    # Optimizer (Only training the classifier head!)
    optimizer = optim.AdamW(model.classifier.parameters(), lr=CONFIG['LR'], weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)
    
    best_acc = 0.0
    print("\nüî• STARTING CLIP TRAINING PROTOCOL...")
    
    for epoch in range(CONFIG['EPOCHS']):
        model.train()
        train_loss = 0
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}")
        
        for img, txt, lbl in loop:
            img, txt, lbl = img.to(CONFIG['DEVICE']), txt.to(CONFIG['DEVICE']), lbl.to(CONFIG['DEVICE'])
            
            optimizer.zero_grad()
            output = model(img, txt).squeeze()
            loss = criterion(output, lbl)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            loop.set_postfix(loss=loss.item())
            
        # Validation
        model.eval()
        preds_all = []
        labels_all = []
        val_loss = 0
        
        with torch.no_grad():
            for img, txt, lbl in val_loader:
                img, txt, lbl = img.to(CONFIG['DEVICE']), txt.to(CONFIG['DEVICE']), lbl.to(CONFIG['DEVICE'])
                
                out = model(img, txt).squeeze()
                val_loss += criterion(out, lbl).item()
                
                # Sigmoid for probability
                probs = torch.sigmoid(out)
                preds = (probs > 0.5).float()
                
                preds_all.extend(preds.cpu().numpy())
                labels_all.extend(lbl.cpu().numpy())
                
        # Metrics
        acc = accuracy_score(labels_all, preds_all) * 100
        f1 = f1_score(labels_all, preds_all)
        avg_val_loss = val_loss / len(val_loader)
        
        scheduler.step(acc)
        
        print(f"   RESULTS: Acc: {acc:.2f}% | F1: {f1:.4f} | Loss: {avg_val_loss:.4f}")
        
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), 'best_clip_model.pth')
            print(f"   üíæ NEW RECORD! Model Saved.")
            
    print(f"\nüèÜ Final Best Accuracy: {best_acc:.2f}%")

if __name__ == "__main__":
    run_training()

‚¨áÔ∏è  Installing OpenAI CLIP & Dependencies (Requires Internet ON)...
Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
   ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 44.8/44.8 kB 1.9 MB/s eta 0:00:00
Installing collected packages: ftfy
Successfully installed ftfy-6.3.1
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-0vzliwbd


  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-0vzliwbd


  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: clip
  Building wheel for clip (setup.py): started
  Building wheel for clip (setup.py): finished with status 'done'
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369490 sha256=b00fa8809ff41ceb375249595c710709687fff403c731a06bb936ac867780375
  Stored in directory: /tmp/pip-ephem-wheel-cache-ciwzzfvx/wheels/35/3e/df/3d24cbfb3b6a06f17a2bfd7d1138900d4365d9028aa8f6e92f
Successfully built clip
Installing collected packages: clip
Successfully installed clip-1.0
üöÄ SYSTEM ONLINE: Running on cuda
üîç Scanning for Datasets...
   ‚úÖ Found Train Data: /kaggle/input/hatefulmemesproject/facebook/data/train.jsonl
   ‚úÖ Found Image Dir: /kaggle/input/hatefulmemesproject/facebook/data/img
üß† Loading CLIP ViT-B/32...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 338M/338M [00:03<00:00, 105MiB/s]


üì¶ Loading Datasets...

üî• STARTING CLIP TRAINING PROTOCOL...


Epoch 1:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 58.80% | F1: 0.5402 | Loss: 1.0266
   üíæ NEW RECORD! Model Saved.


Epoch 2:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 59.00% | F1: 0.5330 | Loss: 1.0546
   üíæ NEW RECORD! Model Saved.


Epoch 3:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 58.00% | F1: 0.5291 | Loss: 1.0673


Epoch 4:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 57.60% | F1: 0.5160 | Loss: 1.0933


Epoch 5:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 57.40% | F1: 0.5035 | Loss: 1.1226


Epoch 6:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 57.80% | F1: 0.5012 | Loss: 1.1612


Epoch 7:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 58.20% | F1: 0.5059 | Loss: 1.1709


Epoch 8:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 58.40% | F1: 0.5071 | Loss: 1.1714


Epoch 9:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 58.80% | F1: 0.5142 | Loss: 1.1869


Epoch 10:   0%|          | 0/133 [00:00<?, ?it/s]

   RESULTS: Acc: 58.40% | F1: 0.5117 | Loss: 1.1886

üèÜ Final Best Accuracy: 59.00%
