In [1]:
import torch
from torch.utils.data import DataLoader as TorchDataLoader

import sys, os
sys.path.append(os.path.abspath(".."))


from DL_vs_HateSpeech.models.model_v0 import ModelV0
from DL_vs_HateSpeech.loading_data.dataloader import DataLoader
from DL_vs_HateSpeech.training.training import (
    collate_fn,
    get_optimizer_and_criterion,
    train_epoch
)
from DL_vs_HateSpeech.evaluation.evaluate import evaluate
from DL_vs_HateSpeech.plots.plot_loss import plot_losses

# Device setup: use the CUDA device when torch reports one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
# Hyperparameters
BATCH_SIZE = 16
LR = 1e-5
EPOCHS = 10
SEED = 42  # fixed so that Restart Kernel -> Run All repeats the same run

# Seed torch's default generator so later random draws made through it
# repeat from one fresh-kernel run to the next.
torch.manual_seed(SEED)

# Load Data
train_dataset = DataLoader(type="train")
val_dataset = DataLoader(type="val")

# Dedicated seeded generator for the shuffling loader: the training batch
# order then no longer depends on how much of the global RNG other cells
# consumed before iteration starts. Re-running this cell recreates it, so the
# cell stays idempotent.
shuffle_generator = torch.Generator().manual_seed(SEED)

train_loader = TorchDataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
    generator=shuffle_generator,
)
# Validation is iterated in dataset order, so it needs no generator.
val_loader = TorchDataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)


In [6]:
from models.easyocr import ocr_pipeline
from models.tesseract import tesseract_pipeline

# Pull a single batch from the training loader and compare, for its first
# sample, the text delivered by the loader against what each OCR pipeline
# returns for the same image. Nothing is printed if the loader yields no batch.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    images, texts, labels = first_batch
    sample_image = images[0]
    sample_image.show()
    print("TEXT", texts[0])
    print("OCR", ocr_pipeline(sample_image))
    print("TESSERACT", tesseract_pipeline(sample_image))

TEXT Bishop Earl Walker Jackson Sr . has issued a plea for all Christians to leave the Democrat Party Democrats boo 'd God 3 times at 2012 convention ! Democrats support abortion , and place Planned Parenthood clinics in minority neighborhoods . *Democrats reject the Bibical Family structure . *Democrats are openly hostile to those who express their Christian Value *Hillary says we must give up some of our principles *Democrats are hostile toward the display of a cross and public prayer . Democrats have turned their backs on God ! 
OCR Bishop Earl Walker Jackson Sr has issued a plea for all Chnstians to lcave the Democrat Party; [Jenuxrals bmn d (id Times a 2012 cmtention' [cmxtal: wppN abunion_ and place Pfanned Parentlkxxl clinics in minorit} ncighhxrhaxrfs_ [Jemctais rcjcct she Bibical amily stnctute _ [Jemiarals are openly hatil Iv thos' #ho express thcir hristian laltees Hillary Mfs #€ mnust Ef} e UD Mic' (f our principks taught hy scrinture (infrngement on [ Irxt Amendment) [Jetx

In [None]:
# Build the model on the selected device, then obtain the optimizer and the
# loss criterion configured with the learning rate LR.
model = ModelV0(clip_model_type="32").to(device)
optimizer, criterion = get_optimizer_and_criterion(model, lr=LR)

# Per-epoch loss histories; both are handed to plot_losses after the loop.
train_losses, val_losses = [], []

for epoch in range(EPOCHS):
    epoch_number = epoch + 1  # 1-based counter used only for the progress line
    print(f"\nEpoch {epoch_number}/{EPOCHS}")

    # Training step for this epoch: run train_epoch over train_loader and
    # report the loss it returns.
    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    print(f"Train Loss: {train_loss:.4f}")

    # Validation step: evaluate returns a (loss, accuracy) pair; the accuracy
    # is printed as a percentage.
    val_loss, val_accuracy = evaluate(model, val_loader, criterion, device)
    print(f"Val Loss: {val_loss:.4f}")
    print(f"Val Accuracy: {val_accuracy * 100:.2f}%")

    # Record this epoch's losses only after both steps have been reported.
    train_losses.append(train_loss)
    val_losses.append(val_loss)

# Plot the two loss histories once the final epoch has finished.
plot_losses(train_losses, val_losses)


config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

KeyboardInterrupt: 