In [1]:
from tokenizer import Tokenizer
from dataset import IMDB_Dataset

# Sequence-length limit handed to each dataset as `max_length`; the model
# built in the next cell receives the same value as `max_len`.
MAX_LENGTH = 512

# A single tokenizer instance is shared by both splits.
tokenizer = Tokenizer()

# Build the train and validation datasets with identical settings; only the
# CSV path differs. The generator is consumed in order, so the training split
# is still constructed before the validation split.
train_dataset, val_dataset = (
    IMDB_Dataset(data_path=csv_path, max_length=MAX_LENGTH, tokenizer=tokenizer)
    for csv_path in ("imdb_train.csv", "imdb_val.csv")
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from torch.utils.data import DataLoader
import torch
from model import SemanticClassifier

# Tunable settings for this cell.
BATCH_SIZE = 32
SEED = 42

# Seed PyTorch's global RNG before anything random happens. The shuffled
# training DataLoader draws its per-epoch permutation from this RNG, and
# (assuming SemanticClassifier relies on PyTorch's default initialisers --
# TODO confirm) so does the model's weight initialisation. Without a seed,
# every Restart & Run All produces different losses and accuracies.
torch.manual_seed(SEED)

# Training batches are reshuffled every epoch; validation order stays fixed.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Two-class classifier whose vocabulary size comes from the tokenizer and
# whose `max_len` matches the length limit given to the datasets.
model = SemanticClassifier(num_classes=2, vocab_size=tokenizer.get_vocab_size(), max_len=MAX_LENGTH)

# Pick the compute device, in priority order: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: mps


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Number of full passes over the training set.
NUM_EPOCHS = 5

# Move the model to the selected device before creating the optimizer so the
# optimizer tracks the parameters that are actually trained.
model.to(device)
criterion = nn.CrossEntropyLoss()  # default reduction: mean over the batch
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(NUM_EPOCHS):
    # ---- Training phase ----
    model.train()
    total_loss = 0.0   # sum of per-sample losses seen this epoch
    train_seen = 0     # number of training samples seen this epoch

    for batch_idx, (input_ids, labels) in enumerate(train_dataloader):
        input_ids = input_ids.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(input_ids)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once: every .item() forces a device-to-host sync.
        batch_loss = loss.item()
        n_batch = labels.size(0)

        # `criterion` returns the batch mean, so weight it by the batch size.
        # Otherwise a smaller final batch would count as much as a full one
        # in the epoch average.
        total_loss += batch_loss * n_batch
        train_seen += n_batch

        # Print loss every 50 batches
        if (batch_idx + 1) % 50 == 0:
            print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Batch {batch_idx+1}/{len(train_dataloader)} | Loss: {batch_loss:.4f}")

    # Per-sample mean; NaN (not a ZeroDivisionError) if the loader was empty.
    avg_train_loss = total_loss / train_seen if train_seen else float("nan")

    # ---- Validation phase ----
    model.eval()
    total_val_loss = 0.0   # sum of per-sample validation losses
    correct = 0            # number of correctly classified samples
    total = 0              # number of validation samples seen

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for input_ids, labels in val_dataloader:
            input_ids = input_ids.to(device)
            labels = labels.to(device)

            logits = model(input_ids)
            loss = criterion(logits, labels)
            n_batch = labels.size(0)
            total_val_loss += loss.item() * n_batch

            # Calculate accuracy: predicted class is the arg-max over dim 1.
            predictions = torch.argmax(logits, dim=1)
            correct += (predictions == labels).sum().item()
            total += n_batch

    # Sample-weighted averages; NaN when the validation set is empty.
    avg_val_loss = total_val_loss / total if total else float("nan")
    accuracy = 100 * correct / total if total else float("nan")

    print(f"\n{'='*60}")
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} Summary:")
    print(f"  Train Loss: {avg_train_loss:.4f}")
    print(f"  Val Loss: {avg_val_loss:.4f}")
    print(f"  Val Accuracy: {accuracy:.2f}%")
    print(f"{'='*60}\n")

Epoch 1/5 | Batch 50/1250 | Loss: 0.6969
Epoch 1/5 | Batch 100/1250 | Loss: 0.7254
Epoch 1/5 | Batch 150/1250 | Loss: 0.5222
Epoch 1/5 | Batch 200/1250 | Loss: 0.6368
Epoch 1/5 | Batch 250/1250 | Loss: 0.5814
Epoch 1/5 | Batch 300/1250 | Loss: 0.5161
Epoch 1/5 | Batch 350/1250 | Loss: 0.3556
Epoch 1/5 | Batch 400/1250 | Loss: 0.4550
Epoch 1/5 | Batch 450/1250 | Loss: 0.5349
Epoch 1/5 | Batch 500/1250 | Loss: 0.5553
Epoch 1/5 | Batch 550/1250 | Loss: 0.5064
Epoch 1/5 | Batch 600/1250 | Loss: 0.5143
Epoch 1/5 | Batch 650/1250 | Loss: 0.5507
Epoch 1/5 | Batch 700/1250 | Loss: 0.3834
Epoch 1/5 | Batch 750/1250 | Loss: 0.3897
Epoch 1/5 | Batch 800/1250 | Loss: 0.2981
Epoch 1/5 | Batch 850/1250 | Loss: 0.3828
Epoch 1/5 | Batch 900/1250 | Loss: 0.2065
Epoch 1/5 | Batch 950/1250 | Loss: 0.4079
Epoch 1/5 | Batch 1000/1250 | Loss: 0.3402
Epoch 1/5 | Batch 1050/1250 | Loss: 0.2943
Epoch 1/5 | Batch 1100/1250 | Loss: 0.5267
Epoch 1/5 | Batch 1150/1250 | Loss: 0.4760
Epoch 1/5 | Batch 1200/1250 | L

In [4]:
# Persist the trained model's state_dict (not the module object itself) to a
# file in the current working directory.
torch.save(obj=model.state_dict(), f='semantic_classifier_model.pth')