In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models, transforms
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from torchvision.datasets import ImageFolder

In [2]:
# Root folders of the two dataset splits. Each class lives in its own sub-folder;
# the training split contains 'code' and 'no_code' (the validation split is
# expected to mirror that layout).
train_dir = "/home/bartek/Kod/PD/praca_dyplomowa/dane/resnet_dane/ready/train"
val_dir = "/home/bartek/Kod/PD/praca_dyplomowa/dane/resnet_dane/ready/val"

In [3]:
# Prefer the GPU whenever CUDA is usable; otherwise everything runs on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
# Preprocessing pipeline applied to every image of both splits.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

_preprocessing_steps = [
    transforms.Resize((224, 224)),  # fixed 224x224 input; aspect ratio is not preserved
    transforms.ToTensor(),
    transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),  # ImageNet channel statistics
]
transform = transforms.Compose(_preprocessing_steps)

In [5]:
# Build one ImageFolder dataset per split; both share the same preprocessing.
train_dataset, val_dataset = (
    ImageFolder(root=split_dir, transform=transform)
    for split_dir in (train_dir, val_dir)
)

In [6]:
# Batching options shared by both loaders.
batch_size = 32  # tune to the available GPU memory
_loader_options = {"batch_size": batch_size, "num_workers": 4}

# Only the training split is shuffled; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

In [7]:
# Sanity check: class order as indexed by the dataset, plus the size of each split.
print(
    f"Classes: {train_dataset.classes}",
    f"Number of training examples: {len(train_dataset)}",
    f"Number of validation examples: {len(val_dataset)}",
    sep="\n",
)

Classes: ['code', 'no_code']
Number of training examples: 9044
Number of validation examples: 2262


In [8]:
# Start from a ResNet-18 initialised with the named ImageNet weights.
pretrained_weights = 'ResNet18_Weights.IMAGENET1K_V1'
model = models.resnet18(weights=pretrained_weights)

In [9]:
# Replace the pretrained classification head with a freshly initialised linear
# layer. The number of outputs is taken from the dataset rather than hard-coded,
# so the head always matches the class folders that were actually found
# (currently 2: code / no_code).
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(train_dataset.classes))

In [10]:
# Move the parameters and buffers onto the selected device before the optimizer is built.
model = model.to(device=device)

In [11]:
# Cross-entropy on the raw logits, optimised with Adam over every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [12]:
# Multiply the learning rate by 0.1 once the monitored value (the validation loss
# handed to scheduler.step) has stopped decreasing for more than `patience` epochs.
# `verbose=True` is intentionally not passed: the argument has been deprecated
# since PyTorch 2.2. To log the current rate, read optimizer.param_groups[0]['lr'].
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=3
)



In [13]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader`` and report mean metrics.

    Args:
        model: Network to train; this call switches it to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``. Its value is
            multiplied by the batch size, i.e. it is treated as a per-batch mean.
        optimizer: Optimizer whose gradients are cleared and stepped once per batch.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and the
        fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` yields no samples; the averages would
            otherwise fail with an unexplained ``ZeroDivisionError``.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(dataloader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        # Gradients accumulate across backward() calls, so clear them per batch.
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so a smaller final batch does not
        # skew the epoch average.
        running_loss += loss.item() * inputs.size(0)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("train_epoch received a dataloader that yielded no samples")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [14]:
def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating any parameters.

    Args:
        model: Network to evaluate; this call switches it to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``. Its value is
            multiplied by the batch size, i.e. it is treated as a per-batch mean.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and the
        fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``dataloader`` yields no samples; the averages would
            otherwise fail with an unexplained ``ZeroDivisionError``.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    # No gradients are needed for evaluation, so skip building the autograd graph.
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Validation"):
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight the batch loss by its size so a smaller final batch does
            # not skew the epoch average.
            running_loss += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("validate received a dataloader that yielded no samples")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [15]:
# Training length and the per-epoch metric histories filled in by the training loop.
num_epochs = 10
train_losses, train_accs, val_losses, val_accs = [], [], [], []

In [16]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    # One optimisation pass over the training split, then record its metrics.
    train_loss, train_acc = train_epoch(
        model=model,
        dataloader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    # Measure generalisation on the held-out split and record it as well.
    val_loss, val_acc = validate(
        model=model,
        dataloader=val_loader,
        criterion=criterion,
        device=device,
    )
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # The plateau scheduler monitors the validation loss to decide on LR cuts.
    scheduler.step(val_loss)

    print(
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}",
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}",
        "-" * 50,
        sep="\n",
    )

Epoch 1/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0259, Train Acc: 0.9910
Val Loss: 0.0039, Val Acc: 0.9991
--------------------------------------------------
Epoch 2/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0041, Train Acc: 0.9985
Val Loss: 0.0014, Val Acc: 0.9996
--------------------------------------------------
Epoch 3/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0045, Train Acc: 0.9990
Val Loss: 0.0049, Val Acc: 0.9969
--------------------------------------------------
Epoch 4/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0037, Train Acc: 0.9987
Val Loss: 0.0049, Val Acc: 0.9978
--------------------------------------------------
Epoch 5/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0004, Train Acc: 1.0000
Val Loss: 0.0006, Val Acc: 1.0000
--------------------------------------------------
Epoch 6/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0007, Train Acc: 0.9998
Val Loss: 0.0005, Val Acc: 1.0000
--------------------------------------------------
Epoch 7/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0016, Train Acc: 0.9996
Val Loss: 0.0025, Val Acc: 0.9987
--------------------------------------------------
Epoch 8/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0032, Train Acc: 0.9992
Val Loss: 0.0064, Val Acc: 0.9978
--------------------------------------------------
Epoch 9/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0067, Train Acc: 0.9978
Val Loss: 0.0019, Val Acc: 0.9996
--------------------------------------------------
Epoch 10/10


Training:   0%|          | 0/283 [00:00<?, ?it/s]

Validation:   0%|          | 0/71 [00:00<?, ?it/s]

Train Loss: 0.0021, Train Acc: 0.9997
Val Loss: 0.0037, Val Acc: 0.9987
--------------------------------------------------


In [19]:
# Directory the trained checkpoint is written to ("modele" = "models").
modele = "/home/bartek/Kod/PD/praca_dyplomowa/dane/modele"

In [20]:
# Bundle the weights, the optimizer state and the label mapping into one checkpoint.
model_info = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'epoch': num_epochs,
    # Entry i must name the class the network outputs as index i. ImageFolder
    # indexed the folders as ['code', 'no_code'], so the order is taken from the
    # dataset; a hand-typed list in the opposite order would swap the labels
    # for anyone decoding predictions from this checkpoint.
    'class_names': list(train_dataset.classes),
}
torch.save(model_info, f'{modele}/resnet18_code_classifier_complete.pth')