In [None]:
!pip install split-folders

In [None]:
import os
import splitfolders

# 1. Project root. Replace the placeholder below with the real folder on your machine.
base_path = r'base_path\Character prediction'

# 2. Input (raw images) and output (train/val split) directories, built from
#    separate path segments so the separators are consistent on every OS.
input_folder = os.path.join(base_path, "data", "witcher_pics")
output_folder = os.path.join(base_path, "data", "witcher_output")

# Fail fast when the raw images are missing: continuing would only make a
# later cell crash with a less obvious error.
if not os.path.isdir(input_folder):
    raise FileNotFoundError(
        f"Error: Could not find the input folder at: {input_folder}. Check the path name!"
    )
print(f"Found input folder at: {input_folder}")

In [None]:
# Copy the images from input_folder into output_folder using a (train, val)
# ratio of 80/20. The seed is fixed at 42 so that re-running the cell is
# intended to give the same split.
splitfolders.ratio(
    input_folder,
    output=output_folder,
    ratio=(.8, .2),
    seed=42,
)

print(
    "--- Split Complete ---",
    f"Check your Desktop: {output_folder} should now contain 'train' and 'val' folders.",
    sep="\n",
)

In [None]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# ImageNet per-channel mean/std, the normalisation torchvision's pre-trained
# ResNet weights were trained with.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# 1. Transforms: light augmentation for training, deterministic preprocessing for validation.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        normalize,
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]),
}

# 2. Read the split from the folder the split cell wrote to. Pointing at any
#    other directory means a fresh Restart & Run All finds no train/val data.
data_dir = output_folder

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ['train', 'val']}

# Only the training set is reshuffled every epoch; validation metrics do not
# depend on sample order, so the val loader iterates in a fixed order.
dataloaders = {x: DataLoader(image_datasets[x], batch_size=16, shuffle=(x == 'train'))
               for x in ['train', 'val']}

# ImageFolder derives class names from the sub-folder names (sorted);
# expected here: ['geralt', 'no_geralt'].
print(f"Classes found: {image_datasets['train'].classes}")

In [None]:
import torch.nn as nn

# Load an ImageNet-pre-trained ResNet18. torchvision >= 0.13 selects the
# checkpoint through `weights=` and deprecates `pretrained=True`; keep the old
# call as a fallback for older installs.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Freeze every pre-trained parameter so training cannot overwrite them.
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a fresh 2-way classifier
# (Geralt / not Geralt). It is created after the freeze loop, so its
# parameters keep the default requires_grad=True and remain trainable.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [None]:
import torch.optim as optim

# Loss function used for both the training and validation passes.
criterion = nn.CrossEntropyLoss()

# Adam is handed only the parameters of the final layer (model.fc), so it is
# the only part of the network this optimizer updates.
optimizer = optim.Adam(params=model.fc.parameters(), lr=1e-3)

In [None]:
import time
import copy

def train_model(model, criterion, optimizer, num_epochs=10):
    """Train ``model`` and return it with its best-validation weights loaded.

    Every epoch runs a ``'train'`` pass (gradients and optimizer steps) followed
    by a ``'val'`` pass (no gradients) over the module-level ``dataloaders``
    dict, which must provide both keys. The weights of the epoch with the
    highest validation accuracy are remembered and restored before returning.

    Args:
        model: ``torch.nn.Module`` to optimise; it is modified in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to the parameters to update.
        num_epochs: Number of train+val passes to run.

    Returns:
        The same ``model`` object with the best validation weights loaded.

    Raises:
        ValueError: If a phase's loader yields no samples.
    """
    since = time.time()
    # Send batches to wherever the model already lives instead of relying on a
    # separate global that could disagree with it.
    model_device = next(model.parameters()).device
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            num_seen = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)

                # Gradients are tracked only during the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    preds = outputs.argmax(dim=1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean, so weight it by the batch size.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                num_seen += batch_size

            if num_seen == 0:
                raise ValueError(f"dataloaders['{phase}'] produced no samples")

            # Average over the samples actually iterated, so the metrics stay
            # correct even if the loader does not cover the whole dataset.
            epoch_loss = running_loss / num_seen
            epoch_acc = running_corrects / num_seen

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Snapshot the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # Restore the best weights seen during validation.
    model.load_state_dict(best_model_wts)
    return model

# Kick off the first training run (15 epochs); the returned model is kept as model_ft.
model_ft = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=15,
)

In [None]:
import torch
import os

# Destination: <base_path>/models/geralt_classifier_v1.pth
models_dir = os.path.join(base_path, "models")
model_path = os.path.join(models_dir, "geralt_classifier_v1.pth")

# Create the directory on first use, then write only the weights (state_dict) of model_ft.
os.makedirs(models_dir, exist_ok=True)
torch.save(model_ft.state_dict(), model_path)

print(f"Model saved to: {model_path}")

In [None]:
from PIL import Image

def predict_witcher(image_path, model, device,
                    class_names=('geralt', 'no_geralt'), preprocess=None):
    """Classify a single image file and return the predicted class name.

    Args:
        image_path: Path of the image to classify.
        model: Trained classifier; it is switched to evaluation mode.
        device: Device the model lives on; the image tensor is moved there.
        class_names: Names indexed by the predicted class id. The default
            assumes ImageFolder's alphabetical order for the two folders used
            in this notebook; pass ``image_datasets['train'].classes`` to stay
            in sync with the dataset.
        preprocess: Transform applied to the PIL image. Defaults to
            resize-to-224 + ToTensor + ImageNet normalisation. For a model
            validated with a different pipeline, pass that pipeline
            (e.g. ``data_transforms['val']``) so inference matches validation.

    Returns:
        The entry of ``class_names`` for the highest-scoring class.

    Raises:
        ValueError: If the predicted class id has no entry in ``class_names``.
    """
    if preprocess is None:
        preprocess = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    # 1. Load the image, forcing 3 channels; the context manager releases the file handle.
    with Image.open(image_path) as raw_img:
        img = raw_img.convert('RGB')

    img_tensor = preprocess(img).unsqueeze(0).to(device)  # add the batch dimension

    # 2. Predict. eval() disables dropout and makes batch-norm use its running
    #    statistics; no_grad() skips gradient bookkeeping.
    model.eval()
    with torch.no_grad():
        outputs = model(img_tensor)
    class_idx = int(outputs.argmax(dim=1).item())

    # 3. Map the class id to its name.
    if class_idx >= len(class_names):
        raise ValueError(
            f"Predicted class index {class_idx} has no entry in class_names {list(class_names)}"
        )
    return class_names[class_idx]

# --- Quick check of model_ft on a single image ---
# Replace the placeholder below with the path of an image on your computer.
test_image = r"img_path.png"
prediction = predict_witcher(image_path=test_image, model=model_ft, device=device)
print(f"The model thinks this is: {prediction}")

In [None]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim

# ImageNet per-channel mean/std, the normalisation torchvision's pre-trained
# ResNet weights were trained with.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# 1. Updated transforms: stronger training augmentation, resize + centre crop for validation.
data_transforms = {
    'train': transforms.Compose([
        # Crop a random region covering 8% to 100% of the image area, resized to 224x224.
        transforms.RandomResizedCrop(224, scale=(0.08, 1.0)),
        transforms.RandomHorizontalFlip(),
        # Blur 20% of the images so the model also sees blurred examples.
        transforms.RandomApply([transforms.GaussianBlur(kernel_size=5)], p=0.2),
        transforms.ToTensor(),
        normalize,
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]),
}

# 2. Read the split from the folder the split cell wrote to. Pointing at any
#    other directory means a fresh Restart & Run All finds no train/val data.
data_dir = output_folder

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ['train', 'val']}

# Only the training set is reshuffled every epoch; validation metrics do not
# depend on sample order, so the val loader iterates in a fixed order.
dataloaders = {x: DataLoader(image_datasets[x], batch_size=16, shuffle=(x == 'train'))
               for x in ['train', 'val']}

# ImageFolder derives class names from the sub-folder names (sorted);
# expected here: ['geralt', 'no_geralt'].
print(f"Classes found: {image_datasets['train'].classes}")

In [None]:
import torch.nn as nn

# Load a fresh ImageNet-pre-trained ResNet18. torchvision >= 0.13 selects the
# checkpoint through `weights=` and deprecates `pretrained=True`; keep the old
# call as a fallback for older installs.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Freeze every pre-trained parameter so training cannot overwrite them.
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a fresh 2-way classifier
# (Geralt / not Geralt). It is created after the freeze loop, so its
# parameters keep the default requires_grad=True and remain trainable.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [None]:
import torch.optim as optim

# Loss function used for both the training and validation passes.
criterion = nn.CrossEntropyLoss()

# Adam is handed only the parameters of the final layer (model.fc), so it is
# the only part of the network this optimizer updates.
optimizer_v2 = optim.Adam(params=model.fc.parameters(), lr=1e-3)

In [None]:
import time
import copy

def train_model(model, criterion, optimizer, num_epochs=10):
    """Train ``model`` and return it with its best-validation weights loaded.

    NOTE(review): an earlier cell of this notebook already defines
    ``train_model``; this definition shadows it, so the two must stay in sync
    (or this cell can be dropped once the notebook is cleaned up).

    Every epoch runs a ``'train'`` pass (gradients and optimizer steps) followed
    by a ``'val'`` pass (no gradients) over the module-level ``dataloaders``
    dict, which must provide both keys. The weights of the epoch with the
    highest validation accuracy are remembered and restored before returning.

    Args:
        model: ``torch.nn.Module`` to optimise; it is modified in place.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to the parameters to update.
        num_epochs: Number of train+val passes to run.

    Returns:
        The same ``model`` object with the best validation weights loaded.

    Raises:
        ValueError: If a phase's loader yields no samples.
    """
    since = time.time()
    # Send batches to wherever the model already lives instead of relying on a
    # separate global that could disagree with it.
    model_device = next(model.parameters()).device
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            num_seen = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)

                # Gradients are tracked only during the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    preds = outputs.argmax(dim=1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean, so weight it by the batch size.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                num_seen += batch_size

            if num_seen == 0:
                raise ValueError(f"dataloaders['{phase}'] produced no samples")

            # Average over the samples actually iterated, so the metrics stay
            # correct even if the loader does not cover the whole dataset.
            epoch_loss = running_loss / num_seen
            epoch_acc = running_corrects / num_seen

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Snapshot the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # Restore the best weights seen during validation.
    model.load_state_dict(best_model_wts)
    return model

# Kick off the second training run (15 epochs); the returned model is kept as model_v2.
model_v2 = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer_v2,
    num_epochs=15,
)

In [None]:
# Quick check of model_v2 on a single image (replace the placeholder path with a real file).
test_image = r"img_path.png"
prediction = predict_witcher(image_path=test_image, model=model_v2, device=device)
print(f"The model thinks this is: {prediction}")

In [None]:
import torch.nn as nn
import torch.optim as optim

# 1. Load a fresh ImageNet-pre-trained ResNet18. torchvision >= 0.13 selects
#    the checkpoint through `weights=` and deprecates `pretrained=True`; keep
#    the old call as a fallback for older installs.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# 2. Unfreeze: make sure every parameter is trainable so the whole network
#    can adapt to fine-grained details (jawlines, eyes).
for param in model.parameters():
    param.requires_grad = True

# 3. Replace the final layer with a fresh 2-way classifier and move the model
#    to the device chosen earlier in the notebook.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)
model = model.to(device)

# 4. Tiny learning rate (1e-5): with the whole network unfrozen, small steps
#    keep it from "forgetting" its general vision knowledge too fast.
optimizer_v3 = optim.Adam(model.parameters(), lr=0.00001)

# 5. Run training.
model_v3 = train_model(model, criterion, optimizer_v3, num_epochs=14)

In [None]:
# Quick check of model_v3 on a single image (replace the placeholder path with a real file).
test_image = r"img_path.png"
prediction = predict_witcher(image_path=test_image, model=model_v3, device=device)
print(f"The model thinks this is: {prediction}")

In [None]:
from torchvision import transforms

# Transforms for the v4 run: the v2 augmentation plus a random perspective warp.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.08, 1.0)),
        transforms.RandomHorizontalFlip(),
        # The key addition: distortion_scale=0.4 warps the image to simulate
        # low/high camera angles; p=0.5 applies it to half of the samples.
        transforms.RandomPerspective(distortion_scale=0.4, p=0.5),
        transforms.RandomApply([transforms.GaussianBlur(kernel_size=5)], p=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Rebuild the datasets and loaders. The existing ones keep a reference to the
# transform objects they were created with, so rebinding `data_transforms`
# alone would leave training on the previous augmentation.
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: DataLoader(image_datasets[x], batch_size=16, shuffle=(x == 'train'))
               for x in ['train', 'val']}

# 1. Load a fresh ImageNet-pre-trained ResNet18. torchvision >= 0.13 selects
#    the checkpoint through `weights=` and deprecates `pretrained=True`; keep
#    the old call as a fallback for older installs.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# 2. Unfreeze: make sure every parameter is trainable so the whole network
#    can adapt to fine-grained details (jawlines, eyes).
for param in model.parameters():
    param.requires_grad = True

# 3. Replace the final layer with a fresh 2-way classifier and move the model
#    to the device chosen earlier in the notebook.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)
model = model.to(device)

# 4. Same tiny learning rate as the previous full fine-tuning run, then train.
optimizer_v4 = optim.Adam(model.parameters(), lr=0.00001)
model_v4 = train_model(model, criterion, optimizer_v4, num_epochs=15)

In [None]:
# Quick check of model_v4 on a single image (replace the placeholder path with a real file).
test_image = r"img_path.png"
prediction = predict_witcher(image_path=test_image, model=model_v4, device=device)
print(f"The model thinks this is: {prediction}")

In [None]:
import torch
import os

# Destination: <base_path>/models/geralt_classifier_v4.pth
models_dir = os.path.join(base_path, "models")
model_path = os.path.join(models_dir, "geralt_classifier_v4.pth")

# Create the directory on first use, then write only the weights (state_dict) of model_v4.
os.makedirs(models_dir, exist_ok=True)
torch.save(model_v4.state_dict(), model_path)

print(f"Model saved to: {model_path}")