In [26]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import time
import os
from torchvision.io import read_image
from PIL import Image

In [12]:
# Define the transform
transform = transforms.Compose([
    transforms.Resize(256),              # Resize to 256x256
    transforms.CenterCrop(224),          # Crop to 224x224
    transforms.ToTensor(),               # Convert to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet standards
                         std=[0.229, 0.224, 0.225])
])

In [13]:
# Load the training and testing datasets
train_dataset = datasets.ImageFolder(root='../dataset/training', transform=transform)
test_dataset = datasets.ImageFolder(root='../dataset/testing', transform=transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [14]:
# Checking to make sure we are using our GPU instead of CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [15]:
# Load the pre-trained ResNet-101 model
model = models.resnet18(weights=True)

# Modify the last fully connected layer to match the number of classes in your dataset
num_classes = 5 
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Use Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.to(device)

def train_model(model, criterion, optimizer, num_epochs=25):
    for epoch in range(num_epochs):
        epoch_start = time.time()
        running_loss = 0.0
        running_corrects = 0
        total_batches = len(train_loader)
        
        model.train()  # Set model to training mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            
            if (i+1) % 100 == 0:  # Print every 100 batches
                print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{total_batches}], Loss: {loss.item():.4f}')

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.double() / len(train_loader.dataset)
        epoch_end = time.time()

    print(f'Epoch [{epoch+1}/{num_epochs}] completed in {(epoch_end - epoch_start):.2f} seconds - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}')

    return model

# Train the model
trained_model = train_model(model, criterion, optimizer, num_epochs=25)

Epoch [1/25], Batch [100/1755], Loss: 0.7788
Epoch [1/25], Batch [200/1755], Loss: 0.5523
Epoch [1/25], Batch [300/1755], Loss: 0.5547




Epoch [1/25], Batch [400/1755], Loss: 0.7806
Epoch [1/25], Batch [500/1755], Loss: 0.4273
Epoch [1/25], Batch [600/1755], Loss: 0.5617
Epoch [1/25], Batch [700/1755], Loss: 0.4897
Epoch [1/25], Batch [800/1755], Loss: 0.2220
Epoch [1/25], Batch [900/1755], Loss: 0.3644
Epoch [1/25], Batch [1000/1755], Loss: 0.3539
Epoch [1/25], Batch [1100/1755], Loss: 0.5872
Epoch [1/25], Batch [1200/1755], Loss: 0.4509
Epoch [1/25], Batch [1300/1755], Loss: 0.2939
Epoch [1/25], Batch [1400/1755], Loss: 0.3714
Epoch [1/25], Batch [1500/1755], Loss: 0.2302
Epoch [1/25], Batch [1600/1755], Loss: 0.3799
Epoch [1/25], Batch [1700/1755], Loss: 0.2478
Epoch [2/25], Batch [100/1755], Loss: 0.2331
Epoch [2/25], Batch [200/1755], Loss: 0.2024
Epoch [2/25], Batch [300/1755], Loss: 0.3390
Epoch [2/25], Batch [400/1755], Loss: 0.2719
Epoch [2/25], Batch [500/1755], Loss: 0.1224
Epoch [2/25], Batch [600/1755], Loss: 0.3245
Epoch [2/25], Batch [700/1755], Loss: 0.4523
Epoch [2/25], Batch [800/1755], Loss: 0.3218
Ep

In [16]:
# Save the entire model
torch.save(trained_model, '..\models\PhotoLingo_ResNet18_v1.pth') 

In [17]:
def evaluate_model(model, test_loader):
    model.eval()  # Set the model to evaluation mode

    running_corrects = 0
    total = 0

    with torch.no_grad():  # No need to track gradients for evaluation
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            running_corrects += torch.sum(preds == labels.data)
            total += labels.size(0)

    test_acc = running_corrects.double() / total
    print(f'Test Accuracy: {test_acc:.4f}')

# Evaluate the model
evaluate_model(trained_model, test_loader)

Test Accuracy: 0.9384


In [24]:
class UnlabeledDataset(Dataset):
    def __init__(self, directory, transform=None):
        self.directory = directory
        self.transform = transform
        self.images = os.listdir(directory)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.directory, self.images[idx])
        image = Image.open(img_path).convert('RGB')  # Load as PIL Image
        if self.transform:
            image = self.transform(image)
        return image, self.images[idx]

In [27]:
model.eval()

test_dataset = UnlabeledDataset('../dataset/testing_ICDAR', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

predictions = []
for inputs, image_names in test_loader:
    inputs = inputs.to(device)
    outputs = model(inputs)
    _, preds = torch.max(outputs, 1)
    
    class_names = ['Arabic', 'Hindi', 'Japanese', 'Korean', 'Latin'] 
    predicted_classes = [class_names[p] for p in preds]
    
    for img_name, prediction in zip(image_names, predicted_classes):
        predictions.append(f"{img_name},{prediction}")

In [28]:
with open('predictions.txt', 'w') as f:
    for line in predictions:
        f.write(line + '\n')