In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets, models
from timm import create_model
import os
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import numpy as nprom torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToPILImage
import pandas as pd
from pathlib import Path
import random
import matplotlib.pyplot as plt
from torchvision.io import read_image
from torchvision.transforms import ToPILImage

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_dir = 'train/train'

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    # transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    # transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


class ImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        
        for label, class_name in enumerate(os.listdir(root_dir)):
            class_folder = os.path.join(root_dir, class_name)
            if os.path.isdir(class_folder):
                for img_name in os.listdir(class_folder):
                    img_path = os.path.join(class_folder, img_name)
                    if img_path.endswith(('.jpg', '.jpeg', '.png')):
                        self.image_paths.append(img_path)
                        self.labels.append(label)
        
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        image = read_image(img_path).float() 
        image = ToPILImage()(image) 
        
        if self.transform:
            image = self.transform(image)
            
        return image, label

dataset = ImageDataset(root_dir=train_dir, transform=transform)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

model = create_model('convnextv2_large', pretrained=True, num_classes=len(os.listdir(train_dir)))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

def calculate_accuracy(loader, model, device):
    model.eval()  
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

epochs = 10 

for epoch in range(epochs):
    model.train() 
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", ncols=100):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    val_accuracy = calculate_accuracy(val_loader, model, device)

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")


  return F.linear(input, self.weight, self.bias)
Epoch 1/10: 100%|███████████████████████████████████████████████████| 72/72 [01:38<00:00,  1.37s/it]


Epoch [1/10], Loss: 2.0790, Validation Accuracy: 54.37%


Epoch 2/10: 100%|███████████████████████████████████████████████████| 72/72 [01:25<00:00,  1.18s/it]


Epoch [2/10], Loss: 1.2032, Validation Accuracy: 64.86%


Epoch 3/10: 100%|███████████████████████████████████████████████████| 72/72 [01:13<00:00,  1.02s/it]


Epoch [3/10], Loss: 0.8391, Validation Accuracy: 70.28%


Epoch 4/10: 100%|███████████████████████████████████████████████████| 72/72 [01:17<00:00,  1.08s/it]


Epoch [4/10], Loss: 0.7086, Validation Accuracy: 70.28%


Epoch 5/10: 100%|███████████████████████████████████████████████████| 72/72 [01:20<00:00,  1.12s/it]


Epoch [5/10], Loss: 0.6568, Validation Accuracy: 69.93%


Epoch 6/10: 100%|███████████████████████████████████████████████████| 72/72 [01:14<00:00,  1.03s/it]


Epoch [6/10], Loss: 0.6377, Validation Accuracy: 69.76%


Epoch 7/10: 100%|███████████████████████████████████████████████████| 72/72 [01:14<00:00,  1.04s/it]


Epoch [7/10], Loss: 0.6289, Validation Accuracy: 69.23%


Epoch 8/10: 100%|███████████████████████████████████████████████████| 72/72 [01:14<00:00,  1.03s/it]


Epoch [8/10], Loss: 0.6264, Validation Accuracy: 70.98%


Epoch 9/10: 100%|███████████████████████████████████████████████████| 72/72 [01:09<00:00,  1.04it/s]


Epoch [9/10], Loss: 0.6185, Validation Accuracy: 70.28%


Epoch 10/10: 100%|██████████████████████████████████████████████████| 72/72 [01:20<00:00,  1.12s/it]


Epoch [10/10], Loss: 0.6191, Validation Accuracy: 69.58%


In [3]:
test_dir = 'test/test' 

class_names = sorted(os.listdir(train_dir))

class TestImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        
        for img_name in os.listdir(root_dir):
            img_path = os.path.join(root_dir, img_name)
            if img_path.endswith(('.jpg', '.jpeg', '.png')):
                self.image_paths.append(img_path)
        
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = read_image(img_path).float()
        image = ToPILImage()(image)
        if self.transform:
            image = self.transform(image)
            
        return image, img_path 

test_dataset = TestImageDataset(root_dir=test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval() 

image_names = []
predicted_labels = []

with torch.no_grad():
    for images, image_paths in tqdm(test_loader, desc="Predicting", ncols=100):
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        for i in range(len(image_paths)):
            image_name = Path(image_paths[i]).stem
            image_names.append(image_name)
            predicted_class_name = class_names[predicted[i].item()]
            predicted_labels.append(predicted_class_name)

df = pd.DataFrame({
    'name': image_names,
    'label': predicted_labels
})

df.to_csv('predictions.csv', index=False)

Predicting: 100%|███████████████████████████████████████████████████| 39/39 [00:14<00:00,  2.65it/s]
