In [2]:
import pandas as pd
import os
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import random_split

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("Cuda")
else:
    print("CPU")


# Number of epochs
num_epochs = 10

# For storing the loss and accuracy to plot later
train_losses = []
train_accuracies = []



train_csv_path = 'train.csv'
test_csv_path = 'test.csv'

# Load the data
train_df = pd.read_csv(train_csv_path, delimiter='\t', skipinitialspace=True)
test_df = pd.read_csv(test_csv_path, delimiter='\t', skipinitialspace=True)

# Define the dataset class
class FashionDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform
        self.label_mapping = {label: idx for idx, label in enumerate(dataframe.iloc[:, 1].unique())}

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Assuming image ids are in the first column
        img_id = self.dataframe.iloc[idx, 0]
        img_name = os.path.join(self.image_dir, f"{img_id}.jpg")
        image = Image.open(img_name)
        label_name = self.dataframe.iloc[idx, 1]
        label = self.label_mapping[label_name]
        
        if self.transform:
            image = self.transform(image)

        return image, label

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
     
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Create the dataset
train_dataset = FashionDataset(dataframe=train_df, image_dir='archive\images', transform=transform)
test_dataset = FashionDataset(dataframe=test_df, image_dir='archive\images', transform=transform)

# Create the dataloaders
batch_size = 32
validation_ratio = 0.1
num_train_examples = len(train_dataset)
num_validation_examples = int(num_train_examples * validation_ratio)
num_train_examples -= num_validation_examples
train_subset, validation_subset = random_split(train_dataset, [num_train_examples, num_validation_examples])


train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
validation_loader = DataLoader(validation_subset, batch_size=batch_size, shuffle=False)

class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = x.view(-1, 128 * 16 * 16)  # Flatten the tensor
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define the model
num_classes = 13  
model = SimpleCNN(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Function for the training step
def train(model, criterion, optimizer, dataloader, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for batch_idx, (data, targets) in enumerate(dataloader):
        # Move tensors to the configured device
        data = data.to(device)
        targets = targets.to(device)

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

    epoch_loss = running_loss / len(dataloader)
    epoch_accuracy = 100 * correct / total

    print(f'Train loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)

def validate(model, criterion, dataloader, device):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():  # No need to track the gradients
        for batch_idx, (data, targets) in enumerate(dataloader):
            data = data.to(device)
            targets = targets.to(device)

            outputs = model(data)
            loss = criterion(outputs, targets)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    epoch_loss = running_loss / len(dataloader)
    epoch_accuracy = 100 * correct / total

    print(f'Validation loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')


# Function to evaluate the model on the test set
def test(model, dataloader, device):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():
        for data, targets in dataloader:
            data = data.to(device)
            targets = targets.to(device)

            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

Cuda


In [3]:
# Function for the validation step

# Add validation to the training epochs
for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')
    train(model, criterion, optimizer, train_loader, device)
    validate(model, criterion, validation_loader, device)

# Save the model checkpoint
torch.save(model.state_dict(), 'fashion_model.pth')


Epoch 1/10
Train loss: 0.4023, Accuracy: 87.96%
Validation loss: 0.2191, Accuracy: 93.27%
Epoch 2/10
Train loss: 0.1810, Accuracy: 94.46%
Validation loss: 0.2033, Accuracy: 93.94%
Epoch 3/10
Train loss: 0.1103, Accuracy: 96.40%
Validation loss: 0.1839, Accuracy: 95.08%
Epoch 4/10
Train loss: 0.0707, Accuracy: 97.74%
Validation loss: 0.2194, Accuracy: 94.39%
Epoch 5/10
Train loss: 0.0468, Accuracy: 98.57%
Validation loss: 0.2330, Accuracy: 94.56%
Epoch 6/10
Train loss: 0.0374, Accuracy: 98.84%
Validation loss: 0.2777, Accuracy: 94.51%
Epoch 7/10
Train loss: 0.0336, Accuracy: 98.94%
Validation loss: 0.2649, Accuracy: 94.96%
Epoch 8/10
Train loss: 0.0237, Accuracy: 99.26%
Validation loss: 0.3730, Accuracy: 93.97%
Epoch 9/10
Train loss: 0.0237, Accuracy: 99.35%
Validation loss: 0.3689, Accuracy: 94.39%
Epoch 10/10
Train loss: 0.0202, Accuracy: 99.45%
Validation loss: 0.3770, Accuracy: 95.03%


In [7]:
# Load the model (for evaluation)
model.load_state_dict(torch.load('fashion_model.pth'))

# Evaluate the model
test(model, test_loader, device)

Test Accuracy: 28.60%
