# Train Image Processing using Hugging Face Dataset

This notebook uses the DeepFashion dataset, which contains images of different clothes and poses. The goal is simply to test whether this is the dataset I really want.
Dataset:
https://huggingface.co/datasets/lirus18/deepfashion

In [2]:
from datasets import load_dataset

# Fetch the "train" split of the DeepFashion dataset.
dataset = load_dataset(
    path="lirus18/deepfashion",
    split="train",
)

# Summarise the columns and the number of rows that were loaded.
print(dataset)


Generating train split: 100%|██████████| 13679/13679 [00:22<00:00, 608.03 examples/s]


Dataset({
    features: ['image', 'openpose', 'cloth', 'caption'],
    num_rows: 13679
})


In [4]:
# Keep the first row around as a concrete example, then show the column
# schema followed by that row.
sample = dataset[0]
print(dataset.features)
print(sample)

{'image': Image(mode=None, decode=True, id=None), 'openpose': Image(mode=None, decode=True, id=None), 'cloth': Image(mode=None, decode=True, id=None), 'caption': Value(dtype='string', id=None)}
{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=768x1024 at 0x11767E300>, 'openpose': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=768x1024 at 0x116AECF20>, 'cloth': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=768x1024 at 0x117676210>, 'caption': ''}


Processing the Dataset

In [10]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset

In [11]:
# Preprocessing pipeline applied to every image before it reaches the network.
_resize = transforms.Resize((224, 224))   # fixed 224x224 input, matching the ResNet input size
_to_tensor = transforms.ToTensor()        # image -> tensor
_normalize = transforms.Normalize(        # per-channel mean / std normalisation
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([_resize, _to_tensor, _normalize])

In [12]:
def preprocess_sample(sample, field='image', label=0):
    """Turn one dataset row into an ``(image_tensor, label)`` pair.

    Args:
        sample: A dataset row, indexable by column name.
        field: Name of the image column to use. Defaults to ``'image'``;
            pass ``'cloth'`` to use that column instead.
        label: Label attached to the sample. Defaults to the placeholder
            ``0`` because no real labelling logic exists yet.

    Returns:
        tuple: ``(image, label)`` where ``image`` is the output of the
        module-level ``transform`` pipeline.
    """
    # Run the selected picture through the shared preprocessing pipeline.
    image = transform(sample[field])
    return image, label
print(preprocess_sample(sample))

(tensor([[[1.8550, 1.8550, 1.8550,  ..., 1.8893, 1.8893, 1.8893],
         [1.8550, 1.8550, 1.8550,  ..., 1.8893, 1.8893, 1.8893],
         [1.8550, 1.8550, 1.8550,  ..., 1.8893, 1.8893, 1.8893],
         ...,
         [1.7180, 1.7180, 1.7352,  ..., 1.8722, 1.8722, 1.8722],
         [1.7180, 1.7180, 1.7352,  ..., 1.8722, 1.8722, 1.8893],
         [1.7180, 1.7180, 1.7180,  ..., 1.8722, 1.8722, 1.8722]],

        [[2.0084, 2.0084, 2.0084,  ..., 2.0434, 2.0434, 2.0434],
         [2.0084, 2.0084, 2.0084,  ..., 2.0434, 2.0434, 2.0434],
         [2.0084, 2.0084, 2.0084,  ..., 2.0434, 2.0434, 2.0434],
         ...,
         [1.8333, 1.8333, 1.8508,  ..., 1.9734, 1.9734, 1.9734],
         [1.8333, 1.8333, 1.8508,  ..., 1.9734, 1.9734, 1.9909],
         [1.8333, 1.8333, 1.8333,  ..., 1.9734, 1.9734, 1.9734]],

        [[2.1520, 2.1520, 2.1520,  ..., 2.2217, 2.2217, 2.2217],
         [2.1520, 2.1520, 2.1520,  ..., 2.2217, 2.2217, 2.2217],
         [2.1520, 2.1520, 2.1520,  ..., 2.2217, 2.2217, 2

In [13]:
class DeepFashionDataset(Dataset):
    """Torch ``Dataset`` wrapper that yields ``(image, label)`` pairs.

    Every underlying row must provide an ``'image'`` entry and, unless
    ``label_fn`` is given, a ``'caption'`` entry.

    NOTE(review): with the default label logic, any caption that is not a
    plain decimal integer becomes label ``0``. The sample printed earlier in
    this notebook has an empty caption; if all rows look like that, every
    label is identical and loss / accuracy figures carry no information.
    Pass ``label_fn`` to derive real labels.

    Args:
        dataset: Collection of rows supporting ``len()`` and integer indexing.
        transform: Optional callable applied to the image before returning it.
        label_fn: Optional callable ``row -> label``. When omitted, the label
            is parsed from ``row['caption']`` (see ``_caption_to_label``).
    """

    def __init__(self, dataset, transform=None, label_fn=None):
        self.dataset = dataset
        self.transform = transform
        self.label_fn = label_fn

    def __len__(self):
        """Return the number of rows in the wrapped dataset."""
        return len(self.dataset)

    @staticmethod
    def _caption_to_label(caption):
        """Parse a caption into an integer label, falling back to ``0``.

        ``str.isdecimal`` is used rather than ``str.isdigit`` because the
        latter also accepts characters (e.g. superscript digits) that
        ``int()`` cannot parse. Non-string captions such as ``None`` also
        fall back to ``0`` instead of raising.
        """
        if isinstance(caption, str) and caption.isdecimal():
            return int(caption)
        return 0

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        sample = self.dataset[idx]

        # Main image, optionally preprocessed.
        image = sample['image']
        if self.transform:
            image = self.transform(image)

        # A caller-supplied labelling rule wins over the caption fallback.
        if self.label_fn is not None:
            label = self.label_fn(sample)
        else:
            label = self._caption_to_label(sample['caption'])
        return image, label

Prepare Data Loaders

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, random_split
from datasets import load_dataset
from PIL import Image

# Split the dataset (e.g., 80% train, 10% val, 10% test)
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size  # remainder, so the three sizes always sum to len(dataset)

# Seed the split so the same rows land in train / val / test on every run;
# without a fixed generator the validation and test numbers are not reproducible.
SPLIT_SEED = 42
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoaders
batch_size = 32


def _make_loader(subset, shuffle):
    """Wrap one split in DeepFashionDataset and batch it with the shared settings."""
    wrapped = DeepFashionDataset(subset, transform=transform)
    return DataLoader(wrapped, batch_size=batch_size, shuffle=shuffle)


# Only the training split is shuffled; evaluation order stays fixed.
train_loader = _make_loader(train_dataset, shuffle=True)
val_loader = _make_loader(val_dataset, shuffle=False)
test_loader = _make_loader(test_dataset, shuffle=False)


Defining the ResNet Model

ResNet50 is a pre-trained deep learning model that uses a convolutional neural network (CNN) to perform visual recognition and image classification.

Convolution layers: Apply filters to images to detect patterns, edges, and textures 

Residual blocks: Act as shortcuts that allow the model to skip layers, which helps with training and information flow 

Fully connected layers: Map the learned features to the final output classes

In [15]:
import torch.nn as nn
from torchvision import models

# Pick the compute device first: CUDA when present, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ResNet50 with pretrained weights as the starting point.
# NOTE(review): newer torchvision releases may prefer a `weights=` argument
# over `pretrained=True` — confirm against the installed version.
model = models.resnet50(pretrained=True)

# Swap the final fully connected layer for one that emits `num_classes` outputs.
num_classes = 10  # placeholder — update with the actual number of classes
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)

# Place the network on the chosen device.
model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/Param/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:06<00:00, 16.4MB/s]


Train the Model

ResNet50 is taking too long to train, so ResNet18 will be used instead.

In [16]:
import torch.optim as optim

# Objective and optimiser for the classifier
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Run a fixed number of passes over the training data
epochs = 10
for epoch in range(epochs):
    model.train()        # switch the network to training mode
    running_loss = 0.0   # sum of the per-batch losses seen this epoch

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)

        # Backward pass, then parameter update
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Report the mean per-batch loss for the epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}")

Epoch 1/10, Loss: 0.008139423889087838
Epoch 2/10, Loss: 4.4229061599012034e-08
Epoch 3/10, Loss: 2.358400320323899e-08


KeyboardInterrupt: 

ResNet18

In [18]:
# CUDA when available, otherwise CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

num_classes = 10  # placeholder — set this to the number of classes in the dataset
model = models.resnet18(pretrained=True)

# Replace the final fully connected layer with a `num_classes`-output head,
# then move the whole network to the selected device.
fc_inputs = model.fc.in_features
model.fc = nn.Linear(fc_inputs, num_classes)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /Users/Param/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:03<00:00, 12.7MB/s]


In [21]:
# Loss function and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Each epoch: one pass over the training data, then one pass over validation
epochs = 10
for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader)}")

    # ---- validation pass (gradient tracking disabled) ----
    model.eval()
    val_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            val_loss += criterion(logits, batch_labels).item()

            # Predicted class = index of the largest output along dim 1
            predicted = logits.max(dim=1)[1]
            total += batch_labels.shape[0]
            correct += (predicted == batch_labels).sum().item()

    val_accuracy = 100 * correct / total
    print(f"Validation Loss: {val_loss/len(val_loader):.4f}, Accuracy: {val_accuracy}%")



Epoch 1/10, Loss: 8.572518341061818e-09
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 2/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 3/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 4/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 5/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 6/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 7/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 8/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 9/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%
Epoch 10/10, Loss: 0.0
Validation Loss: 0.0000, Accuracy: 100.0%


Testing the model

In [22]:
# Evaluate on the test loader with the model in eval mode and gradients disabled
model.eval()
test_loss, correct, total = 0.0, 0, 0

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_images)
        test_loss += criterion(logits, batch_labels).item()

        # Predicted class = index of the largest output along dim 1
        predicted = logits.max(dim=1)[1]
        total += batch_labels.shape[0]
        correct += (predicted == batch_labels).sum().item()

# Accuracy as a percentage of all test samples seen
test_accuracy = 100 * correct / total

print(f"Test Loss: {test_loss/len(test_loader):.4f}, Accuracy: {test_accuracy:.2f}%")


Test Loss: 0.0000, Accuracy: 100.00%


Save Model

In [23]:
# Persist only the model's state dict (not the whole model object).
trained_weights = model.state_dict()
torch.save(trained_weights, "resnet18_deepfashion.pth")

Test the trained model on an image-processing task

In [24]:
# Check unique categories in the dataset.
# Read only the 'caption' column: iterating over whole rows
# (`for sample in dataset`) also decodes the 'image', 'openpose' and 'cloth'
# pictures of every row, which is slow enough that this cell was previously
# interrupted before it finished.
categories = set(dataset["caption"])
print(f"Categories: {categories}")

KeyboardInterrupt: 