In [6]:
import pandas as pd
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Training-split metadata. The dataset class defined in this notebook reads the
# "image_path", "room_type" and "style" columns of this frame.
TRAIN_CSV_PATH = "C:/Deep Learning Project/data/pinterest/train_data.csv"
df = pd.read_csv(TRAIN_CSV_PATH)

class RoomStyleDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs from a metadata frame.

    Every row of ``dataframe`` must provide ``image_path``, ``room_type`` and
    ``style`` columns. The label is the string ``"<room_type>_<style>"``.

    Args:
        dataframe: pandas DataFrame with one row per image.
        transform: optional callable applied to the RGB PIL image before it
            is returned.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image at positional row ``idx`` and build its string label.

        Returns:
            A ``(image, label)`` tuple; ``image`` is the transformed image when
            a transform was given, otherwise the RGB PIL image.
        """
        # Materialize the row once instead of once per column lookup.
        row = self.dataframe.iloc[idx]

        # The context manager closes the underlying file as soon as the pixel
        # data has been converted; convert() hands back a separate image object.
        with Image.open(row["image_path"]) as raw_image:
            image = raw_image.convert("RGB")  # always work in RGB

        label = f"{row['room_type']}_{row['style']}"

        # Explicit None check: a callable that happens to be falsy is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Preprocessing pipeline: fixed-size resize, PIL -> tensor, then per-channel
# normalization (the mean/std values look like the usual ImageNet statistics).
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

train_dataset = RoomStyleDataset(df, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Sanity check: pull a single batch and report its size, the shape of its
# first image and its first label. Nothing is printed for an empty loader.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    images, labels = first_batch
    print(f"Batch от {len(images)} images")
    print(f"Shape of first image: {images[0].shape}")  # (3, 224, 224)
    print(f"First label: {labels[0]}")


Batch от 32 изображения
Форма на първото изображение: torch.Size([3, 224, 224])
Първи етикет: bathroom_industrial


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
# from torchvision import models
from torchvision.models import resnet18, ResNet18_Weights
from torch.utils.data import DataLoader
from sklearn import preprocessing
from tqdm import tqdm

# Build the combined "<room_type>_<style>" class string for every row, fit the
# encoder on it and store the resulting integer ids in a new column of `df`.
le = preprocessing.LabelEncoder()
combined_labels = df["room_type"] + "_" + df["style"]
df["encoded_label"] = le.fit_transform(combined_labels)

# One output unit per distinct combined class seen during fitting.
num_classes = len(le.classes_)

# Custom Dataset class with numerical labels
class RoomStyleDataset(Dataset):
    """Map-style dataset yielding ``(image, label_tensor)`` pairs.

    Every row of ``dataframe`` must provide an ``image_path`` column and an
    ``encoded_label`` column holding the integer class id.

    Args:
        dataframe: pandas DataFrame with one row per image.
        transform: optional callable applied to the RGB PIL image before it
            is returned.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image at positional row ``idx`` together with its class id.

        Returns:
            A ``(image, label)`` tuple; ``label`` is a 0-dim ``torch.long``
            tensor built from the row's ``encoded_label`` value.
        """
        # Materialize the row once instead of once per column lookup.
        row = self.dataframe.iloc[idx]

        # The context manager closes the underlying file as soon as the pixel
        # data has been converted; convert() hands back a separate image object.
        with Image.open(row["image_path"]) as raw_image:
            image = raw_image.convert("RGB")

        label = row["encoded_label"]

        # Explicit None check: a callable that happens to be falsy is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor(label, dtype=torch.long)

# Data transforms: fixed-size resize, PIL -> tensor, per-channel normalization.
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# Training dataset over `df` (which now carries "encoded_label") and a loader
# that reshuffles the samples and serves them in batches of 32.
train_dataset = RoomStyleDataset(df, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Define ResNet18 model
def create_model(num_classes):
    """Build a ResNet-18 with its default pretrained weights and a new head.

    Args:
        num_classes: number of output units of the replacement ``fc`` layer.

    Returns:
        The ResNet-18 module whose ``fc`` attribute has been replaced by a
        freshly initialized ``nn.Linear`` with ``num_classes`` outputs.
    """
    backbone = resnet18(weights=ResNet18_Weights.DEFAULT)

    # Keep the feature width of the original head, swap only the output size.
    head_inputs = backbone.fc.in_features
    backbone.fc = nn.Linear(head_inputs, num_classes)
    return backbone

# Training parameters
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
learning_rate = 0.001
num_epochs = 10
seed = 42

# Seed torch's global RNG before the model and the shuffled loader draw from
# it, so the new head's initialization and the batch order are repeatable
# across runs (GPU kernels may still introduce small nondeterminism).
torch.manual_seed(seed)

# Create model, loss, and optimizer
model = create_model(num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop: one full pass over train_loader per epoch, updating every
# model parameter and tracking loss / accuracy on the training batches.
for epoch in range(num_epochs):
    model.train()  # training-mode behaviour; re-armed at the start of each epoch
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Statistics: predictions are the arg-max class of each output row.
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # epoch_loss is the mean of the per-batch losses; accuracy is in percent.
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = correct / total * 100

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

# Save model (weights only, via state_dict)
torch.save(model.state_dict(), "C:/Project Deep Learning/models/room_style_model.pth")
print("Model trained and saved as 'room_style_model.pth'")


Epoch 1/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [14:13<00:00, 10.28s/it]


Epoch 1/10, Loss: 2.4502, Accuracy: 25.23%


Epoch 2/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [13:55<00:00, 10.06s/it]


Epoch 2/10, Loss: 1.9596, Accuracy: 34.82%


Epoch 3/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [14:30<00:00, 10.49s/it]


Epoch 3/10, Loss: 1.6898, Accuracy: 42.90%


Epoch 4/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [15:09<00:00, 10.96s/it]


Epoch 4/10, Loss: 1.4729, Accuracy: 49.06%


Epoch 5/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [14:26<00:00, 10.45s/it]


Epoch 5/10, Loss: 1.2318, Accuracy: 57.85%


Epoch 6/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [12:12<00:00,  8.82s/it]


Epoch 6/10, Loss: 1.0218, Accuracy: 66.24%


Epoch 7/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [08:48<00:00,  6.37s/it]


Epoch 7/10, Loss: 0.8060, Accuracy: 73.72%


Epoch 8/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [29:26<00:00, 21.29s/it]


Epoch 8/10, Loss: 0.6372, Accuracy: 80.93%


Epoch 9/10: 100%|██████████████████████████████████████████████████████████████████████| 83/83 [07:27<00:00,  5.39s/it]


Epoch 9/10, Loss: 0.5471, Accuracy: 83.46%


Epoch 10/10: 100%|█████████████████████████████████████████████████████████████████████| 83/83 [07:37<00:00,  5.51s/it]

Epoch 10/10, Loss: 0.4741, Accuracy: 85.42%
Model trained and saved as 'room_style_model.pth'





In [11]:
def _load_encoded_split(csv_path):
    """Read one split's CSV and add ``encoded_label`` using the fitted ``le``."""
    split_df = pd.read_csv(csv_path)
    combined = split_df["room_type"] + "_" + split_df["style"]
    # Reuse the encoder fitted on the training labels so class ids line up
    # across splits.
    split_df["encoded_label"] = le.transform(combined)
    return split_df


# Load validation and test data, encoded with the training label encoder
val_df = _load_encoded_split("../data/val_data.csv")
test_df = _load_encoded_split("../data/test_data.csv")

# Datasets share the training transform; loaders keep the file order
# (shuffle=False) and use batches of 32.
val_dataset = RoomStyleDataset(val_df, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

test_dataset = RoomStyleDataset(test_df, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [12]:
# Validation Phase: score the trained model on val_loader without updating it.
model.eval()  # switch the model to evaluation mode
val_loss, val_correct, val_total = 0.0, 0, 0

# No gradients are needed for scoring.
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Statistics: predicted class is the arg-max of each output row.
        val_loss += loss.item()
        predicted = torch.max(outputs, 1).indices
        val_correct += predicted.eq(labels).sum().item()
        val_total += labels.shape[0]

# Mean of the per-batch losses, and accuracy in percent.
val_epoch_loss = val_loss / len(val_loader)
val_epoch_accuracy = val_correct / val_total * 100

print(f"Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_accuracy:.2f}%")

Validation Loss: 3.1199, Validation Accuracy: 31.83%


In [13]:
# Test Phase: score the trained model on test_loader without updating it.
# Put the model in evaluation mode here rather than relying on an earlier cell
# having done so; otherwise running this cell right after training would
# evaluate the network in training mode.
model.eval()
test_loss = 0.0
test_correct = 0
test_total = 0

# No gradients are needed for scoring.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Statistics: predicted class is the arg-max of each output row.
        test_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        test_correct += (predicted == labels).sum().item()
        test_total += labels.size(0)

# Mean of the per-batch losses, and accuracy in percent.
test_epoch_loss = test_loss / len(test_loader)
test_epoch_accuracy = test_correct / test_total * 100

print(f"Test Loss: {test_epoch_loss:.4f}, Test Accuracy: {test_epoch_accuracy:.2f}%")


Test Loss: 3.0119, Test Accuracy: 32.37%
