In [17]:
import pandas as pd
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from pathlib import Path

data_path = '/home/ubuntu/landscape-aesthetics/data/external/scenicornot/scenicornot.metadata.csv'
image_folder = Path('/home/ubuntu/landscape-aesthetics/data/external/scenicornot') 

# Check if the data path exists
if not os.path.exists(data_path):
    raise FileNotFoundError(f"Data file not found at {data_path}")

#read dataset
data = pd.read_csv(data_path)

# Check if the root directory exists
root_dir = '/home/ubuntu/landscape-aesthetics/data/external/scenicornot/'
if not os.path.exists(root_dir):
    raise FileNotFoundError(f"Root directory not found at {root_dir}")

# 预处理数据集，移除无效路径
valid_files = []
missing_files = []
for idx in range(len(data)):
    img_name = data.iloc[idx]['filename']
    image_path = image_folder / Path(img_name)
    if image_path.exists():
        valid_files.append(idx)
    else:
        missing_files.append(image_path)

if missing_files:
    print(f"Missing {len(missing_files)} files:")
    for file in missing_files:
        print(file)

# 只保留有效的文件记录
valid_data = data.iloc[valid_files].reset_index(drop=True)

class ScenicDataset(Dataset):
    def __init__(self, data_frame, root_dir, transform=None):
        self.data_frame = data_frame
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.data_frame.iloc[idx]['filename']) # combine root address with filename
        image_path = self.root_dir / Path(img_name)
        print(f"Loading image: {image_path}")  # 调试信息，打印图片路径

        try:
            image = Image.open(image_path).convert('RGB')
        except FileNotFoundError:
            print(f"File not found: {image_path}, skipping.")
            return None, None
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            return None, None
            
        rating = self.data_frame.iloc[idx]['average']
        
        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(rating, dtype=torch.float32)

# centered crop 256*256
data_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
])

# Create the dataset
image_dataset = ScenicDataset(data_frame=valid_data,
                              root_dir=image_folder,
                              transform=data_transforms)

# Split the dataset into training and validation sets
train_size = int(0.8 * len(image_dataset))
val_size = len(image_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(image_dataset, [train_size, val_size])

# Create data loaders
def collate_fn(batch):
    batch = list(filter(lambda x: x[0] is not None and isinstance(x[0], torch.Tensor), batch))
    return torch.utils.data.dataloader.default_collate(batch)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Print the sizes of the training and validation set
print(f"Training set size: {len(train_loader.dataset)}")
print(f"Validation set size: {len(val_loader.dataset)}")

Missing 1 files:
/home/ubuntu/landscape-aesthetics/data/external/scenicornot/photos/76/41/764143_e860c8c4.jpg
Training set size: 169484
Validation set size: 42371


In [18]:
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms

# utilise ResNet50 to extract deep features
class ResNetFeatureExtractor(nn.Module):
    def __init__(self):
        super(ResNetFeatureExtractor, self).__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])  # Remove the last fully connected layer

    def forward(self, x):
        with torch.no_grad():
            features = self.resnet(x)
        return features.squeeze()

feature_extractor = ResNetFeatureExtractor()
feature_extractor.eval()

# define a simple regression model
class RegressionModel(nn.Module):
    def __init__(self, input_dim):
        super(RegressionModel, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 1)
        )

    def forward(self, x):
        return self.fc(x)

# a more complex one
# class RegressionModel(nn.Module):
#     def __init__(self, input_dim):
#         super(ComplexRegressionModel, self).__init__()
#         self.fc = nn.Sequential(
#             nn.Linear(input_dim, 512),
#             nn.ReLU(),
#             nn.BatchNorm1d(512),
#             nn.Dropout(0.5),
#             nn.Linear(512, 256),
#             nn.ReLU(),
#             nn.BatchNorm1d(256),
#             nn.Dropout(0.5),
#             nn.Linear(256, 128),
#             nn.ReLU(),
#             nn.BatchNorm1d(128),
#             nn.Dropout(0.5),
#             nn.Linear(128, 1)
#         )

#     def forward(self, x):
#         return self.fc(x)

In [20]:
def train_model(feature_extractor, model, criterion, optimizer, train_loader, val_loader, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for images, labels in train_loader:
            if images is None:  # skip invalid images
                continue
            features = feature_extractor(images)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss}')

        # 验证模型
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                if images is None:  # jump invalid images
                    continue
                features = feature_extractor(images)
                outputs = model(features)
                loss = criterion(outputs.squeeze(), labels)
                val_loss += loss.item() * images.size(0)

        val_loss /= len(val_loader.dataset)
        print(f'Validation Loss: {val_loss}')

input_dim = 2048  # ResNet50 feature dimension
model = RegressionModel(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# training model
train_model(feature_extractor, model, criterion, optimizer, train_loader, val_loader, num_epochs=10)



Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/geophotos/01/07/69/1076969_9dcb56a7.jpg
Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/photos/09/11/091122_c9a2b6dc.jpg
Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/geophotos/01/06/74/1067444_60622350.jpg
Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/photos/87/39/873918_de0bb6e8.jpg
Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/photos/46/89/468917_53997b95.jpg
Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/photos/77/01/770114_d3425b5f.jpg
Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/photos/67/24/672457_409303d7.jpg
Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/photos/33/02/330293_ddcb1766.jpg
Loading image: /home/ubuntu/landscape-aesthetics/data/external/scenicornot/photos/53/48/534801_a63f32db.jpg
Loading image:

KeyboardInterrupt: 

In [None]:
# evaluate
def evaluate_model(feature_extractor, model, test_loader):
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            features = feature_extractor(images)
            outputs = model(features)
            loss = criterion(outputs.squeeze(), labels)
            test_loss += loss.item() * images.size(0)

    test_loss /= len(test_loader.dataset)
    print(f'Test Loss: {test_loss}')
    return test_loss

# # save the model
# torch.save(model.state_dict(), 'complex_regression_model.pth')
# print("Model saved to complex_regression_model.pth")
