In [2]:
import os
import random
import re
import sys
from collections import Counter
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
from PIL import Image
from sklearn.metrics import accuracy_score, recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error, r2_score
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm

In [None]:
# Path configuration.
# The project root is taken to be two directory levels above the current
# working directory of the kernel.
file_location_path = Path.cwd()
project_base_path = file_location_path.parent.parent

# Directory that is scanned for the cleaned ns6 parquet shards.
ns6_wiki_paths = project_base_path.joinpath(
    'data', 'processed', 'wikimedia_commons', 'clean'
)

# Root directory that the image paths stored in the parquet files are joined onto.
image_folder = Path('/home/ubuntu/landscape-aesthetics')

# Build a ResNet-18 with no pretrained weights and replace its final fully
# connected layer with a 2-output linear head, so the architecture matches
# the checkpoint loaded below.
model = models.resnet18(weights=None)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=2)

# Read the checkpoint onto the CPU; weights_only=True restricts unpickling
# to tensor data.
state_dict = torch.load(
    '../../models/places365_binary_model_resnet18.pth',
    map_location='cpu',
    weights_only=True,
)
model.load_state_dict(state_dict)

# Switch to evaluation mode before running inference.
model.eval()

# Inference-time preprocessing: centre-crop to 256x256, convert to a tensor
# and normalise with the mean/std values given below.
#
# The pipeline is deliberately deterministic. Random augmentations
# (RandomHorizontalFlip, ColorJitter) belong in a training pipeline only:
# applied here they would make the saved label for an image vary from run to
# run, and ColorJitter after Normalize would also operate on values outside
# the [0, 1] range it expects.
data_transforms = transforms.Compose([
        # transforms.Resize((256, 256)),
        transforms.CenterCrop(256),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

# Only files named like "ns6_clean_<digits>.parquet" are processed.
pattern = re.compile(r"^ns6_clean_(\d+)\.parquet$")

# For every matching parquet shard: classify its images with `model` and write
# one CSV (columns: image_path, prediction) named after the shard into
# data/processed/landscape_or_not/ under the project root.
for file_name in tqdm(os.listdir(ns6_wiki_paths), desc="Processing files"):
    file_path = os.path.join(ns6_wiki_paths, file_name)
    if not (os.path.isfile(file_path) and pattern.match(file_name)):
        continue  # skip directories and files that are not ns6 shards

    # Output name mirrors the shard name with the suffix swapped to .csv.
    labelled_csv_name = Path(file_name).with_suffix('.csv')
    result = project_base_path / 'data' / 'processed' / 'landscape_or_not' / labelled_csv_name
    result.parent.mkdir(parents=True, exist_ok=True)  # create directory for output

    data = pd.read_parquet(file_path, columns=['image_path'])  # only path needed
    data = data.head(100)  # only the first 100 rows of each shard are classified
    image_paths = data['image_path'].tolist()

    predictions = []
    image_results = []

    for img_path in image_paths:
        image_path = os.path.join(image_folder, img_path)

        # Open inside a context manager so the file handle is released as
        # soon as the image has been converted into a tensor.
        with Image.open(image_path) as pil_image:
            img = data_transforms(pil_image.convert('RGB')).unsqueeze(0)

        with torch.no_grad():
            output = model(img)

        # Index of the larger of the two outputs is the predicted class.
        _, predicted = torch.max(output, 1)
        label = predicted.item()
        predictions.append(label)
        image_results.append((img_path, label))

    results_df = pd.DataFrame(image_results, columns=['image_path', 'prediction'])
    results_df.to_csv(result, index=False)
    print(f"Results saved to: {result}")