In [3]:
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm  # Import tqdm for progress bar
import os
import re


# Define function: Convert to Ohta color space
def convert_to_ohta(image_rgb):
    """Compute the three Ohta-style channels (I1, I2, I3) of an RGB image.

    Args:
        image_rgb: Array indexed as ``[row, col, channel]`` with channels in
            R, G, B order. Integer images (e.g. the uint8 arrays returned by
            OpenCV) are promoted to float64 before any arithmetic; floating
            point input keeps its dtype.

    Returns:
        Tuple ``(I1, I2, I3)`` of 2-D arrays:
        ``I1 = (R + G + B) / 3`` (brightness),
        ``I2 = (R - B) / 2`` (red-blue difference),
        ``I3 = (2G - R - B) / 4`` (green-purple difference).
    """
    rgb = np.asarray(image_rgb)
    # Sums and differences of uint8 channels wrap around modulo 256 (e.g.
    # 200 + 200 overflows, 10 - 20 underflows), so do the math in floating
    # point instead of the image's integer dtype.
    if not np.issubdtype(rgb.dtype, np.floating):
        rgb = rgb.astype(np.float64)

    R, G, B = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
    I1 = (R + G + B) / 3.0  # Brightness component
    I2 = (R - B) / 2.0  # Red-blue difference
    I3 = (2 * G - R - B) / 4.0  # Green-purple difference
    return I1, I2, I3

# Define function: Convert to LST color space
def convert_to_lst(image_rgb):
    """Compute the L, S, T channels used by this script from an RGB image.

    Args:
        image_rgb: Array indexed as ``[row, col, channel]`` with channels in
            R, G, B order. Integer images (e.g. the uint8 arrays returned by
            OpenCV) are promoted to float64 before any arithmetic; floating
            point input keeps its dtype.

    Returns:
        Tuple ``(L, S, T)`` of 2-D arrays:
        ``L = (R + G + B) / 3`` (brightness),
        ``S = sqrt(((R-G)^2 + (R-B)^2 + (G-B)^2) / 3)`` (saturation),
        ``T = R - B`` (labelled "hue" by the original author).
    """
    rgb = np.asarray(image_rgb)
    # Channel differences can be negative and the sum/squares exceed 255, so
    # integer (uint8) arithmetic would silently wrap; compute in floating
    # point instead.
    if not np.issubdtype(rgb.dtype, np.floating):
        rgb = rgb.astype(np.float64)

    R, G, B = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
    L = (R + G + B) / 3.0  # Brightness
    S = np.sqrt(((R - G) ** 2 + (R - B) ** 2 + (G - B) ** 2) / 3.0)  # Saturation
    T = R - B  # Hue
    return L, S, T

# Locations: inputs and outputs live under <cwd>/../../data/processed, while
# the images themselves are resolved against a fixed folder.
file_location_path = Path.cwd()
project_base_path = file_location_path.parent.parent
ns6_wiki_paths = project_base_path.joinpath('data', 'processed', 'landscape_or_not')
image_folder = Path('/home/ubuntu/landscape-aesthetics')
output_folder = project_base_path.joinpath('data', 'processed', 'landscape_color_features')

# Create the output directory (including missing parents) before writing
output_folder.mkdir(parents=True, exist_ok=True)

# Only files named ns6_clean_<digits>.csv are processed
pattern = re.compile(r"^ns6_clean_(\d+)\.csv$")

# One output CSV of per-image channel means is written per matching input CSV
for file_name in tqdm(os.listdir(ns6_wiki_paths), desc="Processing CSV files"):
    file_path = ns6_wiki_paths / file_name
    if not file_path.is_file() or not pattern.match(file_name):
        continue  # Not one of the expected input CSVs

    # Output file carries the same name as the input, inside output_folder
    label_file_path = Path(file_name)
    labeled_csv_name = label_file_path.with_suffix('.csv')
    result = output_folder / labeled_csv_name
    result.parent.mkdir(parents=True, exist_ok=True)

    # Column order of the output CSV; also the key order of every row
    columns = ["image_path", "H_mean", "S_mean", "V_mean",
               "I1_mean", "I2_mean", "I3_mean",
               "L_mean", "S_lst_mean", "T_mean"]

    # Rows collected for this input file
    results = []

    # Keep only the rows whose prediction equals 1
    data = pd.read_csv(file_path, usecols=['image_path', 'prediction'])
    data = data[data['prediction'] == 1]
    image_paths = data['image_path'].tolist()

    for img_path in image_paths:
        image_path = image_folder / img_path

        try:
            image = cv2.imread(str(image_path))
            if image is None:
                print(f"Warning: Unable to load image {img_path}, skipping...")
                continue

            # OpenCV loads images as BGR; derive the RGB and HSV versions
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
            H, S, V = image_hsv[..., 0], image_hsv[..., 1], image_hsv[..., 2]

            # Ohta and LST channels are computed from the RGB image
            I1, I2, I3 = convert_to_ohta(image_rgb)
            L, S_lst, T = convert_to_lst(image_rgb)

            # One mean per channel, listed in the same order as `columns`
            channel_means = [
                np.mean(channel)
                for channel in (H, S, V, I1, I2, I3, L, S_lst, T)
            ]
            results.append(dict(zip(columns, [str(image_path), *channel_means])))

        except Exception as e:
            # Report the failure and carry on with the next image
            print(f"Error processing image {img_path}: {e}")

    # Write this file's rows (possibly none) to the output CSV
    df = pd.DataFrame(results, columns=columns)
    df.to_csv(result, index=False)

print("Processing complete. Results saved in:", output_folder)


Processing CSV files:   0%|                             | 0/101 [00:06<?, ?it/s]


KeyboardInterrupt: 