In [1]:
import os
import time
import requests
import pandas as pd
import numpy as np

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Output locations used by the rest of the script: downloaded images are
# written under IMAGE_FOLDER and the per-image rows are appended to CSV_FILE.
CSV_FILE = "pinterest_outfit_data.csv"
IMAGE_FOLDER = "pinterest_images"

# Make sure the image directory exists before anything tries to write to it;
# exist_ok=True avoids an error when the directory is already present.
os.makedirs(IMAGE_FOLDER, exist_ok=True)

def setup_driver():
    """Create a headless Chrome WebDriver.

    Chrome is started with the ``--headless``, ``--disable-gpu`` and
    ``--window-size=1920,1080`` arguments, and the chromedriver binary is
    the one returned by ``ChromeDriverManager().install()``.

    Returns:
        A ``webdriver.Chrome`` instance. The caller is responsible for
        calling ``quit()`` on it when finished.
    """
    chrome_flags = ("--headless", "--disable-gpu", "--window-size=1920,1080")

    chrome_options = Options()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    driver_binary = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_binary), options=chrome_options)

def scrape_pinterest_images(category, search_url, max_images=20, max_stalled_scrolls=3):
    """Download up to ``max_images`` pin images from a Pinterest search page.

    The page is opened in a headless browser and scrolled repeatedly. After
    each scroll every ``pinWrapper`` element is inspected, and each image URL
    that has not been seen before is downloaded into ``IMAGE_FOLDER`` as
    ``<category with spaces replaced by underscores>_<index>.jpg``.

    Args:
        category: Label stored with every image and used as file-name prefix.
        search_url: Pinterest search URL to load.
        max_images: Maximum number of images to save.
        max_stalled_scrolls: Number of consecutive scrolls that may save no
            new image before scraping gives up. Without this limit the loop
            would never end once the page stops yielding new images.

    Returns:
        A list of ``{"category": ..., "image_name": ...}`` dicts, one per
        image that was saved successfully.
    """
    # Imported here so the function does not rely on an extra file-level import.
    from selenium.common.exceptions import WebDriverException

    images_data = []
    seen_images = set()
    file_prefix = category.replace(' ', '_')
    stalled_scrolls = 0

    driver = setup_driver()
    try:
        driver.get(search_url)
        time.sleep(8)  # fixed wait after navigation before querying the page

        while len(images_data) < max_images:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(3)  # fixed wait after each scroll

            pins = driver.find_elements(By.XPATH, "//div[@data-test-id='pinWrapper']")
            if not pins:
                break

            saved_before = len(images_data)
            for pin in pins:
                try:
                    img_url = pin.find_element(By.TAG_NAME, "img").get_attribute("src")
                except WebDriverException:
                    # Pin has no <img> or the element can no longer be read;
                    # skip it and keep going (best effort).
                    continue

                if not img_url or img_url in seen_images:
                    continue
                # Mark the URL as seen before downloading so a URL that fails
                # is not retried on every subsequent scroll.
                seen_images.add(img_url)

                image_name = f"{file_prefix}_{len(images_data)}.jpg"
                image_path = os.path.join(IMAGE_FOLDER, image_name)
                try:
                    response = requests.get(img_url, timeout=10)
                    # Do not store an HTTP error body as if it were an image.
                    response.raise_for_status()
                    with open(image_path, "wb") as img_file:
                        img_file.write(response.content)
                except (requests.RequestException, OSError) as exc:
                    print(f"Skipping image {img_url}: {exc}")
                    continue

                images_data.append({
                    "category": category,
                    "image_name": image_name
                })
                if len(images_data) >= max_images:
                    break

            # Stop once several scrolls in a row produced nothing new.
            if len(images_data) == saved_before:
                stalled_scrolls += 1
                if stalled_scrolls >= max_stalled_scrolls:
                    break
            else:
                stalled_scrolls = 0
    finally:
        # Always shut the browser down, even if scraping raised.
        driver.quit()

    return images_data

def save_to_csv(data):
    """Append ``data`` to ``CSV_FILE`` as CSV rows.

    The header row is written only when the file does not exist yet or is
    empty, so repeated calls build up a single table. When ``data`` contains
    no rows nothing is written; otherwise an empty frame would create a file
    without a usable header and later appends would skip the header too.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (here: a list of dicts).
    """
    df = pd.DataFrame(data)
    if df.empty:
        print(f"No data to save; {CSV_FILE} left unchanged.")
        return

    # A missing or zero-byte file has no header yet.
    needs_header = not os.path.exists(CSV_FILE) or os.path.getsize(CSV_FILE) == 0
    df.to_csv(CSV_FILE, index=False, mode='a', header=needs_header)
    print(f"Data saved to {CSV_FILE}")

def build_nima_model(input_shape=(224, 224, 3), num_scores=10):
    """Build the MobileNetV2-based score-distribution model.

    The network is MobileNetV2 with ImageNet weights and no classification
    top, followed by global average pooling, dropout with rate 0.75, and a
    softmax ``Dense`` layer with ``num_scores`` units.

    Args:
        input_shape: Input shape passed to ``MobileNetV2``.
        num_scores: Number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` from the backbone input to the softmax output.
    """
    backbone = MobileNetV2(include_top=False, weights='imagenet', input_shape=input_shape)

    pooled = GlobalAveragePooling2D()(backbone.output)
    regularized = Dropout(0.75)(pooled)
    score_distribution = Dense(num_scores, activation='softmax')(regularized)

    return Model(inputs=backbone.input, outputs=score_distribution)

def load_nima_model(model_path="nima_model.h5"):
    """Build the scoring model and load weights from ``model_path`` if present.

    Args:
        model_path: Path of the weights file to load.

    Returns:
        The model from ``build_nima_model()``. Weights are loaded only when
        ``model_path`` exists; otherwise the model is returned as built and
        a message is printed.
    """
    nima = build_nima_model()

    if not os.path.exists(model_path):
        print("Pre-trained weights not found; using untrained model (ratings will be random).")
        return nima

    nima.load_weights(model_path)
    print("Loaded pre-trained NIMA model weights.")
    return nima

def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image file and turn it into a single-item model input batch.

    Args:
        image_path: Path of the image to load.
        target_size: Size passed to ``load_img`` for resizing.

    Returns:
        The image array scaled as ``x / 127.5 - 1.0`` (for 0-255 pixel
        values this gives the range [-1, 1]) with a new leading axis of
        length 1.
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    scaled = (pixels / 127.5) - 1.0
    # Prepend the batch axis.
    return scaled[np.newaxis, ...]

def rate_image(image_path, model):
    """Return the expected score of an image under the model's distribution.

    The image is preprocessed, passed through ``model.predict``, and the
    first prediction is treated as weights over the scores ``1..N``, where
    ``N`` is the length of that prediction. Deriving ``N`` from the output
    keeps this function working for models built with a ``num_scores``
    other than 10.

    Args:
        image_path: Path of the image to rate.
        model: Object whose ``predict`` returns one score distribution per
            input image.

    Returns:
        The weighted sum ``sum(p_i * i)`` for ``i`` in ``1..N``.
    """
    img = preprocess_image(image_path)
    distribution = np.asarray(model.predict(img)[0])
    # One score value per output bucket: 1, 2, ..., N.
    scores = np.arange(1, distribution.shape[-1] + 1)
    rating = np.sum(distribution * scores)
    return rating

if __name__ == "__main__":
    # Pinterest search pages to scrape, one entry per outfit category.
    categories_and_urls = [
        {
            "category": "men's casual",
            "url": "https://ar.pinterest.com/search/pins/?q=men%20outfits%20casual&rs=ac&len=11&source_id=WTIf3A6L&eq=men%20outfits&etslf=1610"
        },
        {
            "category": "women's outfit",
            "url": "https://ar.pinterest.com/search/pins/?q=women%20outfits&rs=typed"
        }
    ]

    # Step 1: download images for every category and collect their records.
    all_data = []
    for source in categories_and_urls:
        label, search_page = source["category"], source["url"]
        print(f"Scraping {label} images from Pinterest...")
        all_data += scrape_pinterest_images(label, search_page, max_images=20)

    # Step 2: attach a rating to each record. A failure on one image is
    # reported and stored as None so the remaining images are still rated.
    nima_model = load_nima_model("nima_model.h5")
    for entry in all_data:
        saved_path = os.path.join(IMAGE_FOLDER, entry["image_name"])
        try:
            score = rate_image(saved_path, nima_model)
        except Exception as e:
            print(f"Error rating image {entry['image_name']}: {e}")
            score = None
        entry["aesthetic_rating"] = score

    # Step 3: persist everything that was collected.
    save_to_csv(all_data)
    print("Scraping, rating, and saving complete!")


Scraping men's casual images from Pinterest...
Scraping women's outfit images from Pinterest...
Pre-trained weights not found; using untrained model (ratings will be random).
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 463ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━