In [None]:
import os
import random
import numpy as np
import cv2
from PIL import Image, ImageStat
from deepface import DeepFace
from colorthief import ColorThief
from ultralytics import YOLO
from easyocr import Reader
from skimage import filters
from skimage.morphology import disk
import matplotlib.pyplot as plt
import pandas as pd

# --- Configuration ---
# Folder that is scanned for thumbnail images (placeholder: set before running).
THUMBNAILS_DIR = r"<PATH_TO_YOUR_THUMBNAILS_DIRECTORY>"
# Folder that receives the overlay images and the results CSV
# (placeholder: set before running).
OUTPUT_DIR = r"<PATH_TO_YOUR_OUTPUT_DIRECTORY>"
# Create the output folder up front; an already existing folder is not an error.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

def get_sampled_thumbnails(sample_size=20, directory=None):
    """Return full paths of up to ``sample_size`` randomly chosen thumbnails.

    Args:
        sample_size: Maximum number of images to return. Values larger than
            the number of available images return all of them; values below
            zero are treated as zero.
        directory: Folder to scan. Defaults to the module-level
            ``THUMBNAILS_DIR`` when omitted.

    Returns:
        A list of paths (``directory`` joined with the file name), in the
        random order produced by ``random.sample``.
    """
    if directory is None:
        directory = THUMBNAILS_DIR

    image_exts = {'.png', '.jpg', '.jpeg'}
    # Compare the real extension (with its dot) so names such as "fakepng"
    # are not mistaken for images, and skip sub-directories whose name happens
    # to end in an image extension.  Sorting makes the candidate list
    # independent of the arbitrary os.listdir order.
    files = sorted(
        name for name in os.listdir(directory)
        if os.path.splitext(name)[1].lower() in image_exts
        and os.path.isfile(os.path.join(directory, name))
    )

    count = max(0, min(sample_size, len(files)))
    sampled_files = random.sample(files, count)
    return [os.path.join(directory, name) for name in sampled_files]

# --- Model Initialization ---
# Both models are created once at module level and reused by the analysis
# functions below: analyze_objects_and_concreteness calls yolo_model and
# analyze_text_all calls easyocr_reader.
yolo_model = YOLO('yolov8x.pt')  # ultralytics YOLO built from the 'yolov8x.pt' weights name
easyocr_reader = Reader(['en'], gpu=False)  # EasyOCR reader for language list ['en'], GPU disabled

def overlay_text(image, text, pos=(10, 30), color=(0,255,0), font_scale=1, thickness=2):
    """Return a copy of ``image`` with ``text`` drawn on it.

    The input image is left untouched; the text is rendered on the copy with
    the Hershey Simplex font and anti-aliased lines at ``pos`` using the given
    ``color``, ``font_scale`` and ``thickness``.
    """
    annotated = image.copy()
    cv2.putText(
        annotated, text, pos, cv2.FONT_HERSHEY_SIMPLEX,
        font_scale, color, thickness, cv2.LINE_AA,
    )
    return annotated

def save_overlay(img, image_path, step):
    """Write an overlay image into ``OUTPUT_DIR`` and return its path.

    The file name is ``<source base name>_<step>.jpg``.

    Args:
        img: Image array to write. ``None`` (e.g. the source file could not
            be read) is tolerated and nothing is written.
        image_path: Path of the source thumbnail; only its base name is used.
        step: Short label of the analysis step, used as file-name suffix.

    Returns:
        The path that was written, or ``None`` when no file was produced.
    """
    if img is None:
        print(f"[save_overlay] no image to save for {image_path} ({step})")
        return None

    base = os.path.splitext(os.path.basename(image_path))[0]
    out_path = os.path.join(OUTPUT_DIR, f"{base}_{step}.jpg")
    # cv2.imwrite reports failure through its boolean return value rather than
    # by raising, so check it instead of handing back a path that does not exist.
    if not cv2.imwrite(out_path, img):
        print(f"[save_overlay] could not write {out_path}")
        return None
    return out_path

def show_overlay(img, title="Overlay"):
    """Display a BGR image with matplotlib under the given title.

    Args:
        img: Image in OpenCV's BGR channel order, or ``None`` when the
            analysis step had no image to return (nothing is drawn then).
        title: Title shown above the figure.
    """
    if img is None:
        print(f"[show_overlay] {title}: no image to display")
        return

    # matplotlib expects RGB while OpenCV images are BGR.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    fig = plt.figure(figsize=(5,5))
    plt.imshow(img_rgb)
    plt.axis('off')
    plt.title(title)
    plt.show()
    # This function is called several times per thumbnail; release the figure
    # so that open figures do not accumulate over the run.
    plt.close(fig)

def print_analysis(results):
    """Print every entry of ``results`` as one ``key: value`` line."""
    for key, value in results.items():
        line = f"{key}: {value}"
        print(line)

# --- Analysis functions with overlays and saving ---

def analyze_color_and_brightness(image_path):
    """Measure mean brightness and dominant colour of a thumbnail.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``(img_overlay, results)`` where ``results`` contains

        * ``brightness``: mean of the per-band means of the image converted
          to RGB, or ``None`` if the image could not be analysed,
        * ``dominant_color``: value of ``ColorThief.get_color(quality=1)``,
          or ``None`` on failure,
        * ``overlay_path``: path of the saved overlay, or ``None``.

        ``img_overlay`` is the annotated image, or ``None`` when OpenCV could
        not read the file.
    """
    img = cv2.imread(image_path)
    results = {}

    try:
        # Context manager closes the file handle.  Converting to RGB keeps
        # alpha or palette-index bands from being averaged in as "brightness".
        with Image.open(image_path) as img_pil:
            stat = ImageStat.Stat(img_pil.convert("RGB"))
        results['brightness'] = sum(stat.mean) / len(stat.mean)
    except Exception as exc:
        print(f"[analyze_color_and_brightness] brightness failed for {image_path}: {exc}")
        results['brightness'] = None

    try:
        ct = ColorThief(image_path)
        results['dominant_color'] = ct.get_color(quality=1)
    except Exception as exc:
        print(f"[analyze_color_and_brightness] dominant colour failed for {image_path}: {exc}")
        results['dominant_color'] = None

    # A failed measurement is stored as None, which cannot be formatted with
    # ':.1f'; render a placeholder instead of raising TypeError.
    brightness = results['brightness']
    brightness_label = f"{brightness:.1f}" if brightness is not None else "n/a"
    text = f"Brightness: {brightness_label} | Dominant: {results['dominant_color']}"

    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        print(f"[analyze_color_and_brightness] could not read {image_path}; no overlay produced")
        results['overlay_path'] = None
        return None, results

    img_overlay = overlay_text(img, text)
    results['overlay_path'] = save_overlay(img_overlay, image_path, "color_brightness")
    return img_overlay, results

def analyze_visual_complexity(image_path):
    """Count Canny edge pixels and blend the edge map over the thumbnail.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``(img_overlay, results)`` where ``results`` contains

        * ``edge_count``: number of non-zero pixels in the Canny edge map
          (thresholds 100/200) as a plain ``int``, or ``None`` on failure,
        * ``overlay_path``: path of the saved overlay, or ``None``.

        ``img_overlay`` is the blended, annotated image; the unmodified image
        if edge detection failed; or ``None`` when the file could not be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None; there is
        # nothing to analyse or save in that case.
        print(f"[analyze_visual_complexity] could not read {image_path}")
        return None, {'edge_count': None, 'overlay_path': None}

    results = {}
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        edge_count = int(np.count_nonzero(edges))
        # The edge map is single-channel; expand it so it can be blended with
        # the colour image.
        edge_img = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
        blended = cv2.addWeighted(img, 0.7, edge_img, 0.7, 0)
        img_overlay = overlay_text(blended, f"Edges: {edge_count}")
        results['edge_count'] = edge_count
    except Exception as exc:
        print(f"[analyze_visual_complexity] edge analysis failed for {image_path}: {exc}")
        img_overlay = img
        results['edge_count'] = None

    results['overlay_path'] = save_overlay(img_overlay, image_path, "visual_complexity")
    return img_overlay, results

def analyze_objects_and_concreteness(image_path):
    """Detect objects with the module-level YOLO model and draw their boxes.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``(img, results)`` where ``results`` contains

        * ``yolo_objects``: list of detected class names (one per box), or
          ``None`` if detection failed,
        * ``object_count``: number of detections, or ``None`` on failure,
        * ``overlay_path``: path of the saved overlay, or ``None``.

        ``img`` is the image with boxes and labels drawn in place, or
        ``None`` when OpenCV could not read the file.
    """
    img = cv2.imread(image_path)
    results = {}
    try:
        detections = yolo_model(image_path)[0]
        class_names = detections.names
        labels = [class_names[int(cls_id)] for cls_id in detections.boxes.cls.tolist()]
        boxes = detections.boxes.xyxy.tolist()
        # Record the detections before drawing so that a drawing problem
        # cannot discard them.
        results['yolo_objects'] = labels
        results['object_count'] = len(labels)

        if img is not None:
            for box, label in zip(boxes, labels):
                x1, y1, x2, y2 = map(int, box)
                cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
                # Keep the label baseline inside the frame for boxes that
                # touch the top edge (y1 - 10 would otherwise be off-canvas).
                label_y = max(y1 - 10, 20)
                cv2.putText(img, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
    except Exception as exc:
        print(f"[analyze_objects_and_concreteness] detection failed for {image_path}: {exc}")
        # setdefault keeps results that were already computed before the error.
        results.setdefault('yolo_objects', None)
        results.setdefault('object_count', None)

    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        results['overlay_path'] = None
    else:
        results['overlay_path'] = save_overlay(img, image_path, "objects")
    return img, results

def analyze_faces_and_emotions(image_path):
    """Run DeepFace age/gender/emotion analysis and outline detected faces.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``(img, results)`` where ``results`` contains

        * ``face_count``: number of analysed faces (0 if analysis raised),
        * ``dominant_emotion``, ``age``, ``gender``: values of the first
          face, or ``None`` when unavailable,
        * ``overlay_path``: path of the saved overlay, or ``None``.

        ``img`` is the image with face boxes and labels drawn in place, or
        ``None`` when OpenCV could not read the file.
    """
    img = cv2.imread(image_path)
    results = {}
    try:
        analysis = DeepFace.analyze(img_path=image_path, actions=['age','gender','emotion'])
        faces = analysis if isinstance(analysis, list) else [analysis]
        first = faces[0] if faces else {}

        # Record the measurements before drawing so that a drawing problem
        # cannot discard them.
        results['face_count'] = len(faces)
        results['dominant_emotion'] = first.get('dominant_emotion')
        results['age'] = first.get('age')
        # NOTE(review): recent DeepFace releases appear to return 'gender' as a
        # per-class probability dict and put the label in 'dominant_gender';
        # prefer the label and fall back to 'gender' when it is absent.
        results['gender'] = first.get('dominant_gender', first.get('gender'))

        if img is not None:
            for face in faces:
                region = face.get('region')
                if not region:
                    continue
                x, y, w, h = region['x'], region['y'], region['w'], region['h']
                cv2.rectangle(img, (x, y), (x+w, y+h), (0,0,255), 2)
                label = f"{face.get('dominant_emotion','')} {face.get('age','')}"
                # Keep the label baseline inside the frame for faces that
                # touch the top edge (y - 10 would otherwise be off-canvas).
                label_y = max(y - 10, 20)
                cv2.putText(img, label, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
    except Exception:
        # Deliberately quiet: any failure of the analysis (presumably including
        # the "no face found" case) is reported as zero faces, as before.
        # setdefault keeps values that were already computed before the error.
        results.setdefault('face_count', 0)
        results.setdefault('dominant_emotion', None)
        results.setdefault('age', None)
        results.setdefault('gender', None)

    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        results['overlay_path'] = None
    else:
        results['overlay_path'] = save_overlay(img, image_path, "faces")
    return img, results

def analyze_text_all(image_path):
    """Read text with the module-level EasyOCR reader and outline each hit.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        ``(img, results)`` where ``results`` contains

        * ``easyocr_text``: list of recognised strings (possibly empty), or
          ``None`` if OCR failed,
        * ``overlay_path``: path of the saved overlay, or ``None``.

        ``img`` is the image with text polygons and labels drawn in place, or
        ``None`` when OpenCV could not read the file.
    """
    img = cv2.imread(image_path)
    results = {}
    try:
        results_ocr = easyocr_reader.readtext(image_path)
        # Record the text before drawing so that a drawing problem cannot
        # discard it.
        results['easyocr_text'] = [text for (_bbox, text, _conf) in results_ocr]

        if img is not None:
            for (bbox, text, _conf) in results_ocr:
                # cv2.polylines only accepts 32-bit integer points; astype(int)
                # yields int64 on most platforms and made the call fail.
                pts = np.asarray(bbox).astype(np.int32)
                cv2.polylines(img, [pts], True, (255,0,0), 2)
                # Pass plain Python ints for the text origin rather than
                # numpy scalars.
                origin = (int(pts[0][0]), int(pts[0][1]))
                cv2.putText(img, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,0,0), 2)
    except Exception as exc:
        print(f"[analyze_text_all] OCR failed for {image_path}: {exc}")
        # setdefault keeps text that was already recognised before the error.
        results.setdefault('easyocr_text', None)

    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        results['overlay_path'] = None
    else:
        results['overlay_path'] = save_overlay(img, image_path, "text")
    return img, results


In [None]:
# --- Main: Run on 20 random thumbnails and save all output ---

# One entry per analysis step, in execution order:
# (prefix for the result columns, figure title, analysis function).
ANALYSIS_STAGES = [
    ("color", "Color & Brightness", analyze_color_and_brightness),
    ("complexity", "Visual Complexity", analyze_visual_complexity),
    ("objects", "Objects (YOLO)", analyze_objects_and_concreteness),
    ("faces", "Faces & Emotions", analyze_faces_and_emotions),
    ("text", "Text (OCR)", analyze_text_all),
]

img_paths = get_sampled_thumbnails(20)
all_results = []

for idx, image_path in enumerate(img_paths):
    print(f"\n--- Thumbnail {idx+1}: {os.path.basename(image_path)} ---")
    row_result = {'image_path': image_path}

    for prefix, title, analyze in ANALYSIS_STAGES:
        # Each step returns its overlay image plus a dict of measurements;
        # show the overlay, print the dict, and merge it into the row under
        # "<prefix>_<key>" column names.
        img_overlay, results = analyze(image_path)
        show_overlay(img_overlay, title)
        print_analysis(results)
        for k, v in results.items():
            row_result[f"{prefix}_{k}"] = v

    all_results.append(row_result)

In [None]:
# Save all results to CSV
# One row per thumbnail, one column per prefixed measurement collected above.
csv_path = os.path.join(OUTPUT_DIR, "thumbnail_analysis_results.csv")
df = pd.DataFrame(all_results)
df.to_csv(csv_path, index=False)
print(f"\nAll overlays and results saved in {OUTPUT_DIR}")
