In [None]:
import cv2
import numpy as np
import os

def is_scene_change(prev_frame, curr_frame, threshold):
    """
    Decide whether two grayscale frames look like they belong to different scenes.

    A 256-bin intensity histogram is built for each frame, normalized in place,
    and the pair is compared with OpenCV's correlation metric (values closer to 1
    mean the frames are more alike).

    Returns True when the correlation is strictly below `threshold`.
    """
    histograms = []
    for gray_frame in (prev_frame, curr_frame):
        # Single-channel histogram over the full 0-255 intensity range
        hist = cv2.calcHist([gray_frame], [0], None, [256], [0, 256])
        # Normalize in place so both histograms are on a comparable scale
        cv2.normalize(hist, hist)
        histograms.append(hist)

    correlation = cv2.compareHist(histograms[0], histograms[1], cv2.HISTCMP_CORREL)

    # A low correlation means the intensity distribution shifted: scene change
    return correlation < threshold

# Open video file
cap = cv2.VideoCapture("videos/lipbalms.mp4")
if not cap.isOpened():
    # Without this check a wrong path makes the read loop exit on its first
    # iteration and the cell finishes silently without producing any output.
    raise IOError("Could not open video file: videos/lipbalms.mp4")

prev_frame = None # Store previous frame for comparison
frame_id = 0 # Zero-based index of the frame currently being processed
scene_id = 0 # Track the number of detected scenes


# Create a directory to save scene images.
# exist_ok=True keeps the cell re-runnable (Restart & Run All) once the folder exists.
os.makedirs('scenes_6', exist_ok=True)

try:
    # Loop through video frames
    while True:
        ret, frame = cap.read() # Read next frame
        if not ret:
            break # Exit loop if video ends or fails to read

        # Convert current frame to grayscale for histogram analysis
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # The first frame has nothing to be compared against, so the check is
        # skipped for it. It still falls through to the counter update below,
        # which keeps frame_id equal to the real index of the frame.
        if prev_frame is not None and is_scene_change(prev_frame, gray, threshold=0.8):
            print(f"Scene changed at frame {frame_id}")

            # Save frame as an image in the output directory
            cv2.imwrite(f"scenes_6/scene_{scene_id:03d}.jpg", frame)
            scene_id += 1

        # Update previous frame and frame counter
        prev_frame = gray
        frame_id += 1
finally:
    # Release the video capture object even if processing raised
    cap.release()

In [None]:
import os
from openai import OpenAI
import csv
from PIL import Image
import base64
from io import BytesIO
from dotenv import load_dotenv

# Load environment variables (here, OPENAI_API_KEY)
# This runs before the client is constructed so the key is already in the environment.
load_dotenv()

# Initialize OpenAI client (uses key from env variable)
client = OpenAI()

# === Directory of scene images ===
scene_dir = "scenes_6"  # folder whose image files are analyzed below, e.g. "scene_frames/"
output_csv = "brands_detected6.csv"  # CSV file that receives one row per analyzed frame
products = set()  # distinct raw answers returned by the model across all frames

# === Prompt to use ===
# Python joins adjacent string literals with no separator, so every fragment
# except the last ends with an explicit space to keep the sentences apart.
prompt = (
    "Identify and list all branded or commercial products visible in this image. "
    "Don't give a complete description of what you have identified. Just return the product with brand name. "
    "For example - 'Pepsi can', 'Doritos chips', 'Reebok tracksuit', 'Fruity loops cereal', etc. "
    # "include the full product name like 'Maybelline Fit Me Blush' or 'Maybelline Super Stay Foundation'. Avoid generic brand names like 'Maybelline Fit Me' without stating what the product is. Do not guess, but infer based on visible packaging and design."
    # "Pay attention to packaging — tube, bottle, compact, etc. Avoid assuming based on product line names like 'Fit Me' or 'Dewy + Smooth'."
    "Use your best judgment, but avoid listing products that are not visibly present. "
    "If a product is partially obscured but clearly identifiable by packaging, include it. "
    "Do not guess products that are not visually indicated. "
    "If there is no product visible in the image, simply return None"
)

# === Function to convert image to base64 ===
def pil_image_to_base64(image):
    """
    Serialize an image as JPEG in memory and return it as a base64 text string.

    `image` must provide a `save(file_obj, format=...)` method; nothing is
    written to disk.
    """
    with BytesIO() as jpeg_buffer:
        image.save(jpeg_buffer, format="JPEG")
        encoded_bytes = base64.b64encode(jpeg_buffer.getvalue())
    return encoded_bytes.decode("utf-8")

# === Process each image ===
# Each entry in `results` is a (filename, answer) tuple; failed frames are
# recorded with the literal answer "ERROR" so the CSV still lists every frame.
results = []
for filename in sorted(os.listdir(scene_dir)):
    # Only image files are sent to the model; anything else in the folder is skipped
    if not filename.lower().endswith((".jpg", ".png", ".jpeg")):
        continue

    image_path = os.path.join(scene_dir, filename)

    print(f"🧠 Analyzing {filename}...")

    try:
        # Loading happens inside the try block so that an unreadable or corrupt
        # file is recorded as "ERROR" like any API failure, instead of aborting
        # the whole batch. The context manager closes the file handle promptly.
        with Image.open(image_path) as source_image:
            image_b64 = pil_image_to_base64(source_image.convert("RGB")) #  Convert to base64 for API

        response = client.chat.completions.create(
            model="gpt-4o", # Vision-capable model
            messages=[
                {"role": "user", "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/jpeg;base64,{image_b64}"
                    }}
                ]}
            ],
            temperature=0.2
        )

        answer = response.choices[0].message.content.strip()
        results.append((filename, answer))
        print(f"✅ {filename}: {answer}")
        products.add(answer)

    except Exception as e:
        # Best effort: log the failure and keep going with the remaining frames
        print(f"❌ Error processing {filename}: {e}")
        results.append((filename, "ERROR"))

# === Save to CSV ===
# The encoding is set explicitly: model answers can contain non-ASCII characters
# (accented brand names, symbols) that a locale-dependent default encoding may
# be unable to write. newline="" is what the csv module expects for its files.
with open(output_csv, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Frame", "Detected Brands & Products"])
    writer.writerows(results)

print(f"\n🎉 Done! Results saved to: {output_csv}")


In [28]:
# Inspect the raw, not yet de-duplicated set of answers collected from the frames
print(products)

{'None', 'E.L.F. Cosmetics Hydrating Core Lip Shine', 'e.l.f. lipstick', 'Conscious Chemist Glossy Butter Balm', 'Kiko Milano lip balm', 'Conscious Chemist Glossy Peptide Lip Butter Balm', 'Kiko Milano Coloured Lip Balm'}


In [29]:
# Second pass: ask the model to merge the raw per-frame answers into one clean list.
# The answers are interpolated as a sorted list rather than as the set itself:
# iteration order of a set of strings can differ between kernel sessions, which
# would make the prompt text (and therefore the request) non-reproducible.
prompt2 = f'''

You are given a list or block of text that may contain repeated, messy, or slightly varied product names.

Your task is to extract a clean, alphabetically sorted list of unique product names from it.

Instructions:

Ignore values like "None", "N/A", or empty entries.

Split multiple products if they appear in one string separated by commas or dashes.

Normalize duplicates with slight variations. For example, "Maybelline Master Chrome" and "Maybelline Master Chrome Highlighter" should be merged as "Maybelline Master Chrome Highlighter".

Do not include vague labels like just "Maybelline Fit Me" — prefer specific ones like "Maybelline Fit Me Blush" or "Maybelline Fit Me Dewy + Smooth Primer".

Do not make up products. Only include what's visibly or clearly listed.

Return the final result as a plain list of comma separated strings (not as a JSON object or explanation).

List: {sorted(products)}
'''

# Text-only request; same model and temperature as the per-frame calls
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": prompt2}
    ],
    temperature=0.2
)

# Consolidated, comma-separated product list as returned by the model
answer = response.choices[0].message.content.strip()

In [34]:
## FINAL RESULT ##
# Show the consolidated product list returned by the second model call

print('Products in the video are: ', answer)

Products in the video are:  Conscious Chemist Glossy Butter Balm, Conscious Chemist Glossy Peptide Lip Butter Balm, E.L.F. Cosmetics Hydrating Core Lip Shine, Kiko Milano Coloured Lip Balm, Kiko Milano lip balm, e.l.f. lipstick
