In [None]:

import os
import re
from base64 import b64encode
from io import BytesIO

import numpy as np
import pandas as pd
from openai import OpenAI
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm

# Configuration set-up
# The key is read from the OPENAI_API_KEY environment variable so the secret
# never has to be typed into (and saved with) the notebook. When the variable
# is unset the value falls back to the empty string, as before.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
client = OpenAI(api_key=API_KEY)


# Paths
# CSV listing the segments to evaluate (read with pandas further down).
CSV_PATH = "/content/drive/MyDrive/Colab Notebooks/Projects/CSVs/test.csv"
# Directory holding one "<Segment ID>.npy" frame array per segment.
NPY_DIR = "/content/drive/MyDrive/Colab Notebooks/Projects/npy_segments_unimodal"
# Output directory and file name for the per-segment predictions CSV.
SAVE_DIR = "/content/drive/MyDrive/Colab Notebooks/Projects/results/LLM Results"
SAVE_NAME = "gpt4o_segment_predictions.csv"
# Maximum number of evenly spaced frames sent to the model per segment.
NUM_FRAMES = 10
# Size passed to PIL's Image.resize for every frame before encoding.
IMAGE_SIZE = (224, 224)

# Image encoding function
def encode_frame_to_base64(frame):
    """Convert one frame into an OpenAI ``image_url`` content part.

    The frame is converted to an 8-bit RGB image, resized to ``IMAGE_SIZE``,
    PNG-encoded and embedded as a base64 ``data:`` URL.

    Args:
        frame: Array-like image of shape ``(H, W)``, ``(H, W, 1)``,
            ``(H, W, 3)`` or ``(H, W, 4)``. Non-``uint8`` input is scaled by
            255 (i.e. it is treated as being in the ``[0, 1]`` range).

    Returns:
        dict: ``{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}``.

    Raises:
        ValueError: If the frame has a shape that cannot be interpreted as a
            grayscale, RGB or 4-channel image.
    """
    frame = np.asarray(frame)

    if frame.dtype != np.uint8:
        # Integer/bool inputs are promoted first so the multiplication itself
        # cannot overflow a narrow dtype; float inputs keep their precision.
        if not np.issubdtype(frame.dtype, np.floating):
            frame = frame.astype(np.float64)
        # Clip before casting so out-of-range values saturate at 0/255
        # instead of wrapping around in the uint8 conversion.
        frame = np.clip(frame * 255, 0, 255).astype(np.uint8)

    if frame.ndim == 2:
        # Grayscale: replicate the single plane into three channels.
        frame = np.stack([frame] * 3, axis=-1)
    elif frame.ndim == 3 and frame.shape[-1] == 1:
        # Grayscale with an explicit channel axis; stacking here would
        # produce a 4-D array that PIL cannot open.
        frame = np.repeat(frame, 3, axis=-1)
    elif frame.ndim == 3 and frame.shape[-1] == 4:
        # Four channels are assumed to be RGBA; the last channel is dropped.
        frame = frame[..., :3]
    elif frame.ndim != 3 or frame.shape[-1] != 3:
        raise ValueError(
            f"Unsupported frame shape {frame.shape}; expected (H, W), "
            "(H, W, 1), (H, W, 3) or (H, W, 4)"
        )

    img = Image.fromarray(np.ascontiguousarray(frame)).resize(IMAGE_SIZE)
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    encoded = b64encode(buffered.getvalue()).decode("utf-8")
    return {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{encoded}"}
    }

# Retries per request. The client backs off between attempts, so a larger
# budget lets a request outlast the tokens-per-minute (429) window instead of
# dropping the segment.
MAX_API_RETRIES = 8

# Class ids passed explicitly to the metrics so they keep a fixed 2x2 layout
# even when one class is absent from the evaluated rows.
CLASS_LABELS = [0, 1]
CLASS_NAMES = ["Non-violent", "Violent"]

# Column order of the results CSV; given explicitly so the frame has these
# columns even when no segment produced a row.
RESULT_COLUMNS = ["segment_id", "gpt_pred_label", "true_label", "gpt_response"]

# Reply starts with "None"/"No" (optionally preceded by markup/punctuation).
_NEGATIVE_LEAD = re.compile(r"^\W*(?:none|no)\b")
# Stand-alone "yes" (does not match inside words such as "eyes").
_AFFIRMATIVE = re.compile(r"\byes\b")
# "frame 3", "frames 1, 4", "frame #2", "frame no. 5", "frame number 7".
_FRAME_REFERENCE = re.compile(r"\bframes?\s*(?:#|no\.?|number)?\s*\d")
# Reply that consists only of a list of numbers, e.g. "1, 4, 8" or "2 and 3".
_BARE_FRAME_LIST = re.compile(r"^\W*\d+(?:\s*(?:,|-|and|&)\s*\d+)*\W*$")


def parse_violence_label(reply):
    """Map the model's free-text reply to a binary violence label.

    This is a heuristic over the reply text, applied in order:
    a reply that opens with "None"/"No" is non-violent; otherwise a
    stand-alone "yes", a numbered frame reference, or a bare list of frame
    numbers is violent; anything else (including an empty reply) is
    non-violent.

    Args:
        reply: Reply text from the model; ``None`` is treated as empty.

    Returns:
        int: ``1`` for violent, ``0`` for non-violent.
    """
    text = (reply or "").strip().lower()
    if not text or _NEGATIVE_LEAD.search(text):
        return 0
    if _AFFIRMATIVE.search(text):
        return 1
    if _FRAME_REFERENCE.search(text) or _BARE_FRAME_LIST.search(text):
        return 1
    return 0


def sample_frames(frames, num_frames):
    """Pick up to ``num_frames`` evenly spaced frames along the first axis.

    Raises:
        ValueError: If ``frames`` is empty, so that no image-less request is
            sent to the model.
    """
    total_frames = len(frames)
    if total_frames == 0:
        raise ValueError("segment contains no frames")
    indices = np.linspace(0, total_frames - 1, min(total_frames, num_frames), dtype=int)
    return frames[indices]


def build_messages(encoded_images):
    """Build the chat messages: system instruction, then images plus question."""
    return [
        {
            "role": "system",
            "content": "You are a video analysis assistant. Given several frames from a video segment, determine whether any violence occurs. If yes, list the frame numbers where it happens."
        },
        {
            "role": "user",
            "content": [
                *encoded_images,
                {
                    "type": "text",
                    "text": "These are frames from a short surveillance video segment. Is there any violent action? If so, which frames (e.g. 1, 4, 8) show violence? If none, say 'None'."
                }
            ]
        }
    ]


# Load CSV
df = pd.read_csv(CSV_PATH)
df["Segment ID"] = df["Segment ID"].astype(str)
# The label column name differs between CSV variants; resolve it once.
label_column = "Violence label(video)" if "Violence label(video)" in df.columns else "Violence label"
results = []

# Same client, but with a larger retry budget for rate-limited requests.
retrying_client = client.with_options(max_retries=MAX_API_RETRIES)

# Process each segment
for _, row in tqdm(df.iterrows(), total=len(df)):
    segment_id = row["Segment ID"]
    true_label = int(row[label_column])
    npy_path = os.path.join(NPY_DIR, f"{segment_id}.npy")

    if not os.path.exists(npy_path):
        print(f"[Missing] {segment_id}")
        continue

    try:
        sampled_frames = sample_frames(np.load(npy_path), NUM_FRAMES)
        encoded_images = [encode_frame_to_base64(f) for f in sampled_frames]

        response = retrying_client.chat.completions.create(
            model="gpt-4o",
            messages=build_messages(encoded_images),
            temperature=0.2,
            max_tokens=200
        )

        # The message content can be None; normalise it so parsing and the
        # saved CSV always deal with a string.
        reply = response.choices[0].message.content or ""

        results.append({
            "segment_id": segment_id,
            "gpt_pred_label": parse_violence_label(reply),
            "true_label": true_label,
            "gpt_response": reply
        })

    except Exception as e:
        # Per-segment boundary: record the failure and keep going so one bad
        # segment does not abort the whole run.
        print(f"[Error] {segment_id}: {e}")
        results.append({
            "segment_id": segment_id,
            "gpt_pred_label": "error",
            "true_label": true_label,
            "gpt_response": str(e)
        })

# Save results
os.makedirs(SAVE_DIR, exist_ok=True)
df_out = pd.DataFrame(results, columns=RESULT_COLUMNS)
save_path = os.path.join(SAVE_DIR, SAVE_NAME)
df_out.to_csv(save_path, index=False)
print(f"\n Saved predictions to: {save_path}")

# Evaluate predictions
# Rows marked "error" carry no prediction and are excluded from the metrics.
df_valid = df_out[df_out["gpt_pred_label"] != "error"].copy()

if df_valid.empty:
    print("\n No successful predictions to evaluate.")
else:
    df_valid["gpt_pred_label"] = df_valid["gpt_pred_label"].astype(int)
    df_valid["true_label"] = df_valid["true_label"].astype(int)

    print("\n Confusion Matrix:")
    print(confusion_matrix(df_valid["true_label"], df_valid["gpt_pred_label"], labels=CLASS_LABELS))

    print("\n Classification Report:")
    print(classification_report(
        df_valid["true_label"],
        df_valid["gpt_pred_label"],
        labels=CLASS_LABELS,
        target_names=CLASS_NAMES,
        zero_division=0,
    ))



 87%|████████▋ | 285/326 [25:07<09:38, 14.10s/it]

[Error] 271_2: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-jwXG9OABYnsPLwBFf44af35b on tokens per min (TPM): Limit 30000, Used 30000, Requested 7734. Please try again in 15.467s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}


 97%|█████████▋| 316/326 [27:37<01:22,  8.22s/it]

[Error] 283_5: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-jwXG9OABYnsPLwBFf44af35b on tokens per min (TPM): Limit 30000, Used 30000, Requested 7734. Please try again in 15.467s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}


100%|██████████| 326/326 [28:25<00:00,  5.23s/it]



 Saved predictions to: /content/drive/MyDrive/Colab Notebooks/Projects/results/LLM Results/gpt4o_segment_predictions.csv

 Confusion Matrix:
[[155  27]
 [ 66  76]]

 Classification Report:
              precision    recall  f1-score   support

 Non-violent       0.70      0.85      0.77       182
     Violent       0.74      0.54      0.62       142

    accuracy                           0.71       324
   macro avg       0.72      0.69      0.69       324
weighted avg       0.72      0.71      0.70       324

