In [None]:
from google.colab import drive
drive.mount('/content/drive')
VIDEO_PATH = '/content/drive/MyDrive/DS681_AssignmeAnt4/Warriors-Lakers-Instant-Classic-2021.mp4'
!apt-get -y install ffmpeg
!pip install -q transformers accelerate torch torchvision opencv-python pillow pandas

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.


In [None]:
import json
import os
import subprocess

import cv2
import numpy as np
import pandas as pd
import torch
from PIL import Image
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

In [None]:
def detect_play_boundaries(video_path, threshold=35, min_gap=90):
    """Find candidate play start times by detecting abrupt frame-to-frame changes.

    Each frame is converted to grayscale and compared with the previous frame.
    When the mean absolute pixel difference is greater than ``threshold`` the
    frame index is recorded as a cut. Cut indices are then converted to whole
    seconds (truncated) and thinned so that each kept time is more than
    ``min_gap`` seconds after the previously kept one.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        threshold: Mean absolute grayscale difference above which two
            consecutive frames are treated as a cut.
        min_gap: Spacing, in seconds, that a cut must exceed relative to the
            last kept cut in order to be kept itself.

    Returns:
        list[int]: Kept cut times in whole seconds, in the order encountered.

    Raises:
        IOError: If the video cannot be opened.
        ValueError: If the video reports a frame rate that is not positive.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # An unopened capture would otherwise just yield "no cuts", hiding a bad path.
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        # `not fps > 0` also rejects NaN, which `fps <= 0` would let through.
        if not fps > 0:
            raise ValueError(f"Video reports an invalid frame rate ({fps}): {video_path}")

        prev_gray = None
        frame_idx = 0
        cuts = []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if prev_gray is not None:
                diff = cv2.absdiff(gray, prev_gray)
                score = np.mean(diff)
                if score > threshold:
                    cuts.append(frame_idx)
            prev_gray = gray
            frame_idx += 1
    finally:
        # Release the capture even when decoding or validation raises.
        cap.release()

    # Convert frames → seconds and filter close cuts
    times = [int(f / fps) for f in cuts]
    filtered = []
    for t in times:
        if not filtered or t - filtered[-1] > min_gap:
            filtered.append(t)
    return filtered

# Scan the whole video once; the result is a list of cut times in whole seconds.
play_times = detect_play_boundaries(VIDEO_PATH)

# Report how many boundaries were found and preview the first few.
print(
    f"Detected {len(play_times)} play boundaries",
    f"First 10 play times: {play_times[:10]}",
    sep="\n",
)

Detected 0 play boundaries
First 10 play times: []


In [None]:
os.makedirs('plays', exist_ok=True)
os.makedirs('frames', exist_ok=True)

segment_duration = 10  # seconds


def _run_ffmpeg(args):
    """Run ffmpeg with the given argument list; return True when it exits with status 0.

    Arguments are passed as a list (no shell), so paths containing spaces or
    shell metacharacters are safe. The banner and informational logging are
    suppressed; on failure the captured stderr is printed so the problem is visible.
    """
    cmd = ['ffmpeg', '-y', '-nostdin', '-hide_banner', '-loglevel', 'error', *args]
    completed = subprocess.run(cmd, capture_output=True, text=True)
    if completed.returncode != 0:
        print(f"ffmpeg failed (exit {completed.returncode}): {completed.stderr.strip()}")
        return False
    return True


print(f"\nSegmenting video '{VIDEO_PATH}' into plays")
# Only clips written during this run are carried forward, so leftovers from an
# earlier run in 'plays/' cannot leak into frame extraction.
segmented_play_files = []
for i, start_time_sec in enumerate(play_times):
    output_play_filename = f"play_{i:04d}.mp4"
    output_play_path = os.path.join('plays', output_play_filename)
    segment_ok = _run_ffmpeg([
        '-ss', str(start_time_sec),
        '-i', VIDEO_PATH,
        '-t', str(segment_duration),
        '-c:v', 'libx264', '-preset', 'veryfast', '-crf', '23',
        '-c:a', 'aac', '-b:a', '128k',
        output_play_path,
    ])
    if segment_ok:
        segmented_play_files.append(output_play_filename)

if not segmented_play_files:
    print("Warning: no plays were segmented; check the detected play boundaries.")

print("\nExtracting frames from segmented plays...")
for play_file in segmented_play_files:
    name = os.path.splitext(play_file)[0]
    out_dir = os.path.join('frames', name)
    os.makedirs(out_dir, exist_ok=True)
    # fps=1 keeps one frame per second of the clip, written as 0001.jpg, 0002.jpg, ...
    _run_ffmpeg([
        '-i', os.path.join('plays', play_file),
        '-vf', 'fps=1',
        os.path.join(out_dir, '%04d.jpg'),
    ])

print("Frame extraction complete.")



Segmenting video '/content/drive/MyDrive/DS681_AssignmeAnt4/Warriors-Lakers-Instant-Classic-2021.mp4' into plays

Extracting frames from segmented plays...
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-l

In [None]:
# Announce the load up front; the checkpoint is large and this step is slow.
print("\nLoading Qwen2-VL model...")

# Half-precision weights, with device placement delegated to device_map="auto".
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct",
    device_map="auto",
    torch_dtype=torch.float16,
)

# The processor is loaded from the same checkpoint as the model.
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")

print("Model loaded successfully.")


Loading Qwen2-VL model...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Model loaded successfully.


In [None]:
results = []

# The instruction text is the same for every frame, so it is built once here
# instead of on every loop iteration.
FRAME_PROMPT = (
    "You are analyzing a basketball broadcast frame.\n\n"
    "Identify visible player actions.\n\n"
    "Return JSON in this format:\n"
    "[{\n"
    '  "timestamp": "MM:SS",\n'
    '  "player": "Name or Unknown",\n'
    '  "action": "Shoot | Pass | Dribble | Defend | Rebound | Assist",\n'
    '  "birdseye": "Bird\'s-eye court description"\n'
    "}]"
)

print("\nAnalyzing frames...")
for play in sorted(os.listdir('frames')):
    play_dir = f'frames/{play}'
    if not os.path.isdir(play_dir):
        continue

    frame_files = sorted(f for f in os.listdir(play_dir) if f.endswith('.jpg'))

    # Process first 3 frames per play
    for img_name in frame_files[:3]:
        img_path = os.path.join(play_dir, img_name)
        # convert() returns a fully loaded copy, so the file handle can be closed right away.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # Chat-format message: one image entry followed by the text instruction.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": img_path},
                    {"type": "text", "text": FRAME_PROMPT},
                ],
            }
        ]

        # Render the messages into the prompt string, ending with the generation prompt.
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize the prompt and preprocess the image together.
        inputs = processor(
            text=[text],
            images=[image],
            padding=True,
            return_tensors="pt"
        )

        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Greedy decoding, capped at 300 new tokens.
        output = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=False
        )

        # generate() returns prompt tokens followed by the new tokens. Drop the
        # prompt part so the stored analysis holds only the model's answer,
        # not the echoed system/user text.
        prompt_len = inputs["input_ids"].shape[1]
        generated_only = output[:, prompt_len:]

        decoded = processor.batch_decode(
            generated_only,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )[0]

        results.append({
            'play': play,
            'frame': img_name,
            'analysis': decoded
        })

        print(f"Processed {play}/{img_name}")


The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



Analyzing frames...
Processed play_0000/0001.jpg
Processed play_0000/0002.jpg
Processed play_0000/0003.jpg
Processed play_0001/0001.jpg
Processed play_0001/0002.jpg
Processed play_0001/0003.jpg
Processed play_0002/0001.jpg
Processed play_0002/0002.jpg
Processed play_0002/0003.jpg
Processed play_0003/0001.jpg
Processed play_0003/0002.jpg
Processed play_0003/0003.jpg
Processed play_0004/0001.jpg
Processed play_0004/0002.jpg
Processed play_0004/0003.jpg
Processed play_0005/0001.jpg
Processed play_0005/0002.jpg
Processed play_0005/0003.jpg
Processed play_0006/0001.jpg
Processed play_0006/0002.jpg
Processed play_0006/0003.jpg
Processed play_0007/0001.jpg
Processed play_0007/0002.jpg
Processed play_0007/0003.jpg
Processed play_0008/0001.jpg
Processed play_0008/0002.jpg
Processed play_0008/0003.jpg
Processed play_0009/0001.jpg
Processed play_0009/0002.jpg
Processed play_0009/0003.jpg
Processed play_0010/0001.jpg
Processed play_0010/0002.jpg
Processed play_0010/0003.jpg
Processed play_0011/00

In [None]:
# Reshape the per-frame result dicts into table rows with display-friendly column names.
rows = [
    {
        'Play': entry['play'],
        'Frame': entry['frame'],
        'Raw Output': entry['analysis'],
    }
    for entry in results
]
df = pd.DataFrame(rows)

print(f"\nAnalysis complete. Processed {len(df)} frames.")
print(df.head())

# Persist the table as CSV, without the index column.
df.to_csv('basketball_analysis_results.csv', index=False)
print("\nResults saved to 'basketball_analysis_results.csv'")


Analysis complete. Processed 171 frames.
        Play     Frame                                         Raw Output
0  play_0000  0001.jpg  system\nYou are a helpful assistant.\nuser\nYo...
1  play_0000  0002.jpg  system\nYou are a helpful assistant.\nuser\nYo...
2  play_0000  0003.jpg  system\nYou are a helpful assistant.\nuser\nYo...
3  play_0001  0001.jpg  system\nYou are a helpful assistant.\nuser\nYo...
4  play_0001  0002.jpg  system\nYou are a helpful assistant.\nuser\nYo...

Results saved to 'basketball_analysis_results.csv'
