<a href="https://colab.research.google.com/github/silent-doom/agentic-ai/blob/feature%2Feditor-agent/Faizan_Editor_agent_v1_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import json
import time
import re
import sys
import subprocess
import urllib.request

In [None]:
# =================================================================
# 0. LIGHTWEIGHT SETUP (No MediaPipe/TensorFlow)
# =================================================================

def install_lightweight_dependencies():
    """Make sure moviepy, yt-dlp and whisper are importable.

    If any of the three modules cannot be imported, the Python packages are
    installed with pip and ImageMagick is installed and its policy file is
    patched through shell commands. When all imports succeed nothing is done.
    """
    required_modules = ("moviepy", "yt_dlp", "whisper")
    try:
        for module_name in required_modules:
            __import__(module_name)
    except ImportError:
        pass
    else:
        # Everything is already importable; nothing to install.
        return

    print("üì¶ Installing lightweight dependencies...")
    pip_command = [
        sys.executable, "-m", "pip", "install", "-q",
        "moviepy==1.0.3", "yt-dlp", "git+https://github.com/openai/whisper.git",
    ]
    # check_call raises CalledProcessError if pip exits with a non-zero status.
    subprocess.check_call(pip_command)

    # System deps for MoviePy: install ImageMagick, then rewrite 'none' to
    # 'read,write' in its policy file. Both are best-effort (check=False).
    system_commands = (
        "apt update -qq && apt install -qq imagemagick",
        "sed -i 's/none/read,write/' /etc/ImageMagick-6/policy.xml",
    )
    for shell_command in system_commands:
        subprocess.run(shell_command, shell=True, check=False)

# Run the dependency check when this cell executes, before the third-party
# imports (cv2, yt_dlp, whisper, moviepy) in the following cell.
install_lightweight_dependencies()

In [None]:
# Imports
import cv2
import numpy as np
import yt_dlp
import whisper
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
from moviepy.video.fx.all import crop
from google.colab import drive

  IMAGEMAGICK_BINARY = r"C:\Program Files\ImageMagick-6.8.8-Q16\magick.exe"
  lines_video = [l for l in lines if ' Video: ' in l and re.search('\d+x\d+', l)]
  rotation_lines = [l for l in lines if 'rotate          :' in l and re.search('\d+$', l)]
  match = re.search('\d+$', rotation_line)
  if event.key is 'enter':



In [None]:
# =================================================================
# 1. CONFIGURATION & ROBUST PATH DISCOVERY
# =================================================================

def get_robust_paths():
    """
    Locate the AI_Transcripts folder across the known Drive mount points.

    Each candidate folder is checked in order; the first one that exists and
    contains 'viral_clip_plan.json' wins. If none qualifies, paths under the
    standard Colab mount point are returned without checking that they exist.

    Returns (TRANSCRIPT_FOLDER, INPUT_PLAN_FILE, OUTPUT_FOLDER)
    """
    plan_name = 'viral_clip_plan.json'
    shorts_name = 'final_shorts'

    # First entry is the standard Colab mount point; the others cover the
    # mount point used by the earlier POC and the "MyDrive" spelling.
    candidates = (
        "/content/drive/My Drive/AI_Transcripts",
        "/content/gdrive/MyDrive/AI_Transcripts",
        "/content/gdrive/My Drive/AI_Transcripts",
        "/content/drive/MyDrive/AI_Transcripts",
    )

    for root in candidates:
        if not os.path.exists(root):
            continue

        # Force a refresh of the directory listing (fixes Colab stale file issues)
        os.listdir(root)

        candidate_plan = os.path.join(root, plan_name)
        if not os.path.exists(candidate_plan):
            continue

        print(f"‚úÖ Found planning data at: {root}")
        return root, candidate_plan, os.path.join(root, shorts_name)

    # Nothing found: fall back to the standard path.
    fallback = candidates[0]
    return (
        fallback,
        os.path.join(fallback, plan_name),
        os.path.join(fallback, shorts_name),
    )


# Haar Cascade for Face Detection (Standard Computer Vision model)
HAAR_URL = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml"
HAAR_PATH = "/content/haarcascade_frontalface_default.xml"

# NOTE: the output folder is deliberately NOT created here. OUTPUT_FOLDER is
# not defined at module level (it is a local of run_editor_agent), so creating
# it in this cell raised NameError on a fresh runtime. run_editor_agent()
# creates the folder itself once the paths have been resolved.

# Replace with the video used in Phase 1 if not dynamically loaded
YOUTUBE_URL = "https://www.youtube.com/watch?v=Rni7Fz7208c"

In [None]:
# =================================================================
# 2. UTILITY: SEGMENT DOWNLOAD
# =================================================================

def _parse_time_to_seconds(time_str):
    h, m, s = map(int, time_str.split(':'))
    return h * 3600 + m * 60 + s

def download_segment(url, start_time, end_time, output_path):
    """Download one time range of a video with yt-dlp.

    Args:
        url: Video URL handed to yt-dlp.
        start_time: Start of the range, in a format accepted by
            _parse_time_to_seconds.
        end_time: End of the range, same format as start_time.
        output_path: Destination file; also used as yt-dlp's 'outtmpl'.

    Returns:
        output_path if the file already exists or the download succeeds,
        None if anything goes wrong (the error is printed, not raised).
    """
    if os.path.exists(output_path):
        print(f"‚úÖ Segment exists: {output_path}")
        return output_path

    print(f"‚¨áÔ∏è Downloading segment: {start_time} - {end_time}...")

    try:
        # Parse the timestamps once, inside the try so a malformed timestamp
        # is reported like any other download failure.
        segment = {
            'start_time': _parse_time_to_seconds(start_time),
            'end_time': _parse_time_to_seconds(end_time),
        }

        ydl_opts = {
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
            'outtmpl': output_path,
            'quiet': True,
            # yt-dlp calls this with (info_dict, ydl); hand back a fresh copy
            # of the single requested range each time.
            'download_ranges': lambda _info, _ydl: [dict(segment)],
            'force_keyframes_at_cuts': True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return output_path
    except Exception as e:
        print(f"üî¥ Download error: {e}")
        # A failed run may leave a truncated file behind; remove it so the
        # "segment exists" shortcut above never picks up a broken download.
        if os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None

In [None]:
# =================================================================
# 3. LIGHTWEIGHT FACE TRACKING (OpenCV Haar)
# =================================================================

def setup_face_detector():
    """Return a cv2.CascadeClassifier built from the Haar Cascade XML.

    The XML is fetched from HAAR_URL into HAAR_PATH first if that file does
    not exist yet.
    """
    model_missing = not os.path.exists(HAAR_PATH)
    if model_missing:
        print("üì• Downloading Face Detection Model (Haar Cascade)...")
        urllib.request.urlretrieve(HAAR_URL, HAAR_PATH)

    detector = cv2.CascadeClassifier(HAAR_PATH)
    return detector

def detect_face_x_center(frame, face_cascade):
    """Return the horizontal centre of the largest detected face.

    Args:
        frame: Colour image array in RGB channel order, as produced by
            MoviePy's clip.get_frame().
        face_cascade: A cv2.CascadeClassifier used for detection.

    Returns:
        The face centre's X coordinate divided by the frame width
        (0.0 - 1.0), or None when no face is detected.
    """
    # Frames come from MoviePy in RGB order, so convert with RGB2GRAY;
    # BGR2GRAY would apply the red and blue luminance weights the wrong way
    # round.
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

    # Use len() rather than truthiness: the result may be a NumPy array.
    if len(faces) == 0:
        return None

    # Pick the largest face by bounding-box area (width * height).
    x, _y, w, _h = max(faces, key=lambda f: f[2] * f[3])

    center_x = x + (w / 2)
    return center_x / frame.shape[1]  # Return relative X (0.0 - 1.0)

def vertical_crop_smart(clip):
    """Crop a clip to a 9:16 window centred on the detected speaker.

    One frame per second is sampled and passed to detect_face_x_center; the
    average relative X of the detections (0.5 when there are none) becomes the
    centre of the crop window. The window is clamped to the frame and the
    result is resized to a height of 1920 pixels.

    Args:
        clip: A MoviePy video clip (needs .duration, .size and .get_frame).

    Returns:
        The cropped clip resized to 1920 pixels high with an even width.
    """
    print("ü§ñ Tracking face for smart crop (OpenCV)...")

    face_cascade = setup_face_detector()
    face_x_positions = []
    failed_frames = 0

    # Analyze 1 frame per second; always sample at least the frame at t=0.
    sample_count = max(int(clip.duration), 1)

    for t in range(sample_count):
        try:
            # MoviePy returns RGB frames; grayscale conversion happens inside
            # detect_face_x_center.
            x_pos = detect_face_x_center(clip.get_frame(t), face_cascade)
        except Exception:
            # Tracking is best-effort: a frame that cannot be analysed is
            # skipped, but counted so the failure is not silent.
            failed_frames += 1
            continue
        # Explicit None check so a legitimate position of 0.0 is not dropped.
        if x_pos is not None:
            face_x_positions.append(x_pos)

    if failed_frames:
        print(f"Face detection failed on {failed_frames} of {sample_count} sampled frames.")

    # Calculate average position (fall back to the horizontal centre).
    avg_x = sum(face_x_positions) / len(face_x_positions) if face_x_positions else 0.5
    print(f"‚úÖ Center detected at relative X: {avg_x:.2f}")

    w, h = clip.size
    target_ratio = 9 / 16

    # Integer crop width, never wider than the source (covers sources that
    # are already narrower than 9:16).
    crop_width = min(int(h * target_ratio), w)

    # Clamp the window to the frame, keeping x1 an integer.
    x1 = int(avg_x * w - crop_width / 2)
    x1 = max(0, min(x1, w - crop_width))

    cropped = crop(clip, x1=x1, y1=0, width=crop_width, height=h)

    # Resize to an explicit size so the output width is even; truncating the
    # crop width can otherwise give an odd width (e.g. 1079 for 1080p input).
    target_height = 1920
    target_width = int(round(crop_width * target_height / h))
    target_width += target_width % 2
    return cropped.resize(newsize=(target_width, target_height))

In [None]:
# =================================================================
# 4. CAPTIONING
# =================================================================

# Whisper models keyed by model name, so the weights are loaded only once per
# session instead of once per clip.
_WHISPER_MODEL_CACHE = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model called *name*, loading it on first use."""
    if name not in _WHISPER_MODEL_CACHE:
        _WHISPER_MODEL_CACHE[name] = whisper.load_model(name)
    return _WHISPER_MODEL_CACHE[name]


def generate_dynamic_captions(video_clip, raw_audio_path):
    """Generate Whisper-based word-level caption clips.

    The clip's audio is written to raw_audio_path, transcribed with the
    Whisper "base" model using word timestamps, and every word becomes one
    TextClip positioned and timed to match it.

    Args:
        video_clip: MoviePy clip whose audio track is transcribed.
        raw_audio_path: Path the audio is written to before transcription.

    Returns:
        A list of TextClip objects; empty when the clip has no audio track or
        no words were recognised.
    """
    print("üìù Generating captions...")

    # A clip without an audio track has nothing to transcribe.
    if video_clip.audio is None:
        print("No audio track found; skipping captions.")
        return []

    video_clip.audio.write_audiofile(raw_audio_path, logger=None)

    model = _get_whisper_model("base")
    result = model.transcribe(raw_audio_path, word_timestamps=True)

    caption_clips = []

    for segment in result['segments']:
        for word in segment.get('words', []):
            txt = word['word'].strip()
            # Skip tokens that are empty after stripping whitespace; there is
            # no text to render for them.
            if not txt:
                continue

            start = word['start']
            # Keep every caption on screen for at least 0.1 seconds.
            duration = max(word['end'] - start, 0.1)

            # Simple Karaoke Style
            txt_clip = (TextClip(txt, fontsize=85, color='yellow', font='Arial-Bold', stroke_color='black', stroke_width=3)
                        .set_position(('center', 0.8), relative=True)
                        .set_start(start)
                        .set_duration(duration))
            caption_clips.append(txt_clip)

    return caption_clips

In [None]:
# =================================================================
# 5. MAIN RUNNER
# =================================================================

def run_editor_agent():
    """Render every clip listed in the plan file as a vertical captioned short.

    Steps: mount Google Drive if /content/drive is absent, resolve the plan
    and output paths, then for each entry under 'viral_clips' download the
    segment, crop it vertically, add captions and write the result into the
    output folder. An error in one clip is printed and the next clip is
    processed; temporary files are removed after each clip.
    """
    # Attempt to mount drive if not already visible
    if not os.path.exists("/content/drive"):
        print("Mounting Google Drive...")
        drive.mount('/content/drive')

    # Locate files
    _transcript_folder, input_plan_file, output_folder = get_robust_paths()
    os.makedirs(output_folder, exist_ok=True)

    if not os.path.exists(input_plan_file):
        print("üî¥ Error: Plan file NOT found at any expected location.")
        print(f"Checked: {input_plan_file}")
        print("Debugging: Current /content/drive contents:")
        # Raw string: the backslash must reach the shell unchanged, and a
        # plain string literal would be an invalid escape sequence in Python.
        os.system(r"ls -R /content/drive/My\ Drive | grep AI_Transcripts -A 5")
        return

    with open(input_plan_file, 'r') as f:
        plan_data = json.load(f)

    clips = plan_data.get('viral_clips', [])
    print(f"üé¨ Processing {len(clips)} clips...")

    for i, clip in enumerate(clips):
        clip_id = clip.get('clip_id', i+1)

        temp_vid = f"/content/temp_{clip_id}.mp4"
        temp_aud = f"/content/temp_{clip_id}.wav"

        raw = None
        final = None
        try:
            # Reading the plan fields inside the try means one malformed
            # entry is reported and skipped instead of aborting the batch.
            clean_title = re.sub(r'[^a-zA-Z0-9]', '', clip['viral_hook'][:15])

            print(f"\n--- Clip {clip_id}: {clip['viral_hook']} ---")

            final_path = os.path.join(output_folder, f"Short_{clip_id}_{clean_title}.mp4")

            # 1. Download Partial Segment
            seg_path = download_segment(YOUTUBE_URL, clip['start_time'], clip['end_time'], temp_vid)
            if not seg_path:
                continue  # the finally block below still cleans up

            # 2. Edit
            raw = VideoFileClip(seg_path)
            vertical = vertical_crop_smart(raw)
            captions = generate_dynamic_captions(vertical, temp_aud)

            # 3. Render
            final = CompositeVideoClip([vertical] + captions)
            print(f"üíæ Rendering: {final_path}")
            # Use 'medium' preset for speed, threads for multi-core processing
            final.write_videofile(final_path, codec='libx264', audio_codec='aac', fps=24, preset='medium', threads=4, logger=None)
            print("‚úÖ Done.")

        except Exception as e:
            print(f"üî¥ Error: {e}")
        finally:
            # Close the clips before deleting the files they were reading.
            for media in (final, raw):
                if media is None:
                    continue
                try:
                    media.close()
                except Exception as close_error:
                    print(f"Warning: could not close clip: {close_error}")
            if os.path.exists(temp_vid): os.remove(temp_vid)
            if os.path.exists(temp_aud): os.remove(temp_aud)

# Entry point: run the whole pipeline when this cell/script is executed
# directly rather than imported.
if __name__ == "__main__":
    run_editor_agent()



  os.system("ls -R /content/drive/My\ Drive | grep AI_Transcripts -A 5")



Mounting Google Drive...
Mounted at /content/drive
‚úÖ Found planning data at: /content/drive/My Drive/AI_Transcripts
üé¨ Processing 5 clips...

--- Clip 1: Old Twitter was extremely far left; I am restoring balance now. ---
‚¨áÔ∏è Downloading segment: 00:05:00 - 00:06:05...




ü§ñ Tracking face for smart crop (OpenCV)...
üì• Downloading Face Detection Model (Haar Cascade)...
‚úÖ Center detected at relative X: 0.54
üìù Generating captions...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 139M/139M [00:01<00:00, 106MiB/s]



üíæ Rendering: /content/drive/My Drive/AI_Transcripts/final_shorts/Short_1_OldTwitterwas.mp4
‚úÖ Done.

--- Clip 2: Stop the brain rot: Social media optimizes for dopamine addiction. ---
‚¨áÔ∏è Downloading segment: 00:08:00 - 00:09:00...




ü§ñ Tracking face for smart crop (OpenCV)...
‚úÖ Center detected at relative X: 0.54
üìù Generating captions...





üíæ Rendering: /content/drive/My Drive/AI_Transcripts/final_shorts/Short_2_Stopthebrain.mp4
‚úÖ Done.

--- Clip 3: Working will be optional, like a hobby, in less than 20 years. ---
‚¨áÔ∏è Downloading segment: 00:32:40 - 00:33:45...




ü§ñ Tracking face for smart crop (OpenCV)...
‚úÖ Center detected at relative X: 0.56
üìù Generating captions...





üíæ Rendering: /content/drive/My Drive/AI_Transcripts/final_shorts/Short_3_Workingwillbe.mp4
‚úÖ Done.

--- Clip 4: AI is the ONLY solution for the massive US debt crisis. ---
‚¨áÔ∏è Downloading segment: 00:46:30 - 00:47:30...




ü§ñ Tracking face for smart crop (OpenCV)...
‚úÖ Center detected at relative X: 0.44
üìù Generating captions...





üíæ Rendering: /content/drive/My Drive/AI_Transcripts/final_shorts/Short_4_AIistheONLY.mp4
‚úÖ Done.

--- Clip 5: Based on video game progress, we are probably in a simulation. ---
‚¨áÔ∏è Downloading segment: 00:52:08 - 00:53:05...




ü§ñ Tracking face for smart crop (OpenCV)...
‚úÖ Center detected at relative X: 0.47
üìù Generating captions...





üíæ Rendering: /content/drive/My Drive/AI_Transcripts/final_shorts/Short_5_Basedonvideo.mp4
‚úÖ Done.
