In [3]:
# =======================================================================
# CELL 1: SETUP, INSTALLS, AND FOLDER RESET
# =======================================================================
import os
import shutil
import warnings
# files is imported in Cell 2
import time # Used for simulated speed

# Install necessary libraries (scikit-learn is not always default in Colab)
print("Installing scikit-learn...")
!pip install scikit-learn

# Suppress sklearn warnings about future changes
warnings.filterwarnings("ignore", category=FutureWarning)

# --- Configuration (Colab-Specific) ---
CLIPS_FOLDER = "motion_clips"
FRAMES_FOLDER = "motion_frames"
LOG_FILENAME = "motion_metadata.csv"
SUMMARY_DATA_FILE = "ai_summary_features.csv"
FINAL_OUTPUT_FILE = "ai_classified_clips.csv"
DATA_FILES = [LOG_FILENAME, SUMMARY_DATA_FILE, FINAL_OUTPUT_FILE]

def reset_project_folders():
    """Clears out old clips, folders, and data files for a clean run."""
    print("--- 🗑️ STAGE 1: Starting Project Reset ---")

    # Delete existing folders
    for folder in [CLIPS_FOLDER, FRAMES_FOLDER]:
        if os.path.exists(folder):
            shutil.rmtree(folder)
            print(f"✅ Deleted existing folder: '{folder}'")

    # Delete existing data files
    for filename in DATA_FILES:
        if os.path.exists(filename):
            os.remove(filename)
            print(f"✅ Deleted old data file: '{filename}'")

    # Recreate empty clips folder
    os.makedirs(CLIPS_FOLDER, exist_ok=True)
    print(f"➕ Created new empty folder: '{CLIPS_FOLDER}'")

    print("--- Project Reset Complete. ---")

# Run the reset function
reset_project_folders()


Installing scikit-learn...
--- 🗑️ STAGE 1: Starting Project Reset ---
✅ Deleted existing folder: 'motion_clips'
✅ Deleted old data file: 'motion_metadata.csv'
➕ Created new empty folder: 'motion_clips'
--- Project Reset Complete. ---


In [4]:
# =======================================================================
# CELL 2: CLIP HARVESTER & RAW DATA LOGGER (HIGH SENSITIVITY)
# =======================================================================
import cv2
import os
from google.colab import files
from collections import deque
import numpy as np
import csv
import time

# --- Configuration (using global variables) ---
CLIPS_FOLDER = "motion_clips"
LOG_FILENAME = "motion_metadata.csv"
FRAME_SKIP_RATE = 5     # Process 1 frame out of every 5 (Speed)
MIN_CONTOUR_AREA_SENSITIVE = 4 # Restoring minimal size sensitivity

def run_harvester():
    print("Please upload your night sky video file.")
    uploaded = files.upload()
    video_path = next(iter(uploaded.keys()))
    print(f"Using video: {video_path}")

    # 1. MP4 Conversion (ensures compatibility)
    if not video_path.lower().endswith(".mp4"):
        mp4_path = os.path.splitext(video_path)[0] + ".mp4"
        !ffmpeg -i "{video_path}" -c:v libx264 -pix_fmt yuv420p "{mp4_path}" -y
        video_path = mp4_path
        print(f"Converted to MP4: {video_path}")

    # 2. Setup
    os.makedirs(CLIPS_FOLDER, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"ERROR: Video file '{video_path}' could not be opened.")

    # Get video properties
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    if fps == 0:
        print("ERROR: Could not read video FPS. Assuming 30 FPS for buffer calculations.")
        fps = 30

    # MOG2 parameters for noise stability
    # Keeping varThreshold=64 to block common high-frequency noise/flicker
    fgbg = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=64, detectShadows=False)

    # --- Initialization ---
    frame_count = 0
    clip_index = 0
    start_time = time.time()

    # Clip variables
    min_frames_for_clip = int(fps * 1.0) # 1.0s minimum clip length
    pre_buffer_frames = fps
    post_buffer_frames = fps

    pre_motion_buffer = deque(maxlen=pre_buffer_frames)
    post_motion_buffer = deque()

    clip_frames_count = 0
    motion_active = False
    clip_writer = None

    # AI Logging Setup
    metadata_file = open(LOG_FILENAME, 'w', newline='')
    csv_writer = csv.writer(metadata_file)
    csv_writer.writerow(["CLIP_ID", "FRAME_NUM", "CLIP_FRAME_COUNT", "MAX_AREA",
                          "CENTROID_X", "CENTROID_Y", "ASPECT_RATIO"])
    current_clip_frames_data = []

    # Define Contour Filters
    # MIN_CONTOUR_AREA_SENSITIVE = 4
    FRAME_AREA = frame_width * frame_height
    MAX_CONTOUR_AREA = int(FRAME_AREA * 0.005) if FRAME_AREA > 0 else 10000
    print(f"Cloud Filter (Max Area): {MAX_CONTOUR_AREA} pixels. Min Area: {MIN_CONTOUR_AREA_SENSITIVE} pixels.")
    print(f"Video FPS: {fps}. Processing video at 1/{FRAME_SKIP_RATE} speed.")

    # --- Main Loop ---
    while True:
        ret, frame = cap.read()
        if not ret:
            # Handle final clip cleanup on video end (same as fixed version)
            if motion_active and clip_writer is not None:
                 if clip_frames_count >= min_frames_for_clip:
                    for f in post_motion_buffer: clip_writer.write(f)
                    csv_writer.writerows(current_clip_frames_data)

                 clip_writer.release()
                 if clip_frames_count < min_frames_for_clip:
                        os.remove(clip_filename)
            break

        frame_count += 1

        # Frame skipping optimization
        if frame_count % FRAME_SKIP_RATE != 0:
            pre_motion_buffer.append(frame.copy())
            continue

        # Processing steps
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray_blur = cv2.GaussianBlur(gray, (5,5), 0)
        fgmask = fgbg.apply(gray_blur)
        _, thresh = cv2.threshold(fgmask, 127, 255, cv2.THRESH_BINARY)
        kernel = np.ones((1,1),np.uint8) # Dilation FIX
        thresh = cv2.dilate(thresh, kernel, iterations=1)

        # Contour Detection and Filtering
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        motion_detected = False
        max_area_in_frame, best_centroid_x, best_centroid_y, best_aspect_ratio = 0, -1, -1, 0
        valid_contours = []

        for c in contours:
            area = cv2.contourArea(c)
            # 🌟 HIGH SENSITIVITY CHECK: Only checking MIN and MAX area
            if MIN_CONTOUR_AREA_SENSITIVE < area < MAX_CONTOUR_AREA:
                motion_detected = True
                valid_contours.append(c)

                if area > max_area_in_frame:
                    max_area_in_frame = area
                    M = cv2.moments(c)
                    if M["m00"] != 0:
                        best_centroid_x = int(M["m10"] / M["m00"])
                        best_centroid_y = int(M["m01"] / M["m00"])
                    x, y, w, h = cv2.boundingRect(c)
                    best_aspect_ratio = w / h if h > 0 else 0


        # Log Data and Draw Rectangles (No persistence check, relying on MOG2)
        if motion_detected:
            # Draw visuals
            for c in valid_contours:
                x, y, w, h = cv2.boundingRect(c)
                pad = 8
                cv2.rectangle(frame, (max(0, x-pad), max(0, y-pad)),
                                     (min(frame.shape[1]-1, x+w+pad), min(frame.shape[0]-1, y+h+pad)),
                                     (0, 0, 255), 2)

            # Log raw data for the frame
            current_clip_frames_data.append([
                -1, frame_count, clip_frames_count + 1, max_area_in_frame,
                best_centroid_x, best_centroid_y, best_aspect_ratio
            ])
            clip_frames_count += 1

        # Clip Writing Logic
        if motion_detected:
            if not motion_active:
                clip_index += 1
                clip_filename = os.path.join(CLIPS_FOLDER, f"clip_{clip_index:04d}.mp4")

                for row in current_clip_frames_data: row[0] = clip_index

                clip_writer = cv2.VideoWriter(clip_filename, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame.shape[1], frame.shape[0]))
                motion_active = True
                for f in pre_motion_buffer: clip_writer.write(f)

            clip_writer.write(frame)
            post_motion_count = 0
            post_motion_buffer.clear()

        else:
            # Motion ended
            if motion_active:
                post_motion_buffer.append(frame.copy())
                post_motion_count += 1

                if post_motion_count >= post_buffer_frames:
                    # Clip is ending, log data regardless of length
                    csv_writer.writerows(current_clip_frames_data)

                    if clip_frames_count < min_frames_for_clip:
                        clip_writer.release()
                        os.remove(clip_filename)
                        clip_index -= 1
                    else:
                        for f in post_motion_buffer: clip_writer.write(f)
                        clip_writer.release()

                    clip_writer = None
                    motion_active = False
                    clip_frames_count = 0
                    post_motion_buffer.clear()
                    current_clip_frames_data.clear()
            else:
                current_clip_frames_data.clear()

        pre_motion_buffer.append(frame.copy())


    cap.release()
    metadata_file.close()
    end_time = time.time()
    elapsed_time = round(end_time - start_time, 2)
    print("--- Harvester Complete! ---")
    print(f"Total clips generated: {clip_index}")
    print(f"Processing time: {elapsed_time} seconds.")

run_harvester()


Please upload your night sky video file.


Saving 140 mins unedited - Trim.mp4 to 140 mins unedited - Trim (1).mp4
Using video: 140 mins unedited - Trim (1).mp4
Cloud Filter (Max Area): 10368 pixels. Min Area: 4 pixels.
Video FPS: 29. Processing video at 1/5 speed.
--- Harvester Complete! ---
Total clips generated: 1
Processing time: 133.39 seconds.


In [5]:
# =======================================================================
# CELL 3: AI FEATURE PRE-PROCESSING (STAGE 3)
# =======================================================================
import pandas as pd
import numpy as np
import os

# --- Configuration ---
LOG_FILENAME = "motion_metadata.csv"
SUMMARY_DATA_FILE = "ai_summary_features.csv"

def run_pre_processor():
    """Calculates Avg Speed, Consistency, and Duration per clip from raw data."""
    print("\n--- 🧠 STAGE 3: Running AI Feature Pre-processing ---")

    if not os.path.exists(LOG_FILENAME):
        print(f"ERROR: Raw data file '{LOG_FILENAME}' not found. Check Cell 2 output.")
        return

    # 1. Load Raw Data
    df = pd.read_csv(LOG_FILENAME)
    if df.empty or len(df[df['CLIP_ID'] != -1]) == 0:
         print("ERROR: Loaded 0 valid data points. Cannot proceed. Check Cell 2 data logging.")
         return

    # Clean up frames where centroid calculation was impossible
    df = df[df['CENTROID_X'] != -1].copy()

    # 2. Calculate Frame-to-Frame Metrics (Instantaneous Speed)

    # Use shift() grouped by CLIP_ID to compare current position to previous position
    df['PREV_X'] = df.groupby('CLIP_ID')['CENTROID_X'].shift(1)
    df['PREV_Y'] = df.groupby('CLIP_ID')['CENTROID_Y'].shift(1)

    # Fill the first frame of each clip (where shift() creates NaN) with its own value
    df['PREV_X'] = df['PREV_X'].fillna(df['CENTROID_X'])
    df['PREV_Y'] = df['PREV_Y'].fillna(df['CENTROID_Y'])

    # Calculate pixel distance (SPEED) using Pythagorean theorem
    df['SPEED'] = np.sqrt(
        (df['CENTROID_X'] - df['PREV_X'])**2 +
        (df['CENTROID_Y'] - df['PREV_Y'])**2
    )

    # 3. Aggregate Features Per Clip (The Final AI Input)
    summary_df = df.groupby('CLIP_ID').agg(
        # Trajectory Metrics
        Total_Duration_Frames=('CLIP_FRAME_COUNT', 'max'),
        Avg_Speed_Pixel_Per_Frame=('SPEED', 'mean'),
        Speed_Consistency_STD=('SPEED', 'std'), # Standard deviation = Measure of stability/consistency

        # Object Metrics
        Max_Area_Overall=('MAX_AREA', 'max'),
        Avg_Aspect_Ratio=('ASPECT_RATIO', 'mean'),
        Max_Aspect_Ratio=('ASPECT_RATIO', 'max'),
    ).reset_index()

    # Fill NaN values for Speed_Consistency_STD (occurs if a clip is only 1 frame long)
    summary_df['Speed_Consistency_STD'] = summary_df['Speed_Consistency_STD'].fillna(0)

    # 4. Save Final Data
    summary_df.to_csv(SUMMARY_DATA_FILE, index=False)

    print(f"--- Pre-processing Complete! Generated {len(summary_df)} summary rows. ---")

run_pre_processor()



--- 🧠 STAGE 3: Running AI Feature Pre-processing ---
--- Pre-processing Complete! Generated 2 summary rows. ---


In [7]:
# =======================================================================
# CELL 5: AUTOMATED CLIP ORGANIZATION (STAGE 6)
# =======================================================================
import pandas as pd
import os
import shutil

# --- Configuration (using global variables) ---
CLIPS_FOLDER = "motion_clips"
FINAL_OUTPUT_FILE = "ai_classified_clips.csv"

def organize_clips():
    """Reads the final AI classification and moves clips into labeled subfolders."""
    print("\n--- 📂 STAGE 6: Organizing Clips into Folders ---")

    if not os.path.exists(FINAL_OUTPUT_FILE):
        print(f"ERROR: Final data file '{FINAL_OUTPUT_FILE}' not found. Check Cell 4 output.")
        return

    df = pd.read_csv(FINAL_OUTPUT_FILE)
    base_clip_folder = CLIPS_FOLDER

    if not os.path.exists(base_clip_folder):
         print(f"ERROR: Base clips folder '{base_clip_folder}' not found. Did Cell 2 run?")
         return

    # Process each clip in the final dataset
    for index, row in df.iterrows():
        clip_id = row['CLIP_ID']
        # Use the FINAL_CLASSIFICATION column generated by the Isolation Forest
        classification = row['FINAL_CLASSIFICATION']

        # 1. Determine the source and destination paths
        source_filename = f"clip_{clip_id:04d}.mp4"
        source_path = os.path.join(base_clip_folder, source_filename)

        destination_folder = os.path.join(base_clip_folder, classification)
        destination_path = os.path.join(destination_folder, source_filename)

        # 2. Create the destination folder if it doesn't exist (e.g., 'motion_clips/0_ANOMALY_UAP_REVIEW')
        os.makedirs(destination_folder, exist_ok=True)

        # 3. Move the file
        if os.path.exists(source_path):
            try:
                # shutil.move is the critical step that physically sorts the file
                shutil.move(source_path, destination_path)
            except Exception as e:
                # Handles cases where a clip might have been skipped or an error occurred
                print(f"Warning: Could not move {source_filename} to {classification}. Error: {e}")

    print("--- File Organization Complete! ---")
    print("Your project is fully processed. Download the 'motion_clips' folder to review the results.")

run_organize_clips()


NameError: name 'run_organize_clips' is not defined