# In [None]:
import kagglehub
import os
import glob
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import requests

# Download the TVSum50 dataset through kagglehub. The returned value is used
# below as the root directory for recursive file searches
# (presumably a local cache path -- confirm against the kagglehub docs).
path = kagglehub.dataset_download("georgelifinrell/tvsum50-video-summarization")
NEW_KAGGLE_ROOT = path
# Substring matched against file names in VIDEO_DIR to pick the evaluation video.
TEST_VIDEO_ID = "-esJrBWj2d8"
# Keep one frame out of every SKIP_FRAMES decoded frames.
SKIP_FRAMES = 15
# Number of keyframes (k-means clusters) requested per evaluated video.
TOP_K = 5
# Number of training epochs for the autoencoder.
EPOCHS = 10
# Learning rate (presumably for the autoencoder's optimizer).
LR = 1e-4
# Placeholders; setup_paths() overwrites both with the locations it discovers.
VIDEO_DIR = ""
LABEL_FILE = "ydata-tvsum50-anno.tsv"

print(f"TensorFlow Version: {tf.__version__}")

def setup_paths():
    """Locate the dataset videos and the annotation file.

    Searches ``NEW_KAGGLE_ROOT`` recursively for ``*.mp4`` files and for a
    ``*anno*.tsv`` file. When no annotation file is found locally, it is
    downloaded from the TVSum GitHub repository into the current working
    directory.

    Side effects:
        Rebinds the module globals ``VIDEO_DIR`` (directory of the first
        video found) and ``LABEL_FILE`` (path of the annotation file).

    Returns:
        bool: True when both a video directory and an annotation file are
        available, False otherwise.
    """
    global VIDEO_DIR, LABEL_FILE
    print("--- Configuring Paths ---")

    video_search = glob.glob(os.path.join(NEW_KAGGLE_ROOT, "**", "*.mp4"), recursive=True)
    if not video_search:
        print("❌ ERROR: Could not find videos!")
        return False
    VIDEO_DIR = os.path.dirname(video_search[0])
    print(f"✅ Found Video Directory: {VIDEO_DIR} ({len(video_search)} videos)")

    local_search = glob.glob(os.path.join(NEW_KAGGLE_ROOT, "**", "*anno*.tsv"), recursive=True)
    if local_search:
        LABEL_FILE = local_search[0]
        print(f"✅ Found Annotation File: {LABEL_FILE}")
        return True

    print("⬇️ Downloading annotation file from GitHub...")
    url = "https://raw.githubusercontent.com/yalesong/tvsum/master/data/ydata-tvsum50-anno.tsv"
    target = "ydata-tvsum50-anno.tsv"
    try:
        # A timeout keeps a stalled connection from hanging the run forever.
        r = requests.get(url, timeout=30)
        # Without this check an HTTP error page (404/500) would be written to
        # disk and reported as a successful download.
        r.raise_for_status()
        with open(target, 'wb') as f:
            f.write(r.content)
    except (requests.RequestException, OSError) as e:
        print(f"❌ Download Failed: {e}")
        return False

    LABEL_FILE = target
    print(f"✅ Downloaded & Saved: {LABEL_FILE}")
    return True

def preprocess_video(video_path):
    """Sample frames from a video and return resized and original copies.

    Every ``SKIP_FRAMES``-th decoded frame (starting with the first) is
    converted from BGR to RGB. The RGB frame is stored as-is and also
    resized to 224x224.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        tuple: ``(frames, original_frames)`` as NumPy arrays built from the
        sampled frames; ``frames`` holds the 224x224 versions. Both arrays
        are empty (``shape == (0,)``) when the video cannot be opened or
        contains no readable frame.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    original_frames = []
    count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if count % SKIP_FRAMES == 0:
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                original_frames.append(frame_rgb)
                frames.append(cv2.resize(frame_rgb, (224, 224)))
            count += 1
    finally:
        # Always free the decoder handle, even if a conversion above raises.
        cap.release()
    return np.array(frames), np.array(original_frames)

def build_autoencoder():
    """Build a convolutional autoencoder for 224x224 RGB images.

    The encoder stacks three Conv2D + MaxPooling2D stages (64, 32, 16
    filters), halving the spatial size each time down to a 28x28x16 latent
    map. The decoder mirrors it with three Conv2D + UpSampling2D stages
    (16, 32, 64 filters) and a final 3-channel sigmoid convolution.

    Returns:
        tuple: ``(autoencoder, encoder)`` Keras models sharing the same input
        and encoder layers; ``encoder`` outputs the latent map.
    """
    relu_conv = {"activation": 'relu', "padding": 'same'}
    inputs = Input(shape=(224, 224, 3))

    # Encoder: each stage halves height and width.
    latent = inputs
    for n_filters in (64, 32, 16):
        latent = Conv2D(n_filters, (3, 3), **relu_conv)(latent)
        latent = MaxPooling2D((2, 2), padding='same')(latent)

    # Decoder: each stage doubles height and width back towards the input size.
    recon = latent
    for n_filters in (16, 32, 64):
        recon = Conv2D(n_filters, (3, 3), **relu_conv)(recon)
        recon = UpSampling2D((2, 2))(recon)
    outputs = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(recon)

    autoencoder = models.Model(inputs, outputs)
    encoder = models.Model(inputs, latent)
    return autoencoder, encoder

def train_unsupervised_tf(video_dir):
    """Train the autoencoder on frames sampled from every video in a folder.

    Frames from each ``*.mp4`` directly inside ``video_dir`` are sampled with
    ``preprocess_video``, scaled to [0, 1] and concatenated into a single
    training array. The autoencoder is trained with light augmentation to
    reconstruct its own (augmented) input, and the encoder half is saved to
    ``model2_encoder.keras`` in the current working directory.

    Args:
        video_dir: Directory containing the training videos.

    Returns:
        The trained encoder model, or None when no frames could be read.
    """
    print("\n=== Training Autoencoder (Model 2) ===")
    video_files = glob.glob(os.path.join(video_dir, "*.mp4"))
    all_frames = []

    for v_path in video_files:
        frames, _ = preprocess_video(v_path)
        if len(frames) > 0:
            all_frames.append(frames.astype('float32') / 255.0)

    if not all_frames:
        return None
    # NOTE: every sampled frame of every video is held in memory at once.
    X_train = np.concatenate(all_frames, axis=0)

    datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.05,
        height_shift_range=0.05,
        zoom_range=0.1,
        horizontal_flip=True
    )

    ae, encoder = build_autoencoder()
    # Use the configured learning rate instead of Adam's default.
    ae.compile(optimizer=optimizers.Adam(learning_rate=LR), loss='mse')  # MSE loss for pixel reconstruction

    # flow(x, y) augments only x and passes y through untouched, so feeding
    # (X_train, X_train) would ask the network to undo random flips/rotations.
    # Yield each augmented batch as both input and target instead.
    batches = datagen.flow(X_train, batch_size=16)

    def _reconstruction_pairs():
        # The Keras iterator loops forever; steps_per_epoch bounds each epoch.
        for batch in batches:
            yield batch, batch

    ae.fit(
        _reconstruction_pairs(),
        steps_per_epoch=len(batches),
        epochs=EPOCHS,
        verbose=1,
    )

    encoder.save("model2_encoder.keras")
    print("✅ Encoder Saved as 'model2_encoder.keras'")
    return encoder

def visualize_keyframes(original_frames, indices, title):
    """Show the selected frames side by side in a single matplotlib figure.

    Args:
        original_frames: Indexable collection of images accepted by
            ``plt.imshow``.
        indices: Positions within ``original_frames`` to display; they are
            shown in ascending order.
        title: Figure-level title.

    Note:
        The "Frame N" caption is the position within ``original_frames``,
        not a frame number of the source video.
    """
    ordered = sorted(indices)
    n_panels = len(ordered)

    plt.figure(figsize=(15, 4))
    plt.suptitle(title, fontsize=16, fontweight='bold', y=1.05)

    # Subplot positions are 1-based, hence start=1.
    for panel, frame_idx in enumerate(ordered, start=1):
        plt.subplot(1, n_panels, panel)
        plt.imshow(original_frames[frame_idx])
        plt.axis('off')
        plt.title(f"Frame {frame_idx}")

    plt.tight_layout()
    plt.show()

def evaluate_keyframes(encoder, video_path, top_k=5):
    """Pick representative keyframes of a video via k-means on latent codes.

    Sampled frames are encoded, the flattened latent vectors are clustered
    with k-means, and for each cluster centre the nearest frame is chosen as
    a keyframe. The chosen frames are then displayed.

    Args:
        encoder: Model exposing ``predict``; nothing is done when it is None.
        video_path: Path of the video to summarise.
        top_k: Maximum number of keyframes. Fewer are produced when the video
            yields fewer sampled frames or when two cluster centres select
            the same frame.

    Returns:
        None. Returns early without plotting when there is no encoder or no
        frame could be read.
    """
    if encoder is None:
        print("❌ ERROR: No encoder available; skipping keyframe extraction.")
        return

    frames, orig_frames = preprocess_video(video_path)
    if len(frames) == 0:
        return

    X_test = frames.astype('float32') / 255.0
    latent = encoder.predict(X_test, verbose=0).reshape(len(frames), -1)

    # KMeans raises when asked for more clusters than there are samples.
    n_clusters = min(top_k, len(frames))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(latent)

    idx = []
    for center in kmeans.cluster_centers_:
        dist = np.linalg.norm(latent - center, axis=1)
        nearest = int(np.argmin(dist))
        # Two centres can share a nearest frame; show each frame only once.
        if nearest not in idx:
            idx.append(nearest)

    visualize_keyframes(orig_frames, idx, "Autoencoder Keyframes")

if __name__ == "__main__":
    # Pipeline: resolve dataset paths, train the autoencoder, then extract
    # keyframes from the configured test video.
    if setup_paths():
        encoder = train_unsupervised_tf(VIDEO_DIR)

        if encoder is None:
            # train_unsupervised_tf returns None when no frames were read;
            # evaluating with a missing encoder would crash.
            print("❌ ERROR: Training produced no encoder (no frames could be read).")
        else:
            # TEST
            test_search = glob.glob(os.path.join(VIDEO_DIR, f"*{TEST_VIDEO_ID}*"))
            if test_search:
                test_path = test_search[0]
                evaluate_keyframes(encoder, test_path, TOP_K)
            else:
                print(f"Test video {TEST_VIDEO_ID} not found.")