In [None]:
import os
import numpy as np
import cv2
from scipy.stats import kurtosis, weibull_min
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import joblib


In [None]:
from google.colab import drive
import os
# Archive to unpack and the folder it is unpacked into (both on Google Drive).
# NOTE(review): both paths live under /content/drive, so Drive has to be mounted
# before this cell runs; the drive.mount() call only appears in a later cell.
zip_file_path = '/content/drive/MyDrive/test_images/trailer-.zip'
destination_folder = '/content/drive/MyDrive/'

# unzip flags: -o overwrites existing files without prompting, -q keeps it quiet.
!unzip -o -q "{zip_file_path}" -d "{destination_folder}"

# Printed unconditionally: a failing `!` shell command does not raise in IPython.
print("Unzipping completed.")

Unzipping completed.


In [None]:
import cv2
import os
import numpy as np

def extract_keyframes(video_path, output_dir, motion_threshold=1.5, fps_sampling=5):
    """
    Extract keyframes from a single video.

    The video is sampled about ``fps_sampling`` times per second of footage.
    Every sampled frame is converted to grayscale and compared with the
    previously sampled frame; when the mean absolute pixel difference exceeds
    ``motion_threshold`` the colour frame is saved in ``output_dir`` as
    ``keyframe_NNNN.jpg``.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory receiving the JPEG files (created if missing).
        motion_threshold: Minimum mean absolute grayscale difference between
            two consecutive samples for the later one to count as a keyframe.
        fps_sampling: Target number of samples per second; must be positive.

    Returns:
        The number of keyframes written, or ``None`` when the video could not
        be opened (an error line is printed in that case).

    Raises:
        ValueError: If ``fps_sampling`` is not positive.
    """
    if fps_sampling <= 0:
        raise ValueError("fps_sampling must be a positive number")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Initialize video capture
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error opening video: {video_path}")
        cap.release()
        return

    keyframe_index = 0
    try:
        # Get video properties
        frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # A step of 0 (frame rate below fps_sampling, or a reported rate of 0)
        # would leave frame_index stuck and the loop below would never end.
        frame_interval = max(1, int(frame_rate // fps_sampling))

        # Progress is reported every 20%; clips shorter than 5 frames get no
        # progress lines instead of a modulo-by-zero.
        progress_step = total_frames // 5

        prev_frame = None
        frame_index = 0

        print(f"Processing: {os.path.basename(video_path)}")

        while frame_index < total_frames:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            ret, frame = cap.read()
            if not ret:
                break

            # Convert frame to grayscale
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect scene change against the previously sampled frame
            if prev_frame is not None:
                mean_diff = np.mean(cv2.absdiff(prev_frame, gray_frame))

                if mean_diff > motion_threshold:
                    keyframe_path = os.path.join(output_dir, f'keyframe_{keyframe_index:04d}.jpg')
                    # Only count frames that actually reached the disk.
                    if cv2.imwrite(keyframe_path, frame):
                        keyframe_index += 1
                    else:
                        print(f"Failed to write keyframe: {keyframe_path}")

            prev_frame = gray_frame
            frame_index += frame_interval

            # Print progress every 20%
            if progress_step > 0 and frame_index % progress_step < frame_interval:
                progress = (frame_index / total_frames) * 100
                print(f"Progress: {progress:.1f}%")
    finally:
        # Release the capture even if decoding or writing raised.
        cap.release()

    print(f"Extracted {keyframe_index} keyframes")
    return keyframe_index

def process_video_lists(movie_list, trailer_list, base_output_dir='/content/drive/MyDrive/keyframes'):
    """
    Run keyframe extraction over a list of movies and a list of trailers.

    Each video gets its own folder named after the file (extension removed),
    under ``<base_output_dir>/movies`` or ``<base_output_dir>/trailers``.
    Movies are handled first, then trailers; a heading is printed before each
    group. Nothing is returned.
    """
    batches = (
        ("\nProcessing Movies:", "-----------------",
         movie_list, os.path.join(base_output_dir, 'movies')),
        ("\nProcessing Trailers:", "-------------------",
         trailer_list, os.path.join(base_output_dir, 'trailers')),
    )

    for heading, underline, video_paths, category_dir in batches:
        print(heading)
        print(underline)
        for video_path in video_paths:
            # Folder name = file name without its extension.
            stem = os.path.splitext(os.path.basename(video_path))[0]
            extract_keyframes(video_path, os.path.join(category_dir, stem))

# Example usage
# Every trailer file carries its movie's title followed by a trailing "T",
# so both path lists are derived from the same list of titles.
video_titles = [
    'BehindTheWalls',
    'HighWay',
    'NightVisit',
    'Stucco',
    'SushiNoh',
    'TheBottom',
    'TheChair',
    'UntilDeath',
]

movie_paths = [f'/content/drive/MyDrive/movie/{title}.mp4' for title in video_titles]

trailer_paths = [f'/content/drive/MyDrive/trailer/{title}T.mp4' for title in video_titles]

# Process the videos (keyframes land in the relative folder 'keyframes')
process_video_lists(movie_paths, trailer_paths, 'keyframes')


Processing Movies:
-----------------
Processing: BehindTheWalls.mp4
Progress: 20.0%
Progress: 40.0%
Progress: 60.0%
Progress: 80.0%
Progress: 100.0%
Extracted 2661 keyframes
Processing: HighWay.mp4
Progress: 20.0%
Progress: 40.0%
Progress: 60.0%
Progress: 80.0%
Progress: 100.0%
Extracted 2360 keyframes
Processing: NightVisit.mp4
Progress: 20.0%
Progress: 40.0%
Progress: 60.0%
Progress: 80.0%
Progress: 100.0%
Extracted 1805 keyframes
Processing: Stucco.mp4
Progress: 20.0%
Progress: 40.0%
Progress: 60.0%
Progress: 80.0%
Progress: 100.0%
Extracted 1734 keyframes
Processing: SushiNoh.mp4
Progress: 20.0%
Progress: 40.0%
Progress: 60.0%
Progress: 80.0%
Progress: 100.0%
Extracted 3180 keyframes
Processing: TheBottom.mp4
Progress: 20.0%
Progress: 40.0%
Progress: 60.0%
Progress: 80.0%
Progress: 100.0%
Extracted 2096 keyframes
Processing: TheChair.mp4
Progress: 20.0%
Progress: 40.0%
Progress: 60.0%
Progress: 80.0%
Progress: 100.0%
Extracted 3486 keyframes
Processing: UntilDeath.mp4
Progress: 20

In [None]:
from google.colab import drive
# Attach Google Drive at /content/drive; the /content/drive/MyDrive/... paths
# used elsewhere in this notebook only resolve once this call has succeeded.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import cv2
import os
import numpy as np
from scipy.stats import kurtosis, weibull_min
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import RobustScaler
from sklearn.impute import SimpleImputer

# Path structure for movie and trailer keyframes.
# Each of these directories is listed later in the notebook and every entry
# found inside is treated as one video's keyframe folder.
# NOTE(review): the extraction cell writes to the relative folder 'keyframes';
# these absolute paths match it only when the working directory is /content - confirm.
movie_keyframes_dir = '/content/keyframes/movies'
trailer_keyframes_dir = '/content/keyframes/trailers'

In [None]:
from scipy.stats import kurtosis
import numpy as np
import cv2
import os

def calculate_kurtosis_safe(data):
    """Return the kurtosis of ``data``, or 0 when the data is (nearly) constant.

    A standard deviation below 1e-5 is treated as "no spread" and short-cuts
    to 0; otherwise ``scipy.stats.kurtosis`` is used with NaN values omitted.
    """
    nearly_constant = np.std(data) < 1e-5
    if not nearly_constant:
        return kurtosis(data, nan_policy='omit')
    return 0

def color_variance_in_luv(keyframes_dir):
    """Mean generalized colour variance of the keyframes under ``keyframes_dir``.

    Every ``.jpg`` found while walking the directory tree is converted to the
    Luv colour space; the 3x3 covariance matrix of its L, u and v channels is
    computed and its determinant taken. The determinants are averaged over all
    images.

    Args:
        keyframes_dir: Root directory that is searched recursively.

    Returns:
        The mean determinant, or ``np.nan`` when no readable ``.jpg`` exists.
    """
    variances = []
    for root, _, files in os.walk(keyframes_dir):
        for filename in files:
            if not filename.endswith('.jpg'):
                continue
            img = cv2.imread(os.path.join(root, filename))
            if img is None:
                # cv2.imread reports unreadable/corrupt files by returning None;
                # skip them instead of crashing in cvtColor.
                continue
            luv_img = cv2.cvtColor(img, cv2.COLOR_BGR2Luv)
            # One row per channel, one column per pixel.
            channel_rows = [channel.flatten() for channel in cv2.split(luv_img)]
            variances.append(np.linalg.det(np.cov(channel_rows)))
    return np.mean(variances) if variances else np.nan

def lighting_key_features(keyframes_dir):
    """Average brightness and shadow proportion of the keyframes in a directory.

    For each ``.jpg`` found while walking ``keyframes_dir`` the image is
    converted to Luv and its first (L) channel is used: the median L value is
    the image's brightness, and the fraction of pixels with L below 18% of 255
    is its shadow ratio.

    Args:
        keyframes_dir: Root directory that is searched recursively.

    Returns:
        ``(mean_brightness, mean_shadow_ratio)`` over all readable images, or
        ``(np.nan, np.nan)`` when there are none.
    """
    brightness, shadows = [], []
    shadow_threshold = 0.18
    for root, _, files in os.walk(keyframes_dir):
        for filename in files:
            if not filename.endswith('.jpg'):
                continue
            img = cv2.imread(os.path.join(root, filename))
            if img is None:
                # Unreadable file: cv2.imread returns None rather than raising.
                continue
            L_channel = cv2.cvtColor(img, cv2.COLOR_BGR2Luv)[:, :, 0]
            brightness.append(np.median(L_channel))
            shadows.append(np.sum(L_channel < shadow_threshold * 255) / L_channel.size)

    if not brightness:
        # Same value np.mean([]) would give, without the RuntimeWarning, and
        # consistent with color_variance_in_luv's empty-input result.
        return np.nan, np.nan
    return np.mean(brightness), np.mean(shadows)

def hsv_color_features(keyframes_dir):
    """Average per-channel HSV statistics of the keyframes in a directory.

    For each ``.jpg`` found while walking ``keyframes_dir`` the image is
    converted to HSV and, for each of the H, S and V channels, the mean, the
    variance and the kurtosis (via ``calculate_kurtosis_safe``) are computed.
    Each statistic is then averaged over all images.

    NOTE(review): the hue channel is averaged like a linear quantity here;
    confirm that is intended for an angular value.

    Args:
        keyframes_dir: Root directory that is searched recursively.

    Returns:
        A 9-tuple ``(h_mean, s_mean, v_mean, h_var, s_var, v_var, h_kurt,
        s_kurt, v_kurt)``; all entries are ``np.nan`` when no readable image
        was found.
    """
    per_image_stats = []
    for root, _, files in os.walk(keyframes_dir):
        for filename in files:
            if not filename.endswith('.jpg'):
                continue
            img = cv2.imread(os.path.join(root, filename))
            if img is None:
                # Unreadable file: cv2.imread returns None rather than raising.
                continue
            H, S, V = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
            channels = (H, S, V)

            # One row per image, laid out in the order of the returned tuple.
            per_image_stats.append(
                [np.mean(channel) for channel in channels]
                + [np.var(channel) for channel in channels]
                + [calculate_kurtosis_safe(channel.flatten()) for channel in channels]
            )

    if not per_image_stats:
        # Same values np.mean([]) would give, without the RuntimeWarning.
        return (np.nan,) * 9
    return tuple(np.mean(per_image_stats, axis=0))

def texture_analysis(keyframes_dir):
    """Average Weibull shape and scale fitted to each keyframe's gray histogram.

    For each ``.jpg`` found while walking ``keyframes_dir`` the image is read
    as grayscale, a 256-bin density histogram is built, and a two-parameter
    Weibull distribution (location fixed at 0) is fitted to the histogram
    values.

    NOTE(review): the fit is performed on the 256 bin densities, not on the
    pixel values themselves - confirm this is the intended texture descriptor.

    Args:
        keyframes_dir: Root directory that is searched recursively.

    Returns:
        ``(mean_shape, mean_scale)`` over all readable images, or
        ``(np.nan, np.nan)`` when there are none.
    """
    beta_params, gamma_params = [], []
    for root, _, files in os.walk(keyframes_dir):
        for filename in files:
            if not filename.endswith('.jpg'):
                continue
            img = cv2.imread(os.path.join(root, filename), cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Unreadable file: cv2.imread returns None rather than raising.
                continue
            hist, _ = np.histogram(img.flatten(), bins=256, density=True)
            # fit() returns (shape, loc, scale). With floc=0 the middle entry is
            # always 0, so the second feature must come from index 2 (scale);
            # reading index 1 made it a constant zero.
            shape, _loc, scale = weibull_min.fit(hist, floc=0)
            beta_params.append(shape)
            gamma_params.append(scale)

    if not beta_params:
        # Same values np.mean([]) would give, without the RuntimeWarning.
        return np.nan, np.nan
    return np.mean(beta_params), np.mean(gamma_params)

def spatial_features(keyframes_dir, grid_size=3):
    """Average mean and variance of the grid cells of every keyframe.

    Each ``.jpg`` found while walking ``keyframes_dir`` is divided into
    ``grid_size`` x ``grid_size`` equally sized cells (rows/columns left over
    after the integer division are ignored). The mean and variance of all
    values in a cell are recorded, and both are averaged over every cell of
    every image.

    Args:
        keyframes_dir: Root directory that is searched recursively.
        grid_size: Number of cells along each image axis.

    Returns:
        ``(mean_of_cell_means, mean_of_cell_variances)``, or
        ``(np.nan, np.nan)`` when no usable image was found.
    """
    spatial_means, spatial_vars = [], []
    for root, _, files in os.walk(keyframes_dir):
        for filename in files:
            if not filename.endswith('.jpg'):
                continue
            img = cv2.imread(os.path.join(root, filename))
            if img is None:
                # Unreadable file: cv2.imread returns None rather than raising.
                continue
            h, w = img.shape[:2]
            grid_h, grid_w = h // grid_size, w // grid_size
            if grid_h == 0 or grid_w == 0:
                # Image smaller than the grid: every cell would be empty and
                # poison the averages with NaN.
                continue
            for i in range(grid_size):
                for j in range(grid_size):
                    grid = img[i*grid_h:(i+1)*grid_h, j*grid_w:(j+1)*grid_w]
                    spatial_means.append(np.mean(grid))
                    spatial_vars.append(np.var(grid))

    if not spatial_means:
        # Same values np.mean([]) would give, without the RuntimeWarning.
        return np.nan, np.nan
    return np.mean(spatial_means), np.mean(spatial_vars)


In [None]:
def extract_all_features(keyframes_dir):
    """Build the flat feature list for one keyframe directory.

    The list starts with the single value from ``color_variance_in_luv`` and is
    then extended, in this order, with the values returned by
    ``lighting_key_features``, ``hsv_color_features``, ``texture_analysis`` and
    ``spatial_features``.
    """
    features = [color_variance_in_luv(keyframes_dir)]
    multi_value_extractors = (
        lighting_key_features,
        hsv_color_features,
        texture_analysis,
        spatial_features,
    )
    for extractor in multi_value_extractors:
        features.extend(extractor(keyframes_dir))
    return features

# Process features for movies and trailers separately
def extract_features_for_dirs(keyframes_dirs):
    """Return an array with one ``extract_all_features`` row per directory.

    Rows follow the order of ``keyframes_dirs``.
    """
    return np.array([extract_all_features(directory) for directory in keyframes_dirs])


In [None]:
def _list_keyframe_dirs(parent_dir):
    """Return the sub-directories of ``parent_dir`` as sorted full paths.

    os.listdir gives entries in arbitrary order and also returns plain files;
    a stray file would be walked as an empty "video" and end up as an all-NaN
    feature row. Sorting keeps the row order reproducible between runs.
    """
    candidates = (os.path.join(parent_dir, entry) for entry in os.listdir(parent_dir))
    return sorted(path for path in candidates if os.path.isdir(path))

movie_dirs = _list_keyframe_dirs(movie_keyframes_dir)
trailer_dirs = _list_keyframe_dirs(trailer_keyframes_dir)

# Extract features (one row per video directory)
movie_features = extract_features_for_dirs(movie_dirs)
trailer_features = extract_features_for_dirs(trailer_dirs)

In [None]:
import numpy as np
import joblib
from sklearn.svm import OneClassSVM
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler, StandardScaler

# Data preprocessing: impute and scale.
# The imputer and scaler are fitted on the movie features only and then applied
# to the trailer features. The results are bound to NEW names: overwriting
# movie_features / trailer_features made this cell non-idempotent (a re-run
# scaled already-scaled data) and handed pre-scaled arrays to the following
# cell, which stacks both matrices and fits its own imputer and scaler.
imputer = SimpleImputer(strategy='mean')
scaler = RobustScaler()
movie_features_scaled = scaler.fit_transform(imputer.fit_transform(movie_features))
trailer_features_scaled = scaler.transform(imputer.transform(trailer_features))

# Scoring function for One-Class SVM
def ocsvm_score(estimator, X):
    """Return the mean decision-function value of a fitted ``estimator`` on ``X``."""
    scores = estimator.decision_function(X)
    return np.mean(scores)

# Manual hyperparameter tuning function for One-Class SVM
def tune_svm_hyperparameters(X_scaled):
    """Grid-search a One-Class SVM and return the best fitted model.

    Every combination of ``nu``, ``kernel`` and ``gamma`` is fitted on
    ``X_scaled`` and scored with ``ocsvm_score`` on the same data; the first
    model reaching the highest score is returned.

    NOTE(review): the score is the mean decision value on the training data,
    which may not be comparable across kernels/gamma values - confirm this is
    the intended selection criterion.

    Args:
        X_scaled: Imputed and scaled feature matrix, one row per sample.

    Returns:
        The fitted ``OneClassSVM`` with the highest score, or ``None`` if no
        candidate scored above ``-inf``.
    """
    best_score = -np.inf
    best_model = None

    # Define parameter grid
    nu_values = [0.01, 0.1, 0.5]
    kernels = ['rbf', 'linear']
    gamma_values = ['scale', 0.001, 0.01]

    for nu in nu_values:
        for kernel in kernels:
            # gamma has no effect on the linear kernel, so one fit per nu is
            # enough there; ties were already resolved in favour of the first
            # candidate, so the selected model is unchanged.
            candidate_gammas = gamma_values[:1] if kernel == 'linear' else gamma_values
            for gamma in candidate_gammas:
                oc_svm = OneClassSVM(nu=nu, kernel=kernel, gamma=gamma)
                oc_svm.fit(X_scaled)  # Fit model
                score = ocsvm_score(oc_svm, X_scaled)  # Evaluate model

                # Check if this is the best score so far
                if score > best_score:
                    best_score = score
                    best_model = oc_svm

    return best_model

# Tune and train the One-Class SVM model manually on the scaled movie features
oc_svm_model = tune_svm_hyperparameters(movie_features_scaled)

# Save the model, scaler, and imputer
joblib.dump(oc_svm_model, 'video2SVC1.pkl')
joblib.dump(scaler, 'video2SVCScaler.pkl')
joblib.dump(imputer, 'video2SVCImputer.pkl')

print("Imputer, scaler, and One-Class SVM model saved after manual hyperparameter tuning.")


Imputer, scaler, and One-Class SVM model saved after manual hyperparameter tuning.


In [None]:
from sklearn.svm import OneClassSVM
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler
import joblib
import numpy as np

# Custom scoring function
def ocsvm_score(estimator, X):
    """Fit ``estimator`` on ``X`` and return its mean decision value on ``X``.

    The estimator is fitted in place, so the object passed in is a trained
    model once this function returns.
    """
    estimator.fit(X)
    return np.mean(estimator.decision_function(X))

# Hyperparameter tuning function
def tune_svm_hyperparameters(X):
    """Grid-search a One-Class SVM on ``X`` and return the best fitted model.

    Each candidate is handed to ``ocsvm_score``, which fits it on ``X`` and
    returns its score; the first candidate reaching the highest score wins.

    NOTE(review): the score is computed on the training data itself and may
    not be comparable across kernels/gamma values - confirm this is the
    intended selection criterion.

    Args:
        X: Feature matrix, one row per sample.

    Returns:
        The best-scoring ``OneClassSVM``, or ``None`` if no candidate scored
        above ``-inf``.
    """
    best_score = -np.inf
    best_model = None
    nu_values = [0.01, 0.1, 0.5]
    kernels = ['rbf', 'linear']
    gamma_values = ['scale', 0.001, 0.01]

    for nu in nu_values:
        for kernel in kernels:
            # gamma has no effect on the linear kernel: sweeping it only
            # repeated identical fits. Ties already went to the first
            # candidate, so the selected model is unchanged.
            candidate_gammas = gamma_values[:1] if kernel == 'linear' else gamma_values
            for gamma in candidate_gammas:
                oc_svm = OneClassSVM(nu=nu, kernel=kernel, gamma=gamma)
                score = ocsvm_score(oc_svm, X)

                if score > best_score:
                    best_score = score
                    best_model = oc_svm

    return best_model

# Stack movie and trailer features into one training matrix (movies first).
# NOTE(review): if movie_features / trailer_features were already imputed and
# scaled by an earlier cell, the imputer and scaler fitted below are fitted on
# scaled data and will not match raw features at inference time - confirm that
# raw features reach this point.
stacked_features = np.vstack((movie_features, trailer_features))

# Impute and scale with transformers fitted on the combined data
imputer = SimpleImputer(strategy='mean')
scaler = RobustScaler()
stacked_features = imputer.fit_transform(stacked_features)
stacked_features = scaler.fit_transform(stacked_features)

# Tune and train on the stacked features
oc_svm_model = tune_svm_hyperparameters(stacked_features)

# Save model, scaler, and imputer
for artifact, artifact_path in (
    (oc_svm_model, 'video2SVC1_combined.pkl'),
    (scaler, 'video2SVCScaler_combined.pkl'),
    (imputer, 'video2SVCImputer_combined.pkl'),
):
    joblib.dump(artifact, artifact_path)

print("Model, scaler, and imputer saved for stacked movie and trailer features.")


Model, scaler, and imputer saved for stacked movie and trailer features.


In [None]:
import pandas as pd
import joblib
import numpy as np
import cv2
import shutil
import os
from sklearn.metrics.pairwise import cosine_similarity

# Load the artifacts written by the combined-training cell.
# NOTE: joblib.load unpickles arbitrary objects - only load files you created.
best_oc_svm_model = joblib.load('/content/video2SVC1_combined.pkl')
# The scaler must come from the scaler pickle; loading the imputer file here
# meant that `scaler.transform` only imputed and never scaled the features.
scaler = joblib.load('/content/video2SVCScaler_combined.pkl')
imputer = joblib.load('/content/video2SVCImputer_combined.pkl')

# Candidate segments (one row per start/end timestamp) and the movie they refer to
timestamps_df = pd.read_csv('/content/IgnoreITAudioCSV.csv')
movie_file = '/content/ignoreIT.mp4'

def extract_frames(movie_file, timestamps, keep_missing=False):
    """Grab one frame of ``movie_file`` at every start time in ``timestamps``.

    Args:
        movie_file: Path of the video to read.
        timestamps: DataFrame with a ``'Start Time (s)'`` column (seconds).
        keep_missing: When False (default) timestamps whose frame cannot be
            read are skipped, so the result may be shorter than ``timestamps``
            and positions then no longer line up with its rows. When True a
            ``None`` placeholder is stored instead, keeping the result aligned
            row by row.

    Returns:
        List of frames in row order.

    Raises:
        KeyError: If the ``'Start Time (s)'`` column is missing.
    """
    frames = []
    video = cv2.VideoCapture(movie_file)
    try:
        for start_time in timestamps['Start Time (s)']:
            # Seek by time; the capture expects milliseconds.
            video.set(cv2.CAP_PROP_POS_MSEC, start_time * 1000)
            success, frame = video.read()
            if success:
                frames.append(frame)
            elif keep_missing:
                frames.append(None)
    finally:
        # Release the capture even when reading or the column lookup raised.
        video.release()
    return frames

def extract_all_features(keyframes_dir):
    """Build the flat feature list for one keyframe directory.

    Re-declares the helper of the same name from the feature-extraction cell.
    The list starts with the single value from ``color_variance_in_luv`` and is
    then extended, in this order, with the values returned by
    ``lighting_key_features``, ``hsv_color_features``, ``texture_analysis`` and
    ``spatial_features``.
    """
    features = [color_variance_in_luv(keyframes_dir)]
    multi_value_extractors = (
        lighting_key_features,
        hsv_color_features,
        texture_analysis,
        spatial_features,
    )
    for extractor in multi_value_extractors:
        features.extend(extractor(keyframes_dir))
    return features

def extract_features_from_frame(frame):
    """Compute the directory-based feature vector for a single in-memory frame.

    The frame is written as ``frame.jpg`` into a fresh temporary directory,
    ``extract_all_features`` is run on that directory, and the directory is
    removed again - also when feature extraction raises.

    Args:
        frame: Image array accepted by ``cv2.imwrite``.

    Returns:
        The list returned by ``extract_all_features`` for that one image.
    """
    # Local import so the function does not depend on a notebook-level import.
    import tempfile

    # A unique directory per call: the previous fixed 'temp_frame_dir' picked up
    # any files already in a folder of that name and then deleted that folder.
    with tempfile.TemporaryDirectory(prefix='temp_frame_dir_') as temp_dir:
        frame_path = os.path.join(temp_dir, 'frame.jpg')
        cv2.imwrite(frame_path, frame)
        return extract_all_features(temp_dir)

frames = extract_frames(movie_file, timestamps_df)

# extract_frames drops timestamps whose frame cannot be read. Scores are matched
# to timestamps_df rows by position below, so a shorter frame list would label
# the wrong rows without any error.
if len(frames) != len(timestamps_df):
    raise RuntimeError(
        f"Read {len(frames)} frames for {len(timestamps_df)} timestamps; "
        "decision scores cannot be matched to timestamp rows."
    )

# One feature row per timestamp
X_new = np.array([extract_features_from_frame(frame) for frame in frames])

# Always go through the fitted imputer: NaN-free values pass through unchanged,
# and the scaler then sees exactly what the imputer produced at training time.
X_new_imputed = imputer.transform(X_new)
X_new_scaled = scaler.transform(X_new_imputed)

decision_scores = best_oc_svm_model.decision_function(X_new_scaled)
print(decision_scores)
threshold = -1 # Define your threshold here based on your model's expected score range

# Positional mask: row i of timestamps_df <-> frame i <-> score i
trailer_worthy_mask = decision_scores > threshold
trailer_worthy_indices = np.where(trailer_worthy_mask)[0]

timestamps_df['trailer_worthy'] = np.where(trailer_worthy_mask, 1, -1)

trailer_worthy_df = timestamps_df[timestamps_df['trailer_worthy'] == 1]

if not trailer_worthy_df.empty:
    print(trailer_worthy_df)
    trailer_worthy_df.to_csv('ignoreItTrailerV2.csv', index=False)
else:
    print("No trailer-worthy timestamps were found.")


[-11402.24467428  82018.16807601 107096.95059976 358232.94495827
 -12399.07085859 366704.06564731 108276.34173681   1214.41186298
 371996.36381508 366138.60493097 262935.16151966  -9430.88552438
  -3275.98471428 104138.61792512  -2306.86706166  73054.8533785
  21444.57296186  40309.88350055  -4164.61268385  23232.67961618
  54698.66927246  44015.83028886]
    Start Time (s)  End Time (s)  trailer_worthy
1               35            40               1
2              135           140               1
3              125           130               1
5              130           135               1
6              160           165               1
7                5            10               1
8              110           115               1
9              155           160               1
10             105           110               1
13              45            50               1
15              25            30               1
16              10            15               1
17   

In [None]:
from moviepy.editor import VideoFileClip, concatenate_videoclips
import pandas as pd

# Load timestamps from CSV
csv_file_path = '/content/trailer_worthy_timestamps.csv'
timestamps_df = pd.read_csv(csv_file_path)

# Input movie and output trailer
movie_file_path = '/content/drive/MyDrive/VideoMP4/bleep.mp4'
output_path = "bleep2.mp4"

# (start, end) pairs in seconds that are deliberately left out of the trailer
excluded_segments = {(315, 320)}

# Load the movie file
movie = VideoFileClip(movie_file_path)
try:
    # Extract clips based on the timestamps, in CSV row order
    clips = [
        movie.subclip(start_time, end_time)
        for start_time, end_time in zip(
            timestamps_df["Start Time (s)"], timestamps_df["End Time (s)"]
        )
        if (start_time, end_time) not in excluded_segments
    ]

    if clips:
        # Concatenate all clips into one final trailer
        final_trailer = concatenate_videoclips(clips, method="compose")

        # Write the final trailer to a file, ensuring audio is included
        final_trailer.write_videofile(output_path, codec="libx264", audio_codec="aac")

        print(f"Trailer-worthy compilation saved as {output_path}")
    else:
        # concatenate_videoclips cannot build a video from an empty list
        print("No clips selected; no trailer was written.")
finally:
    # Release the reader resources held by the source clip, even on failure
    movie.close()


Moviepy - Building video bleep2.mp4.
Moviepy - Writing video bleep2.mp4





Moviepy - Done !
Moviepy - video ready bleep2.mp4
Trailer-worthy compilation saved as bleep2.mp4
