In [51]:
!pip install pysrt



In [44]:
from google.colab import drive
# Mount Google Drive at /content/drive; the subtitle and movie files read by
# later cells (and the output video) live under "/content/drive/My Drive/hack".
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [45]:
import pysrt

# Parse the .srt file from the mounted Drive into a sequence of subtitle items.
subtitles = pysrt.open(
    '/content/drive/My Drive/hack/Just go with it subtitle.srt',
    encoding='utf-8',
)

# Collect the text of every subtitle item, in file order, for classification.
text_lines = []
for cue in subtitles:
    text_lines.append(cue.text)

In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch

# Load the pre-trained emotion classifier and its matching tokenizer
model_name = "j-hartmann/emotion-english-roberta-large"
tokenizer = RobertaTokenizer.from_pretrained(model_name)
model = RobertaForSequenceClassification.from_pretrained(model_name)

# Classify each subtitle line; every entry is (line_text, predicted_class_index),
# where the index is the argmax over the model's output logits.
subtitle_sentiments = []
for line in text_lines:
    # Truncate so an unusually long cue cannot exceed the encoder's maximum
    # sequence length (512 tokens for RoBERTa) and crash the forward pass.
    inputs = tokenizer(line, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)
    predicted_class = torch.argmax(outputs.logits, dim=1).item()
    subtitle_sentiments.append((line, predicted_class))




In [None]:
# Keep only the subtitle lines classified as joy (used as a proxy for comedy
# scenes). Only the text is kept here; timestamps are looked up in later cells.
#
# Index 3 is 'joy' in this model's label order (anger, disgust, fear, joy,
# neutral, sadness, surprise) -- confirm with model.config.id2label.
JOY_LABEL_ID = 3

comedy_segments = [
    subtitle
    for subtitle, sentiment in subtitle_sentiments
    if sentiment == JOY_LABEL_ID
]

In [None]:
# Show the subtitle lines that were kept as comedy candidates.
print(comedy_segments)

In [None]:
import re

# File name of the subtitle file
subtitle_file = "/content/drive/My Drive/hack/Just go with it subtitle.srt"

# Subtitles of interest, stored in comedy_segments list
selected = comedy_segments
selected_texts = set(selected)  # O(1) membership checks while scanning the file

# Regex: group 1 is the "start --> end" time range, group 2 is the cue text.
# The text group spans every consecutive non-empty line, so a multi-line cue is
# captured whole (comedy_segments holds the full cue text, which may contain
# newlines; matching only the first line would never find those cues).
pattern = re.compile(
    r"(\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3})[ \t]*\n(.+(?:\n.+)*)"
)

# Read and parse the subtitle file
with open(subtitle_file, "r", encoding="utf-8") as file:
    content = file.read()
matches = pattern.findall(content)

# timestamps maps cue text -> time range. A line of dialogue that repeats in the
# movie keeps only its last time range here, so every selected occurrence is
# also recorded, in file order, in selected_cues.
timestamps = {}
selected_cues = []
for time_range, subtitle in matches:
    subtitle = subtitle.strip()
    time_range = time_range.strip()
    timestamps[subtitle] = time_range
    if subtitle in selected_texts:
        selected_cues.append((subtitle, time_range))

# Filter only selected subtitles in comedy_segments (one entry per distinct text)
filtered_timestamps = {subtitle: timestamps[subtitle] for subtitle in selected if subtitle in timestamps}

# Output result: one entry per occurrence, in chronological (file) order
for subtitle, time_range in selected_cues:
    print(f"Subtitle: {subtitle}\nTimestamp: {time_range}\n")

In [50]:
import re
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.editor import concatenate_videoclips

# File names for the subtitle and movie files
subtitle_file = "/content/drive/My Drive/hack/Just go with it subtitle.srt"
movie_file = "/content/drive/My Drive/hack/Just_Go_With_It_2011_BluRay_720p_Hindi_2_0_English_AAC5_1_ESubs.mkv"

# List of comedy subtitles of interest
video = comedy_segments
selected_texts = set(video)  # O(1) membership checks while scanning the file

# Dictionary mapping each selected subtitle text to its (start, end) timestamps.
# Keys are cue texts, so a line that repeats in the movie keeps one entry only
# (the last occurrence seen).
timestamps = {}

# Regex: groups are start time, end time, cue text. The text group spans every
# consecutive non-empty line, so a multi-line cue is captured whole
# (comedy_segments holds the full cue text, which may contain newlines;
# matching only the first line would never find those cues).
pattern = re.compile(
    r"(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})[ \t]*\n(.+(?:\n.+)*)"
)

# Read and parse the subtitle file
with open(subtitle_file, "r", encoding="utf-8") as file:
    content = file.read()
matches = pattern.findall(content)

# Populate the dictionary with the selected subtitle texts and their timestamps
for start, end, subtitle in matches:
    subtitle = subtitle.strip()
    if subtitle in selected_texts:
        timestamps[subtitle] = (start.strip(), end.strip())

def time_to_seconds(time_str):
    """Convert an SRT timestamp of the form ``hh:mm:ss,ms`` to seconds.

    Args:
        time_str: Timestamp string such as ``"00:01:16,360"``.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If the string is not in ``hh:mm:ss,ms`` form.
    """
    clock, millis = time_str.split(",")
    hours, minutes, seconds = (int(field) for field in clock.split(":"))
    whole_seconds = hours * 3600 + minutes * 60 + seconds
    return whole_seconds + int(millis) / 1000

# Load the movie
movie = VideoFileClip(movie_file)
clips = []
output_file = "/content/drive/My Drive/hack/comedy_clips_compilation.mp4"

try:
    # Walk every parsed cue in file order instead of iterating the text-keyed
    # `timestamps` dict: the dict holds one entry per distinct text, so repeated
    # lines would be cut only once and clips could end up out of chronological
    # order. `timestamps` is used here only as the set of selected texts.
    for start, end, subtitle in matches:
        subtitle = subtitle.strip()
        if subtitle not in timestamps:
            continue

        start_seconds = time_to_seconds(start)
        # Clamp to the movie length so a cue that runs past the end of the
        # video cannot request frames that do not exist.
        end_seconds = min(time_to_seconds(end), movie.duration)
        if end_seconds <= start_seconds:
            continue

        # Extract the clip and add it to the list
        clip = movie.subclip(start_seconds, end_seconds)
        clips.append(clip)
        print(f"Extracted clip for subtitle: '{subtitle}' from {start} to {end}")

    if clips:
        # Concatenate all clips into one video and save it
        final_clip = concatenate_videoclips(clips)
        final_clip.write_videofile(output_file, codec="libx264")
        print("Saved combined video with selected comedy clips.")
    else:
        # concatenate_videoclips cannot handle an empty list; report instead
        print("No comedy clips were selected; nothing was written.")
finally:
    # Always release the source video's reader, even if extraction or encoding fails
    movie.close()


Extracted clip for subtitle: 'I'm so happy you picked this dress, Veruca!' from 00:00:44,240 to 00:00:46,280
Extracted clip for subtitle: 'You look beautiful. Doesn't she?' from 00:00:46,360 to 00:00:48,360
Extracted clip for subtitle: 'You guys, I'm getting married!' from 00:00:49,040 to 00:00:50,800
Extracted clip for subtitle: 'I'm getting married!' from 00:00:50,840 to 00:00:52,600
Extracted clip for subtitle: 'my sister-in-law in an hour!' from 00:01:16,360 to 00:01:18,360
Extracted clip for subtitle: 'We're seeing his goofy family as little as possible.' from 00:01:22,280 to 00:01:25,800
Extracted clip for subtitle: '(WOMEN LAUGHING)' from 00:01:32,960 to 00:01:34,120
Extracted clip for subtitle: 'Yeah, right!' from 00:02:04,000 to 00:02:05,160
Extracted clip for subtitle: 'Danny is so sweet and considerate.' from 00:02:05,320 to 00:02:07,680
Extracted clip for subtitle: 'So, I'm marrying him' from 00:02:09,920 to 00:02:11,680
Extracted clip for subtitle: 'when something pretty i



MoviePy - Done.
Moviepy - Writing video /content/drive/My Drive/hack/comedy_clips_compilation.mp4





Moviepy - Done !
Moviepy - video ready /content/drive/My Drive/hack/comedy_clips_compilation.mp4
Saved combined video with selected comedy clips.
