In [None]:
# ==== Kaggle Video Transcript to 10-Second CSV + 10-Minute Summaries ====

# 1. Install libraries (run once)
!pip install moviepy git+https://github.com/openai/whisper.git transformers torch --quiet

import warnings
warnings.filterwarnings('ignore')

# 2. Find input video file
import os

input_dir = '/kaggle/input/rifatt'
video_extensions = ('.mp4', '.mkv', '.mov', '.avi')

# Walk the whole input directory and remember every file whose extension
# (case-insensitive) marks it as a video.
found_videos = []
for root, dirs, files in os.walk(input_dir):
    for file in files:
        if file.lower().endswith(video_extensions):
            found_videos.append(os.path.join(root, file))

# Fail early with a readable message instead of hitting an undefined
# `video_path` further down when the dataset contains no video.
if not found_videos:
    raise FileNotFoundError(
        f"No video file ({', '.join(video_extensions)}) found under {input_dir}"
    )

# The last match in walk order is used, which is the file the previous
# version of this loop ended up with as well.
video_path = found_videos[-1]
video_filename = os.path.basename(video_path)
if len(found_videos) > 1:
    print(f"Note: {len(found_videos)} video files found; using the last one.")
print(f"Found video: {video_filename} at {video_path}")

# 3. Extract audio from video
from moviepy.editor import VideoFileClip

print("Extracting audio from video...")
audio_path = './extracted_audio.wav'
video = VideoFileClip(video_path)
try:
    # A clip without an audio stream exposes `audio` as None; report that
    # explicitly rather than failing with an AttributeError.
    if video.audio is None:
        raise ValueError(f"No audio track found in {video_path}")
    # logger=None keeps moviepy from printing its progress bar.
    video.audio.write_audiofile(audio_path, verbose=False, logger=None)
    video_duration = video.duration
finally:
    # Release the ffmpeg reader processes and file handles held by the clip.
    video.close()
print(f"Audio extraction completed! Video duration: {video_duration:.2f} seconds")

# 4. Transcribe audio with Whisper (word timestamps)
import whisper

# Options forwarded to Whisper: word_timestamps=True requests per-word
# timing entries, which step 5 reads from each segment's 'words' list.
transcribe_options = {'word_timestamps': True}

print("Loading Whisper model...")
model = whisper.load_model(name='base')

print("Transcribing audio with word-level timestamps...")
result = model.transcribe(audio_path, **transcribe_options)

print("Transcription completed!")

# 5. Build 10-second transcript chunks and save to CSV
import math
import os

import pandas as pd

seconds_per_chunk = 10

# An empty transcription (no segments) is treated as a zero-length
# recording instead of raising IndexError on segments[-1].
segments = result.get('segments') or []
n_secs = int(math.ceil(segments[-1]['end'])) if segments else 0

# One bucket per 10-second window covering seconds 0..n_secs inclusive.
n_chunks = n_secs // seconds_per_chunk + 1
words_by_chunk = [[] for _ in range(n_chunks)]

for segment in segments:
    for wi in segment.get('words', []):
        # Whisper's word strings carry their own leading space; strip it so
        # the join below does not produce double spaces.
        word = wi['word'].strip()
        if not word:
            continue
        # Assign each word to exactly one window, chosen by its start time.
        # Spreading a word over every second it overlaps duplicated any word
        # that crossed a second boundary.
        chunk_index = int(wi['start']) // seconds_per_chunk
        words_by_chunk[min(chunk_index, n_chunks - 1)].append(word)

chunks = []
for chunk_index, chunk_words in enumerate(words_by_chunk):
    chunk_start = chunk_index * seconds_per_chunk
    chunk_end = min(chunk_start + seconds_per_chunk, n_secs + 1)
    # end_time is the last second that belongs to the window (inclusive).
    last_sec = chunk_end - 1
    chunks.append({
        'start_time': f"{chunk_start//60:02d}:{chunk_start%60:02d}",
        'end_time': f"{last_sec//60:02d}:{last_sec%60:02d}",
        'transcript': ' '.join(chunk_words),
    })

df_chunks = pd.DataFrame(chunks)
# splitext drops only the final extension, so dots inside the file name
# are kept in the output name.
csv_filename = f"transcript_{os.path.splitext(video_filename)[0]}_10sec_chunks.csv"
df_chunks.to_csv(csv_filename, index=False)
print(f"\nTranscript chunks saved to CSV as: {csv_filename}")

# 6. Chunk on 10-minute basis and summarize
import os

import torch
from transformers import pipeline, AutoTokenizer

print("Loading summarization model...")
model_name = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Use the first GPU when CUDA is available; -1 selects the CPU so the
# notebook also runs on sessions without an accelerator.
device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline("summarization", model=model_name, tokenizer=tokenizer, device=device)

max_tokens = 900           # largest piece of text sent to the model at once
summary_max_length = 200   # upper bound on generated summary tokens
summary_min_length = 60    # lower bound on generated summary tokens


def _summarize(text, n_tokens):
    """Return a summary of `text`, which tokenizes to `n_tokens` tokens.

    Text that is already no longer than the minimum summary length is
    returned unchanged: forcing the model to generate at least
    `summary_min_length` tokens from a shorter input would only pad it.
    """
    if n_tokens <= summary_min_length:
        return text.strip()
    # Never ask for a summary longer than the input itself.
    max_length = min(summary_max_length, n_tokens)
    min_length = min(summary_min_length, max_length - 1)
    output = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
    return output[0]['summary_text']


ten_min_chunks = []
text_chunks = df_chunks['transcript'].tolist()
chunk_size = 10 * 60 // seconds_per_chunk  # number of 10sec chunks per 10 min = 60

for i in range(0, len(text_chunks), chunk_size):
    chunk_start_sec = i * seconds_per_chunk
    chunk_end_sec = min((i + chunk_size) * seconds_per_chunk, n_secs+1)
    ts_label = f"{chunk_start_sec//60:02d}:{chunk_start_sec%60:02d} - {chunk_end_sec//60:02d}:{chunk_end_sec%60:02d}"
    # Skip empty 10-second transcripts so silent stretches add no stray spaces.
    chunk_text = ' '.join(t for t in text_chunks[i:i+chunk_size] if t)

    if not chunk_text:
        # Nothing was said in this window; do not call the model on "".
        summary_full = "(no speech detected)"
    else:
        # Special tokens are left out so slices contain text tokens only.
        tokens = tokenizer.encode(chunk_text, add_special_tokens=False)
        if len(tokens) > max_tokens:
            # Too long for one pass: summarize fixed-size pieces and
            # concatenate the partial summaries.
            subnotes = []
            for j in range(0, len(tokens), max_tokens):
                piece = tokens[j:j+max_tokens]
                sub_chunk = tokenizer.decode(piece, skip_special_tokens=True)
                subnotes.append(_summarize(sub_chunk, len(piece)))
            summary_full = " ".join(subnotes)
        else:
            summary_full = _summarize(chunk_text, len(tokens))

    ten_min_chunks.append(f"=== 📅 Time {ts_label} ===\n📝 {summary_full}\n")
    print(f"Summarized chunk: {ts_label}")

final_notes = "\n".join(ten_min_chunks)
# splitext drops only the final extension, so dots inside the file name
# are kept in the output name.
chunked_notes_filename = f"notes_{os.path.splitext(video_filename)[0]}_10minchunks.txt"
with open(chunked_notes_filename, "w", encoding="utf-8") as f:
    f.write("10-MINUTE CHUNKED VIDEO NOTES\n")
    f.write("="*60 + "\n\n")
    f.write(final_notes)
print(f"\nChunked notes saved as: {chunked_notes_filename}")

print("\n✅ All steps complete. CSV and summaries are ready!")

# (Optional) Preview CSV file
# head() has to be called: printing the bare attribute shows the bound
# method's repr rather than the first rows of the table.
print(df_chunks.head())


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Found video: CSE-Math4641-Lecture18(Recording)1.mkv at /kaggle/input/rifatt/CSE-Math4641-Lecture18(Recording)1.mkv
Extracting audio from video...
Audio extraction completed! Video duration: 4073.07 seconds
Loading Whisper model...
Transcribing audio with word-level timestamps...
