In [1]:
pip install torch transformers gtts moviepy pygame whisper


Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install ipywidgets


Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import time
import torch
from moviepy.editor import VideoFileClip
from transformers import (DistilBertTokenizer, DistilBertForQuestionAnswering,
                          MarianMTModel, MarianTokenizer, pipeline)
import whisper
from gtts import gTTS
import pygame


# Function to extract audio from a video file
def extract_audio_from_video(video_path, output_audio_path):
    video = VideoFileClip(video_path)
    video.audio.write_audiofile(output_audio_path)


# Function to transcribe audio using Whisper
def transcribe_audio(audio_path):
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    return result['text']


# Function to summarize the transcription using a summarization pipeline
def summarize_transcription(transcription):
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summary = summarizer(transcription, max_length=50, min_length=20, do_sample=False)
    return summary[0]['summary_text']


# Function to initialize the DistilBERT model and tokenizer for question answering
def init_qa_model():
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased-distilled-squad')
    model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased-distilled-squad')
    return tokenizer, model


# Function to answer questions based on context using DistilBERT
def answer_question(question, context, tokenizer, model):
    inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs['input_ids']
    attention_mask = inputs['attention_mask']

    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    
    start_index = torch.argmax(outputs.start_logits)
    end_index = torch.argmax(outputs.end_logits) + 1
    answer_tokens = input_ids[0][start_index:end_index]
    answer = tokenizer.decode(answer_tokens)

    return answer


# Function to load translation model and tokenizer
def load_translation_model(source_lang, target_lang):
    model_name = f'Helsinki-NLP/opus-mt-{source_lang}-{target_lang}'
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


# Function to translate text using MarianMT
def translate_text(text, model, tokenizer):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text


# Function for text-to-speech
def text_to_speech(text, language_code='en'):
    audio_file = f"answer_{int(time.time())}.mp3"
    tts = gTTS(text, lang=language_code)
    tts.save(audio_file)
    pygame.mixer.init()
    pygame.mixer.music.load(audio_file)
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        pygame.time.Clock().tick(10)
    pygame.mixer.music.stop()
    pygame.mixer.quit()
    os.remove(audio_file)  # Clean up the audio file


def main(video_path):
    # Step 1: Extract audio from the video
    audio_path = "extracted_audio.wav"
    extract_audio_from_video(video_path, audio_path)
    print("Audio extracted successfully.")

    # Step 2: Transcribe audio to text
    transcription = transcribe_audio(audio_path)
    print("Transcription:", transcription)

    # Step 3: Save transcription to a file
    with open("extracted_text.txt", "w") as text_file:
        text_file.write(transcription)

    # Step 4: Summarize transcription
    summary = summarize_transcription(transcription)
    print("Summary:", summary)
    
    # Convert the summary to speech
    text_to_speech(summary)

    # Initialize the question-answering model
    tokenizer, model = init_qa_model()

    # Step 5: Loop for answering questions
    while True:
        question = input("Ask a question (or type 'exit' to quit): ")
        if question.lower() == 'exit':
            break
        answer = answer_question(question, transcription, tokenizer, model)
        print("Answer:", answer)

        # Step 6: Translate answer if desired
        translate = input("Do you want to translate the answer? (yes/no): ").lower()
        if translate == 'yes':
            target_lang = input("Enter target language code (e.g., 'fr' for French, 'es' for Spanish): ").lower()
            translation_model, translation_tokenizer = load_translation_model('en', target_lang)
            translated_answer = translate_text(answer, translation_model, translation_tokenizer)
            print(f"Translated Answer ({target_lang}):", translated_answer)
            text_to_speech(translated_answer, target_lang)
        else:
            text_to_speech(answer)


# Run the main function with your video file path
if __name__ == "__main__":
    video_file_path = "video1.mp4"  # Replace with your video file path
    main(video_file_path)


MoviePy - Writing audio in extracted_audio.wav


                                                                                                                       

MoviePy - Done.
Audio extracted successfully.
Transcription:  Hello students, this is a tutorial on how to register mentors for the upcoming semester. Discover the step-by-step process of enrolling mentors and preparing for a successful semester ahead. How to Login. Here we go. After clicking on the link provided by our institution, then you will redirect it into a page like this, and then use the username and password given by our college and hit Login. How to update your profile. Alright, let's see.
Summary: This is a tutorial on how to register mentors for the upcoming semester. Discover the step-by-step process of enrolling mentors and preparing for a successful semester.


Ask a question (or type 'exit' to quit):  register


Answer: register [SEP] hello students, this is a tutorial on how to register mentors for the upcoming semester. discover the step - by - step process of enrolling mentors and preparing for a successful semester ahead. how to login


Do you want to translate the answer? (yes/no):  yes
Enter target language code (e.g., 'fr' for French, 'es' for Spanish):  fr


Translated Answer (fr): s'inscrire [SEP] bonjour aux étudiants, c'est un tutoriel sur la façon d'inscrire les mentors pour le semestre à venir. découvrez le processus étape par étape de l'inscription des mentors et de la préparation pour un semestre réussi à l'avance. comment se connecter
