In [1]:
#parsing video and extracting audio 
import moviepy.editor as mp
import os

def extract_audio(video_path):
    # Load the video file
    video = mp.VideoFileClip(video_path)

    # Extract the audio from the video
    audio = video.audio

    # Get the video filename without extension
    video_filename = os.path.splitext(os.path.basename(video_path))[0]

    # Generate the output audio filename
    audio_filename = f"{video_filename}_audio.mp3"

    # Write the audio to a file in the current directory
    audio.write_audiofile(audio_filename)

    print(f"Audio extracted and saved as: {audio_filename}")

# Specify the path to your video file
video_path = "test_vid.mp4"

# Extract audio from the video
extract_audio(video_path)

MoviePy - Writing audio in test_vid_audio.mp3


                                                                      

MoviePy - Done.
Audio extracted and saved as: test_vid_audio.mp3


In [2]:
import google.generativeai as genai

genai.configure(api_key="AIzaSyBbZqL2pKUK9k8j-oYSLgnNMFS--wzFqek")
safety_settings = [
  {
    "category": "HARM_CATEGORY_HARASSMENT",
    "threshold": "BLOCK_NONE"
  },
  {
    "category": "HARM_CATEGORY_HATE_SPEECH",
    "threshold": "BLOCK_NONE"
  },
  {
    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "threshold": "BLOCK_NONE"
  },
  {
    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "threshold": "BLOCK_NONE"
  },
]

# Set up the model
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 0,
   "response_mime_type": "application/json"
}

model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest",
                              generation_config=generation_config,
                                  safety_settings=safety_settings    )



In [4]:
path='test_vid_audio.mp3'
audiofile = genai.upload_file(path=path, display_name=path)

In [5]:
# Prompt 3: Enhanced for clearer requirements and structured output
schema = [
  {
    "startTime": "00:00",
    "endTime": "00:03",
    "Subtitle": "Translated subtitle...",
    "culturalLexicon": []
  },
  {
    "startTime": "00:03",
    "endTime": "00:07",
    "Subtitle": "Translated subtitle...",
    "culturalLexicon": [
      {
        "term": "specific term",
        "explanation": "Explanation providing cultural and contextual insight"
      }
    ]
  }
]
language = "hindi"
prompt = f"""
Translate the provided audio file into {language} for subtitle creation (follow the json schema), ensuring clarity and accessibility for  {language}  speakers unfamiliar with different culture. Identify any culturally specific phrases or terms in the lyrics that might require additional explanation for an international audience. 
For each culturally specific term, include an entry in the 'culturalLexicon' within the JSON output. This will help {language} users understand not just the language but also the cultural context of the lyrics.

 Here's the JSON schema
{schema}
Please ensure the Subtitles are accurate and follwing json schema and the explanations are detailed enough to convey the deeper meanings and cultural significance of the terms.
"""

# Configuration for generative AI model, specifying a longer timeout for processing audio and text.
model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest", generation_config={"response_mime_type": "application/json"})
response = model.generate_content([prompt, audiofile], request_options={"timeout": 180})
print(response.text)


[{"startTime": "00:00", "endTime": "00:53", "Subtitle": "", "culturalLexicon": []}, {"startTime": "00:53", "endTime": "01:03", "Subtitle": "मजबूरी को आगे बुलाऊं, आने जाने की आदत को ज़हर बना, हाँ तेरी पी जाऊं मैं पूरी", "culturalLexicon": []}, {"startTime": "01:03", "endTime": "01:18", "Subtitle": "आना था वो नहीं आया, दिल बाग बाग मेरा टकराया, कागा बोल के दस जावे,  पावें क्यों उसकी सूरी। रावां च बावां च उनको ढूंढू, कोई मुझे ना रोके", "culturalLexicon": [{"term": "कागा बोल के दस जावे", "explanation": "This phrase refers to seeking guidance or a message from a crow, often seen as a messenger in South Asian folklore. It reflects the protagonist's desperation in seeking answers about their beloved."}, {"term": "पावें क्यों उसकी सूरी", "explanation": " सूरी translates to 'clue'. The protagonist wonders why they are unable to find any clue of their beloved's whereabouts."}]}, {"startTime": "01:18", "endTime": "01:28", "Subtitle": "मेरे ढोल जुदाईयां दी, तेनू खबर किवें होवे, आ जावे दिल तेरा पूरा 

In [6]:
import json

def convert_time(simple_time):
    """Converts MM:SS format to HH:MM:SS.sss format."""
    min_sec = simple_time.split(':')
    minutes = int(min_sec[0])
    seconds = int(min_sec[1])
    # Ensure it returns the time in HH:MM:SS.sss format
    return f"00:{minutes:02}:{seconds:02}.000"

def json_to_webvtt(json_data):
    """Converts JSON subtitles with cultural lexicon to WebVTT format with distinct styling for cultural notes."""

    webvtt = "WEBVTT\n\n"
    
    # Add CSS styling for cultural lexicon and regular subtitles
    webvtt += """
STYLE
::cue(c) {
    /* Styles for regular subtitles at the bottom */
    line: 90%; /* position subtitles at the bottom */
    align: middle; /* center-align text */
    size: 80%; /* width of the cue box */
}
::cue(.cultural-note) {
    /* Styles for cultural notes at the top */
    line: 10%; /* position at the top */
    align: middle; /* center-align text */
    size: 80%; /* width of the cue box */
    background-color: rgba(0, 0, 0, 0.7);
    color: yellow;
    font-size: 18px;
}
"""
    
    # Add cultural lexicon notes and subtitles
    for item in json_data:
        start = convert_time(item["startTime"])
        end = convert_time(item["endTime"])
        
        # Add cultural notes if available
        if item.get("culturalLexicon", []):
            for note in item["culturalLexicon"]:
                webvtt += f"\n{start} --> {end} .cultural-note\n"
                webvtt += f"{note['term']}: {note['explanation']}\n"
        
        # Add subtitle
        subtitle = item["Subtitle"]
        webvtt += f"\n{start} --> {end} c\n{subtitle}\n"
    
    return webvtt

# Example usage with your chosen time format
example_json = json.loads(response.text)
webvtt_content = json_to_webvtt(example_json)

# Display or save the output
print(webvtt_content)
with open("subtitles hindi.vtt", "w", encoding="utf-8") as f:
    f.write(webvtt_content)


WEBVTT


STYLE
::cue(c) {
    /* Styles for regular subtitles at the bottom */
    line: 90%; /* position subtitles at the bottom */
    align: middle; /* center-align text */
    size: 80%; /* width of the cue box */
}
::cue(.cultural-note) {
    /* Styles for cultural notes at the top */
    line: 10%; /* position at the top */
    align: middle; /* center-align text */
    size: 80%; /* width of the cue box */
    background-color: rgba(0, 0, 0, 0.7);
    color: yellow;
    font-size: 18px;
}

00:00:00.000 --> 00:00:53.000 c


00:00:53.000 --> 00:01:03.000 c
मजबूरी को आगे बुलाऊं, आने जाने की आदत को ज़हर बना, हाँ तेरी पी जाऊं मैं पूरी

00:01:03.000 --> 00:01:18.000 .cultural-note
कागा बोल के दस जावे: This phrase refers to seeking guidance or a message from a crow, often seen as a messenger in South Asian folklore. It reflects the protagonist's desperation in seeking answers about their beloved.

00:01:03.000 --> 00:01:18.000 .cultural-note
पावें क्यों उसकी सूरी:  सूरी translates to 'c