# Name : Anand Prakash Raw
## Project : Audio Language Conversion Using Text Extraction (Eng to Hi)
## Subject : (CS-760) Natural Language Processing

### Steps Performed:
###    1) Extracting Text from Audio file.
###    2) Translating the text to other language.
 ###   3) Coverting the text back to audio file in that language.

# STEP 1
### Assembly AI API used for this purpose

In [1]:
import requests
import json
import time

def read_file(filename, chunk_size=5242880):
    # Open the file in binary mode for reading
    with open(filename, 'rb') as _file:
        while True:
            # Read a chunk of data from the file
            data = _file.read(chunk_size)
            # If there's no more data, stop reading
            if not data:
                break
            # Yield the data as a generator
            yield data

def upload_file(api_token, path):
    """
    Upload a file to the AssemblyAI API.

    Args:
        api_token (str): Your API token for AssemblyAI.
        path (str): Path to the local file.

    Returns:
        str: The upload URL.
    """
    print(f"Uploading file: {path}")

    # Set the headers for the request, including the API token
    headers = {'authorization': api_token}
    
    # Send a POST request to the API to upload the file, passing in the headers
    # and the file data
    response = requests.post('https://api.assemblyai.com/v2/upload',
                             headers=headers,
                             data=read_file(path))

    # If the response is successful, return the upload URL
    if response.status_code == 200:
        return response.json()["upload_url"]
    # If the response is not successful, print the error message and return
    # None
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None

def create_transcript(api_token, audio_url):
    """
    Create a transcript using AssemblyAI API.

    Args:
        api_token (str): Your API token for AssemblyAI.
        audio_url (str): URL of the audio file to be transcribed.

    Returns:
        dict: Completed transcript object.
    """
    print("Transcribing audio... This might take a moment.")

    # Set the API endpoint for creating a new transcript
    url = "https://api.assemblyai.com/v2/transcript"

    # Set the headers for the request, including the API token and content type
    headers = {
        "authorization": api_token,
        "content-type": "application/json"
    }

    # Set the data for the request, including the URL of the audio file to be
    # transcribed
    data = {
        "audio_url": audio_url
    }

    # Send a POST request to the API to create a new transcript, passing in the
    # headers and data
    response = requests.post(url, json=data, headers=headers)

    # Get the transcript ID from the response JSON data
    transcript_id = response.json()['id']

    # Set the polling endpoint URL by appending the transcript ID to the API endpoint
    polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"

    # Keep polling the API until the transcription is complete
    while True:
        # Send a GET request to the polling endpoint, passing in the headers
        transcription_result = requests.get(polling_endpoint, headers=headers).json()

        # If the status of the transcription is 'completed', exit the loop
        if transcription_result['status'] == 'completed':
            break

        # If the status of the transcription is 'error', raise a runtime error with
        # the error message
        elif transcription_result['status'] == 'error':
            raise RuntimeError(f"Transcription failed: {transcription_result['error']}")

        # If the status of the transcription is not 'completed' or 'error', wait for
        # 3 seconds and poll again
        else:
            time.sleep(3)

    return transcription_result

# Your API token is already set in this variable
your_api_token = "a9795c76afbb482595441db6f2b86c26"

# -----------------------------------------------------------------------------
# Update the file path here, pointing to a local audio or video file.
# If you don't have one, download a sample file: https://storage.googleapis.com/aai-web-samples/espn-bears.m4a
# You may also remove the upload step and update the 'audio_url' parameter in the
# 'create_transcript' function to point to a remote audio or video file.
# -----------------------------------------------------------------------------
filename = input("Enter any audio file =")
print(filename)

# Upload the file to AssemblyAI and get the upload URL
upload_url = upload_file(your_api_token, filename)

# Transcribe the audio file using the upload URL
transcript = create_transcript(your_api_token, upload_url)

# Print the completed transcript object
# print(transcript)

Enter any audio file =JNU.mp3
JNU.mp3
Uploading file: JNU.mp3
Transcribing audio... This might take a moment.


In [2]:
type(transcript)

dict

In [3]:
output_text=transcript.get('text')

In [4]:
print(output_text)

Jawaharlal Nehru University. JNU is a renowned academic institution located in New Delhi, India. Named after India's first prime minister, Jawaharlal Nehru, the university was established in 1969 and has since emerged as a symbol of intellectual excellence and academic freedom. JNU is known for its vibrant and inclusive campus where students from diverse backgrounds come together to engage in rigorous academic pursuits. The university offers a wide range of undergraduate, postgraduate and doctoral programs across various disciplines, including social sciences, humanities, sciences and languages. JNU has a strong emphasis on research and encourages critical thinking and independent thought among its students. The university's faculty comprises distinguished scholars and experts who are actively involved in pushing the boundaries of knowledge through their research and publications. Apart from academics, JNU is also renowned for its vibrant student politics and activism, with students ac

# STEP 2

### googletrans library used for translation.

In [5]:
type(output_text)

str

In [6]:
from googletrans import Translator

def translate_to_hindi(text):
    translator = Translator(service_urls=['translate.google.com'])
    translated = translator.translate(text, dest='hi')
    return translated.text

s1 = json.dumps(output_text)
d2 = json.loads(s1)

# Example usage
english_text = d2
hindi_text = translate_to_hindi(english_text)
print(hindi_text)


जवाहरलाल नेहरू विश्वविद्यालय।JNU नई दिल्ली, भारत में स्थित एक प्रसिद्ध शैक्षणिक संस्थान है।भारत के पहले प्रधान मंत्री, जवाहरलाल नेहरू के नाम पर, विश्वविद्यालय 1969 में स्थापित किया गया था और तब से बौद्धिक उत्कृष्टता और शैक्षणिक स्वतंत्रता के प्रतीक के रूप में उभरा है।जेएनयू अपने जीवंत और समावेशी परिसर के लिए जाना जाता है, जहां विविध पृष्ठभूमि के छात्र कठोर शैक्षणिक गतिविधियों में संलग्न होने के लिए एक साथ आते हैं।विश्वविद्यालय सामाजिक विज्ञान, मानविकी, विज्ञान और भाषाओं सहित विभिन्न विषयों में स्नातक, स्नातकोत्तर और डॉक्टरेट कार्यक्रमों की एक विस्तृत श्रृंखला प्रदान करता है।जेएनयू ने अनुसंधान पर एक मजबूत जोर दिया है और अपने छात्रों के बीच महत्वपूर्ण सोच और स्वतंत्र विचार को प्रोत्साहित करता है।विश्वविद्यालय के संकाय में प्रतिष्ठित विद्वान और विशेषज्ञ शामिल हैं जो अपने शोध और प्रकाशनों के माध्यम से ज्ञान की सीमाओं को आगे बढ़ाने में सक्रिय रूप से शामिल हैं।शिक्षाविदों के अलावा, जेएनयू अपनी जीवंत छात्र राजनीति और सक्रियता के लिए भी प्रसिद्ध है, छात्रों ने सामाजिक, राजनीतिक और सांस्कृतिक म

# STEP 3

### GTTS library used for converting the translated text back to audio.

In [7]:
from gtts import gTTS
import os

def text_to_speech(text, output_file):
    tts = gTTS(text, lang='hi')
    tts.save(output_file)
    os.system(output_file)

# Example usage
output_filename = "output.mp3"
text_to_speech(hindi_text, output_filename)

sh: 1: output.mp3: not found


# STEP 4

### pygame and tkinter used for creating play and pause buttons

In [8]:
import pygame
import tkinter as tk
from tkinter import ttk

def play_audio():
    pygame.mixer.music.play()

def pause_audio():
    pygame.mixer.music.pause()

# Initialize pygame
pygame.mixer.init()

# Create GUI window
root = tk.Tk()
root.title("Audio Player")

# Create play button
play_button = ttk.Button(root, text="Play", command=play_audio)
play_button.pack()

# Create pause button
pause_button = ttk.Button(root, text="Pause", command=pause_audio)
pause_button.pack()

# Load audio file
file_path = 'output.mp3'  # Replace with the path to your audio file
pygame.mixer.music.load(file_path)

# Start GUI event loop
root.mainloop()



pygame 2.4.0 (SDL 2.26.4, Python 3.10.6)
Hello from the pygame community. https://www.pygame.org/contribute.html
