In [None]:
# Install Whisper and its dependencies
!pip install pydub
!apt-get install ffmpeg  # Required for pydub to handle MP3 files
!pip install git+https://github.com/openai/whisper.git



Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-gbjrhgeo
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-gbjrhgeo
  Resolved https://github.com/openai/whisper.git to commit 90db0de1896c23cbfaf0c58bc2d30665f709f170
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper==20240930)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting triton>=2.0.0 (from openai-whisper==20240930)
  Downloading triton-3.1.0-cp310-cp

Installing Whisper for ASR, and FFmpeg (together with pydub) for audio handling and format conversion.

Importing Torch and checking if GPU side is all working.

In [None]:
import whisper

# Smoke test: load the "medium" checkpoint to confirm that the Whisper
# installation is usable before moving on.
model = whisper.load_model(name="medium")
print("Whisper model loaded successfully!")


Importing Whisper and verifying that the medium model loads correctly.

In [None]:
from google.colab import files

# Open Colab's upload dialog; the returned dictionary is keyed by uploaded file name.
uploaded = files.upload()

# Fail with a clear message when nothing was uploaded (e.g. the dialog was
# cancelled) instead of an opaque IndexError from indexing an empty list.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose an audio file.")

# The first uploaded file name is used as the audio path.
audio_file_path = next(iter(uploaded))
print(f"Uploaded file: {audio_file_path}")


Uploading the audio file that the ASR step will transcribe next.

In [None]:
import whisper
import os
import warnings

# Silence UserWarning and FutureWarning messages so they do not clutter the output
for _ignored_category in (UserWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

# Load the Whisper "medium" checkpoint used by the transcription function below
model = whisper.load_model(name="medium")

# Define the transcription function
def transcribe_audio(audio_file, language):
    """Transcribe one audio file with the module-level Whisper ``model``.

    Args:
        audio_file: Path of the audio file handed to ``model.transcribe``.
        language: Language code forwarded as the ``language`` keyword (e.g. "hi").

    Returns:
        The ``"text"`` entry of the transcription result, or ``None`` when any
        exception occurs (the error is printed rather than raised).
    """
    try:
        transcript = model.transcribe(audio_file, language=language)["text"]
    except Exception as err:
        # Best-effort: report the problem and let the caller decide what to do.
        print(f"Error during transcription: {err}")
        return None
    else:
        return transcript

# Audio files to transcribe, mapped to the language code passed to Whisper
audio_files = {
    "hin_test_aud.wav": "hi",  # Hindi
}

# Transcribe each listed file, skipping any that are not present on disk
for audio_file, lang in audio_files.items():
    if not os.path.exists(audio_file):
        print(f"File not found: {audio_file}")
        continue

    print(f"Processing file: {audio_file} (Language: {lang})")
    transcription = transcribe_audio(audio_file, language=lang)

    # transcribe_audio returns None only when an error occurred. An empty
    # string is still a successful (if empty) transcript, so test for None
    # explicitly instead of relying on truthiness.
    if transcription is not None:
        print(f"Transcription ({lang}):\n{transcription}\n")
    else:
        print(f"Failed to transcribe {audio_file}.\n")


Here we implement ASR (Automatic Speech Recognition) using Whisper from OpenAI. We load the medium model (the large model can be used instead if we want to go further, e.g. to handle accents better).

The desired language is specified (hi = Hindi), and the audio is transcribed to text in that language.

In [None]:
from transformers import pipeline

# Multilingual abstractive summarizer (mT5 fine-tuned on XL-Sum, which includes Hindi).
# NOTE: the checkpoint used here before, facebook/mbart-large-50-one-to-many-mmt,
# is a translation model, not a summarizer; the "summary" it produced appears to
# have been just a truncated copy of the input text.
SUMMARIZATION_MODEL = "csebuetnlp/mT5_multilingual_XLSum"
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL, device=-1)  # -1 sets to CPU

# Input Hindi text for summarization
hindi_text = """
भारत एक महान देश है, जिसमें बहुत सारी भाषाएँ, संस्कृतियाँ, और धर्म हैं। यहाँ की ऐतिहासिक धरोहर और सांस्कृतिक विविधता को देखने के लिए दुनियाभर से लोग आते हैं। भारतीय समाज में विविधता है, और हर राज्य की अपनी अलग पहचान है। भारतीय राजनीति, समाज और अर्थव्यवस्था ने बहुत से परिवर्तन देखे हैं। इस देश की प्रगति के लिए मेहनत और शिक्षा महत्वपूर्ण हैं।
"""

# Summarize the Hindi text. max_length / min_length are counted in tokens;
# truncation=True keeps over-long inputs from exceeding the model's input limit.
summary = summarizer(
    hindi_text,
    max_length=30,
    min_length=20,
    do_sample=False,
    truncation=True,
)

# Output the summary
print("Summarized Hindi Text:", summary[0]['summary_text'])


Summarizing the Hindi text using mBART

In [None]:
from transformers import pipeline

# Load the Hugging Face translation model for Hindi to English (device=-1 -> CPU).
# The task name states the real language pair instead of the literal
# "translation_xx_to_yy" placeholder.
translator = pipeline("translation_hi_to_en", model="Helsinki-NLP/opus-mt-hi-en", device=-1)

# Translate the summary produced by the summarization cell, rather than a
# pasted copy of an earlier run's output that can silently go stale.
input_text = summary[0]['summary_text']

# Translate the summarized Hindi text to English
translated_text = translator(input_text)

# Display the translated text
print("Translated Text:", translated_text[0]['translation_text'])


Translating the summary from Hindi to English using the Helsinki-NLP model from Hugging Face.

In [None]:
from transformers import pipeline

# Initialize the sentiment-analysis pipeline (DistilBERT fine-tuned on SST-2, English only)
sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# Analyse the actual output of the translation cell instead of a hard-coded
# paragraph, so the result reflects the audio/text processed in this run.
english_text = translated_text[0]['translation_text']

# Apply sentiment analysis to the English text.
# truncation=True keeps inputs longer than the model's maximum length from failing.
sentiment = sentiment_analyzer(english_text, truncation=True)

# Print the sentiment result (a list with one {'label', 'score'} dict per input)
print("Sentiment Analysis Result:", sentiment)


Here we perform sentiment analysis using a pre-trained Hugging Face model.

In [None]:
import matplotlib.pyplot as plt

# Use the live prediction from the sentiment-analysis cell instead of a
# hard-coded copy of an earlier run's output.
sentiment_result = sentiment[0]
predicted_label = sentiment_result['label'].upper()
confidence = sentiment_result['score']

# The SST-2 model is binary (POSITIVE / NEGATIVE) and the pipeline reports only
# the winning label's score, so the other class gets the complement. There is
# no NEUTRAL class, so no bar is drawn for one.
labels = ['POSITIVE', 'NEGATIVE']
if predicted_label not in labels:
    raise ValueError(f"Unexpected sentiment label: {sentiment_result['label']!r}")
scores = [confidence if label == predicted_label else 1 - confidence for label in labels]

# Create a bar chart for sentiment visualization
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(labels, scores, color=['green', 'red'])
ax.set_title('Sentiment Analysis Visualization')
ax.set_xlabel('Sentiment Label')
ax.set_ylabel('Score')
ax.set_ylim(0, 1)
plt.show()


Finally, we visualize the sentiment analysis result as a bar chart.