In [57]:
import whisper

In [58]:
# Load the model 
model = whisper.load_model("base")

In [59]:
result = model.transcribe(r"C:\Users\kanch\Downloads\audio 2.mp3", task="translate")



In [60]:
# Print full transcription
transcribed_text = result["text"]
print("\nRomanized Hindi Transcription")
print(transcribed_text)


Romanized Hindi Transcription
 He was not able to use the two legs. So he didn't know where he was. He was hit by someone or someone else. But he didn't even have blood loss. He didn't have the dog. He went to Amit Sar. Amit Sar took him to the gym. He took all the stuff he had. Amit Sar didn't even understand what he said. He said he is a bad man. He is only a pair of men. He didn't have the blood loss. He didn't have the blood loss. He took him to the gym. He gave him the paint. He was resting. He didn't eat anything. He didn't eat anything. He just had to eat.


In [61]:
# Print segment-wise with timestamps
print("\n Segments with Timestamps")
for segment in result["segments"]:
    start = round(segment["start"], 2)
    end = round(segment["end"], 2)
    text = segment["text"].strip()
    print(f"[{start}s - {end}s]  {text}")


 Segments with Timestamps
[0.0s - 3.0s]  He was not able to use the two legs.
[3.0s - 4.0s]  So he didn't know where he was.
[4.0s - 8.0s]  He was hit by someone or someone else.
[8.0s - 10.0s]  But he didn't even have blood loss.
[10.0s - 12.0s]  He didn't have the dog.
[12.0s - 14.0s]  He went to Amit Sar.
[14.0s - 18.0s]  Amit Sar took him to the gym.
[18.0s - 20.0s]  He took all the stuff he had.
[20.0s - 24.0s]  Amit Sar didn't even understand what he said.
[24.0s - 26.0s]  He said he is a bad man.
[26.0s - 28.0s]  He is only a pair of men.
[28.0s - 30.0s]  He didn't have the blood loss.
[30.0s - 32.0s]  He didn't have the blood loss.
[32.0s - 34.0s]  He took him to the gym.
[34.0s - 36.0s]  He gave him the paint.
[36.0s - 38.0s]  He was resting.
[38.0s - 40.0s]  He didn't eat anything.
[40.0s - 42.0s]  He didn't eat anything.
[42.0s - 44.0s]  He just had to eat.


In [62]:
# Load KeyBERT model
from keybert import KeyBERT
kw_model = KeyBERT()

In [63]:
# Extract key topics (keywords/phrases)
keywords = kw_model.extract_keywords(transcribed_text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=10)

In [64]:
# Print key topics
print("\n Key Topics")
for kw in keywords:
    print(f"- {kw[0]} (Score: {kw[1]:.2f})")


 Key Topics
- sar took (Score: 0.39)
- sar didn (Score: 0.38)
- blood loss (Score: 0.38)
- legs didn (Score: 0.38)
- didn eat (Score: 0.38)
- didn blood (Score: 0.38)
- dog went (Score: 0.36)
- blood (Score: 0.36)
- sar amit (Score: 0.34)
- amit sar (Score: 0.34)


In [65]:
# Sentiment Analysis using Hugging Face Transformers
from transformers import pipeline

print("\n Sentiment Analysis")
classifier = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.



 Sentiment Analysis


Device set to use cpu


In [66]:
# Full text sentiment
sentiment = classifier(transcribed_text[:512])  # Max 512 tokens at once
print(f"Overall Sentiment: {sentiment[0]['label']} (Score: {sentiment[0]['score']:.2f})")


Overall Sentiment: NEGATIVE (Score: 0.99)


In [67]:
# Load the summarization model
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

Device set to use cpu


In [74]:
#Summary generate karo
summary_output = summarizer(transcribed_text, max_length=100, min_length=20, do_sample=False)

In [75]:
# Result ko extract karo
summary_text = summary_output[0]['summary_text']

In [76]:
# Save to file
with open("summary.txt", "w", encoding="utf-8") as f:
    f.write(summary_text)

In [77]:
# For short transcriptions, this will work directly:
summary = summarizer(transcribed_text[:1024], max_length=130, min_length=30, do_sample=False)

print(f"Summary: {summary[0]['summary_text']}")

Summary:  He was not able to use the two legs so he didn't know where he was . He was hit by someone or someone else. He didn't even have blood loss. He went to Amit Sar. Amit Sar took him to the gym. He took all the stuff he had. He said he is a bad man. He is only a pair of men.


In [78]:
with open("transcription.txt", "w", encoding="utf-8") as f:
    f.write(transcribed_text)


In [79]:
with open("keywords.txt", "w", encoding="utf-8") as f:
    for keyword in keywords:
        f.write(f"{keyword}\n")



In [81]:
with open("summary.txt", "w", encoding="utf-8") as f:
    f.write(summary_text)


In [82]:
import json

data = {
    "transcription": transcribed_text,
    "keywords": keywords,
    "sentiment": sentiment,
    "summary": summary_text
}

with open("result.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=4)
