In [None]:
pip install SpeechRecognition nltk pydub



In [None]:
!pip install pydub



In [28]:
import nltk

# Tokenizer models used by sent_tokenize. Older NLTK releases load 'punkt';
# newer releases (3.9+) look for 'punkt_tab' instead, so fetch both to keep the
# notebook runnable on either. download() reports an unknown package by
# returning False rather than raising.
nltk.download('punkt')
nltk.download('punkt_tab')
# English stop-word list used when ranking words for the summary.
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [22]:
import speech_recognition as sr
from pydub import AudioSegment
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.probability import FreqDist
from heapq import nlargest
import os

In [23]:
# Function to split audio file into smaller clips
def split_audio(audio_file, chunk_size_ms=5000):
    """Load a WAV file and cut it into consecutive fixed-length pieces.

    Args:
        audio_file: Path (or file object) accepted by ``AudioSegment.from_wav``.
        chunk_size_ms: Length of each piece; pydub slices segments in
            milliseconds. The final piece may be shorter.

    Returns:
        A list of slices of the loaded segment, in playback order.
    """
    recording = AudioSegment.from_wav(audio_file)
    start_offsets = range(0, len(recording), chunk_size_ms)
    return [recording[begin:begin + chunk_size_ms] for begin in start_offsets]

In [24]:
# Function to convert speech to text
def speech_to_text(audio_file):
    """Transcribe one audio file with the Google Web Speech recognizer.

    Args:
        audio_file: Path (or file object) accepted by ``sr.AudioFile``.

    Returns:
        The recognized text, or an empty string when the recognizer raises
        ``sr.UnknownValueError`` (a message is printed in that case).
        Any other exception from the recognizer propagates to the caller.
    """
    engine = sr.Recognizer()

    # Read the whole file into memory before sending it for recognition.
    with sr.AudioFile(audio_file) as wav_source:
        recorded = engine.record(wav_source)

    try:
        transcript = engine.recognize_google(recorded)
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
        transcript = ""
    return transcript

In [25]:
# Function to summarize text
def summarize_text(text):
    """Return the sentences of ``text`` that mention its most frequent terms.

    Words are trimmed of surrounding punctuation, lower-cased, filtered
    against the English stop-word list and stemmed. The ten most frequent
    stems are then looked up in each sentence, and every sentence containing
    at least one of them is kept, in original order.

    Sentences are matched on stems rather than by substring search of the raw
    sentence: a stem is lower-cased and often truncated (e.g. "happi" for
    "Happy"), so a substring test misses real occurrences and can hit
    unrelated words that merely contain the stem.

    Args:
        text: The text to summarize.

    Returns:
        The selected sentences joined by single spaces; an empty string when
        nothing is selected (including for empty input).
    """
    import string  # local import: only this function needs it

    sentences = sent_tokenize(text)
    # Build the stop-word set once instead of re-reading the list per word.
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()

    def _key_stems(fragment):
        """Stem the non-stop-words of ``fragment`` after trimming punctuation."""
        stems = []
        for token in fragment.split():
            word = token.strip(string.punctuation).lower()
            if word and word not in stop_words:
                stems.append(stemmer.stem(word))
        return stems

    frequency_distribution = FreqDist(_key_stems(text))
    most_frequent_words = set(
        nlargest(10, frequency_distribution, key=frequency_distribution.get)
    )

    # Compare like with like: sentence stems against the top stems.
    summary_sentences = [
        sentence
        for sentence in sentences
        if most_frequent_words.intersection(_key_stems(sentence))
    ]

    return ' '.join(summary_sentences)

In [26]:
# Main function
def main(audio_file, output_file="output.txt"):
    """Transcribe and summarize an audio file chunk by chunk.

    The recording is split with ``split_audio``; each chunk is exported to a
    temporary ``chunk_<i>.wav`` in the current directory, transcribed with
    ``speech_to_text`` and, when text was recognized, summarized with
    ``summarize_text``. One line per summarized chunk is written to
    ``output_file``.

    Args:
        audio_file: Path of the recording, passed to ``split_audio``.
        output_file: Destination text file; defaults to ``"output.txt"``.

    Raises:
        Whatever the helpers raise. Each temporary chunk file is removed
        before the error propagates, so a failed run leaves none behind.
    """
    chunks = split_audio(audio_file)
    summaries = []
    for i, chunk in enumerate(chunks):
        chunk_path = f"chunk_{i}.wav"
        try:
            chunk.export(chunk_path, format="wav")
            print(f"Processing chunk {i}")
            text = speech_to_text(chunk_path)
        finally:
            # Remove the temporary file as soon as it has been used, even if
            # export or recognition failed part-way through.
            if os.path.exists(chunk_path):
                os.remove(chunk_path)

        if text:
            summaries.append(summarize_text(text))
        else:
            print(f"No speech detected in chunk {i}")

    # Explicit UTF-8 so non-ASCII transcripts do not depend on the platform default.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write("".join(summary + "\n" for summary in summaries))


In [29]:
if __name__ == "__main__":
    # Recording to process; split_audio loads it with AudioSegment.from_wav.
    audio_file = "long1.wav"  # Replace with your audio file path
    # Windows example: audio_file = r"D:\long1.wav"  (raw string keeps the backslash literal)
    main(audio_file)



Processing chunk 0
Processing chunk 1
Processing chunk 2
Processing chunk 3
Processing chunk 4
Processing chunk 5
Processing chunk 6
Processing chunk 7
Processing chunk 8
Processing chunk 9
Processing chunk 10
Processing chunk 11
Processing chunk 12
Processing chunk 13
Processing chunk 14
Speech recognition could not understand audio
No speech detected in chunk 14
Processing chunk 15
Speech recognition could not understand audio
No speech detected in chunk 15
Processing chunk 16
Processing chunk 17
Speech recognition could not understand audio
No speech detected in chunk 17
Processing chunk 18
Processing chunk 19
Processing chunk 20
Processing chunk 21
Processing chunk 22
Processing chunk 23
Processing chunk 24
Processing chunk 25
Speech recognition could not understand audio
No speech detected in chunk 25
Processing chunk 26
Processing chunk 27
Processing chunk 28
Processing chunk 29
Processing chunk 30
Processing chunk 31
Processing chunk 32
Speech recognition could not understand au