In [1]:
import os
import re
import speech_recognition as sr
import spacy
import pandas as pd

In [2]:
# Function to transcribe audio files into text
def transcribe_audio(audio_file):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
        return ""
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
        return ""

In [3]:
# Function to extract user information from transcribed text
def extract_user_info(transcribed_text):
    # Initialize spaCy NLP model
    nlp = spacy.load("en_core_web_sm")

    # Perform named entity recognition
    doc = nlp(transcribed_text)

    # Extract entities of interest (e.g., person names, locations)
    name = None
    address = None
    phone_number = None
    occupation = None

    for ent in doc.ents:
        if ent.label_ == "PERSON" and not name:
            name = ent.text
        elif ent.label_ == "GPE" and not address:
            address = ent.text
        # Add additional conditions for phone numbers, occupations, etc.

    return name, address, phone_number, occupation

In [7]:
if __name__ == "__main__":
    # Process multiple audio files
    audio_folder = "D:\check_audio"  # Folder containing audio files
    output_excel_file = "user_info.xlsx"  # Output Excel file name

    # Create an empty DataFrame to store user information
    columns = ["Name", "Address", "Phone Number", "Occupation"]
    user_info_df = pd.DataFrame(columns=columns)

    # Iterate through audio files
    for audio_file in os.listdir(audio_folder):
        if audio_file.endswith(".wav"):
            audio_path = os.path.join(audio_folder, audio_file)
            transcribed_text = transcribe_audio(audio_path)
            name, address, phone_number, occupation = extract_user_info(transcribed_text)

            # Append extracted information to DataFrame
            user_info_df = user_info_df.append({
                "Name": name,
                "Address": address,
                "Phone Number": phone_number,
                "Occupation": occupation
            }, ignore_index=True)

  

  user_info_df = user_info_df.append({
  user_info_df = user_info_df.append({


In [8]:
    # Save DataFrame to Excel file
    user_info_df.to_excel(output_excel_file, index=False)
    print(f"User information saved to {output_excel_file}")


User information saved to user_info.xlsx
