# For more than 1 Min file

In [16]:
import os
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment
import pandas as pd
from google.cloud import storage

# Set the path to your service account key JSON file
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"C:\Users\TusharPatil\Downloads\crucial-citizen-399705-a80d6ed79874.json"

# Instantiate the SpeechClient
client = speech.SpeechClient()

# Set your Google Cloud Storage (GCS) bucket name
bucket_name = 'bucket-1232'  # Replace with your actual GCS bucket name

def upload_file_to_gcs(local_file_path, bucket_name, gcs_file_name):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(gcs_file_name)

    blob.upload_from_filename(local_file_path)

    return f'gs://{bucket_name}/{gcs_file_name}'

# Define a function to convert audio to mono
def convert_to_mono(input_audio_path, output_audio_path):
    audio = AudioSegment.from_wav(input_audio_path)
    audio = audio.set_channels(1)  # Convert to mono (single channel)
    audio.export(output_audio_path, format="wav")

# Define a function to transcribe audio with diarization using GCS URI
def transcribe_with_diarization_gcs(gcs_uri, file_name):
    # Define recognition audio and configuration
    audio = speech.RecognitionAudio(uri=gcs_uri)

    diarization_config = speech.SpeakerDiarizationConfig(
        enable_speaker_diarization=True,
        min_speaker_count=2,
        max_speaker_count=2,
    )

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code="en-US",
        diarization_config=diarization_config,
    )

    # Perform asynchronous recognition with speaker diarization
    operation = client.long_running_recognize(config=config, audio=audio)

    # Wait for the operation to complete
    response = operation.result()

    # Process the results and extract transcriptions
    transcriptions = []
    for result in response.results:
        words_info = result.alternatives[0].words
        current_speaker = None
        start_time = None
        transcript = ''
        for word_info in words_info:
            if current_speaker != word_info.speaker_tag:
                if current_speaker is not None:
                    transcriptions.append({
                        'file_name': file_name,
                        'speaker': current_speaker,
                        'transcript': transcript,
                        'start_time': start_time,
                        'end_time': word_info.start_time.total_seconds()
                    })
                current_speaker = word_info.speaker_tag
                start_time = word_info.start_time.total_seconds()
                transcript = word_info.word
            else:
                transcript += " " + word_info.word

        # Add the last segment to the transcriptions list
        transcriptions.append({
            'file_name': file_name,
            'speaker': current_speaker,
            'transcript': transcript,
            'start_time': start_time,
            'end_time': words_info[-1].end_time.total_seconds()
        })

    # Create a DataFrame from the transcriptions
    df = pd.DataFrame(transcriptions)

    return df

# Path to the folder containing audio files (replace with your folder path)
audio_folder_path = r"C:\Users\TusharPatil\Desktop\Audio Analysis\random"

# Initialize an empty list to store DataFrames for each file
all_dfs = []

# Iterate through the files in the folder
for filename in os.listdir(audio_folder_path):
    if filename.endswith(".wav"):
        audio_file_path = os.path.join(audio_folder_path, filename)
        print(f"Transcribing audio file: {audio_file_path}")

        # Convert the audio to mono
        mono_audio_file_path = f"/tmp/mono_{filename}"
        convert_to_mono(audio_file_path, mono_audio_file_path)

        # function call for uploading to GCS
        gcs_uri = upload_file_to_gcs(mono_audio_file_path, bucket_name, f"mono_{filename}")

        # Extract file name
        file_name = os.path.basename(audio_file_path)

        # Transcribe audio with speaker diarization using GCS URI and get the DataFrame
        transcription_df = transcribe_with_diarization_gcs(gcs_uri, file_name)

        # Append the DataFrame to the list
        all_dfs.append(transcription_df)

# Concatenate all DataFrames into one
final_df = pd.concat(all_dfs, ignore_index=True)

# Display the final DataFrame
print(final_df)





Transcribing audio file: C:\Users\TusharPatil\Desktop\Audio Analysis\random\normal (2).wav
Transcribing audio file: C:\Users\TusharPatil\Desktop\Audio Analysis\random\normal (3).wav
         file_name  speaker  \
0   normal (2).wav        0   
1   normal (2).wav        0   
2   normal (2).wav        0   
3   normal (2).wav        2   
4   normal (2).wav        1   
5   normal (2).wav        2   
6   normal (2).wav        1   
7   normal (2).wav        2   
8   normal (2).wav        1   
9   normal (2).wav        2   
10  normal (2).wav        1   
11  normal (2).wav        2   
12  normal (3).wav        0   
13  normal (3).wav        0   
14  normal (3).wav        1   
15  normal (3).wav        2   
16  normal (3).wav        1   

                                           transcript  start_time  end_time  
0                                               hello         0.0       1.5  
1   917 the office but I just get out for the plac...         8.6      57.7  
2                        

In [17]:
# Path to save the Excel file
excel_file_path =r"C:\Users\TusharPatil\Desktop\dairization.xlsx"

# Save the final DataFrame to an Excel file
final_df.to_excel(excel_file_path, index=False)

# Print a message indicating the file has been saved
print(f"DataFrame saved to Excel file: {excel_file_path}")



DataFrame saved to Excel file: C:\Users\TusharPatil\Desktop\dairization.xlsx


In [18]:
final_df

Unnamed: 0,file_name,speaker,transcript,start_time,end_time
0,normal (2).wav,0,hello,0.0,1.5
1,normal (2).wav,0,917 the office but I just get out for the plac...,8.6,57.7
2,normal (2).wav,0,pokefinder,68.9,70.0
3,normal (2).wav,2,hello 917 the office,0.0,10.3
4,normal (2).wav,1,but I just get out for the place,10.3,14.5
5,normal (2).wav,2,okay you supposed to go to your Village today,14.5,22.7
6,normal (2).wav,1,but I just canceled the plan because,22.7,26.3
7,normal (2).wav,2,today we are going to discuss about the projec...,26.3,37.8
8,normal (2).wav,1,that's why I just cancelled my plans and,37.8,40.9
9,normal (2).wav,2,yeah you can open this time in Des Plaines,40.9,48.7


# For  less than 1 Min File

In [35]:
import os
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment
import pandas as pd

# Set the path to your service account key JSON file
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"C:\Users\TusharPatil\Downloads\crucial-citizen-399705-a80d6ed79874.json"

# Instantiate the SpeechClient
client = speech.SpeechClient()

def convert_to_mono(input_audio_path, output_audio_path):
    # Load the audio file and convert to mono
    audio = AudioSegment.from_wav(input_audio_path)
    audio = audio.set_channels(1)  # Convert to mono
    audio.export(output_audio_path, format="wav")

def transcribe_with_diarization(audio_file_path, file_name):
    # Convert the audio to mono
    temp_mono_file = "temp_mono.wav"
    convert_to_mono(audio_file_path, temp_mono_file)

    # Load the audio file content
    with open(temp_mono_file, "rb") as audio_file:
        content = audio_file.read()

    # Define recognition audio and configuration
    audio = speech.RecognitionAudio(content=content)

    # Clean up temporary mono audio file
    os.remove(temp_mono_file)

    diarization_config = speech.SpeakerDiarizationConfig(
        enable_speaker_diarization=True,
        min_speaker_count=2,
        max_speaker_count=2,
    )

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code="en-US",
        diarization_config=diarization_config,
    )

    # Perform recognition with speaker diarization
    response = client.recognize(config=config, audio=audio)

    # Get words, speaker tags, and timestamps
    result = response.results[-1]
    words_info = result.alternatives[0].words

    # Initialize variables to store speaker and start time
    current_speaker = None
    start_time = None
    transcript = ''

    # Initialize a list to store the transcription segments
    transcriptions = []

    # Print the output with timestamps
    for word_info in words_info:
        if current_speaker != word_info.speaker_tag:
            if current_speaker is not None:
                transcriptions.append({
                    'file_name': file_name,
                    'speaker': current_speaker,
                    'transcript': transcript,
                    'start_time': start_time,
                    'end_time': word_info.start_time.total_seconds()
                })
            current_speaker = word_info.speaker_tag
            start_time = word_info.start_time.total_seconds()
            transcript = word_info.word
        else:
            transcript += " " + word_info.word

    # Add the last segment to the transcriptions list
    transcriptions.append({
        'file_name': file_name,
        'speaker': current_speaker,
        'transcript': transcript,
        'start_time': start_time,
        'end_time': words_info[-1].end_time.total_seconds()
    })

    # Create a DataFrame from the transcriptions
    df = pd.DataFrame(transcriptions)

    return df

# Path to the folder containing audio files (replace with your folder path)
audio_folder_path = r"C:\Users\TusharPatil\OneDrive - Agivant Technlogies India Pvt. Ltd\Audio Analysis\new_audios_10_wav"

# Initialize an empty list to store DataFrames for each file
all_dfs = []

# Iterate through the files in the folder
for filename in os.listdir(audio_folder_path):
    if filename.endswith(".wav"):
        audio_file_path = os.path.join(audio_folder_path, filename)
        print(f"Transcribing audio file: {audio_file_path}")

        # Extract file name
        file_name = os.path.basename(audio_file_path)

        # Transcribe audio with speaker diarization and get the DataFrame
        transcription_df = transcribe_with_diarization(audio_file_path, file_name)

        # Append the DataFrame to the list
        all_dfs.append(transcription_df)

# Concatenate all DataFrames into one
final_df = pd.concat(all_dfs, ignore_index=True)

# Display the final DataFrame
print(final_df)

Transcribing audio file: C:\Users\TusharPatil\OneDrive - Agivant Technlogies India Pvt. Ltd\Audio Analysis\new_audios_10_wav\blank_1.wav
Transcribing audio file: C:\Users\TusharPatil\OneDrive - Agivant Technlogies India Pvt. Ltd\Audio Analysis\new_audios_10_wav\echo_2.wav
Transcribing audio file: C:\Users\TusharPatil\OneDrive - Agivant Technlogies India Pvt. Ltd\Audio Analysis\new_audios_10_wav\low_volume_2.wav
Transcribing audio file: C:\Users\TusharPatil\OneDrive - Agivant Technlogies India Pvt. Ltd\Audio Analysis\new_audios_10_wav\normal_2.wav
           file_name  speaker  \
0        blank_1.wav        2   
1         echo_2.wav        2   
2   low_volume_2.wav        1   
3   low_volume_2.wav        2   
4   low_volume_2.wav        1   
5       normal_2.wav        1   
6       normal_2.wav        2   
7       normal_2.wav        1   
8       normal_2.wav        2   
9       normal_2.wav        1   
10      normal_2.wav        2   
11      normal_2.wav        1   

                 

In [36]:
final_df

Unnamed: 0,file_name,speaker,transcript,start_time,end_time
0,blank_1.wav,2,hello hello hello hi Sagar hello how are you h...,0.6,31.7
1,echo_2.wav,2,wow that sounds heavenly.,23.3,47.8
2,low_volume_2.wav,1,hello can you get,0.7,3.9
3,low_volume_2.wav,2,rid of negative numbers will stop at nothing t...,3.9,42.0
4,low_volume_2.wav,1,I was going to,42.0,43.8
5,normal_2.wav,1,you know I'm reading a book of Antigravity is ...,0.9,7.4
6,normal_2.wav,2,sounds fascinating you know I used to be a be ...,7.4,12.7
7,normal_2.wav,1,impressive indeed to hear about the guy who in...,12.7,18.9
8,normal_2.wav,2,did you hear about the guy who invented Lifesa...,18.9,28.8
9,normal_2.wav,1,place we keep track on a,28.8,38.0


In [37]:
# Path to save the Excel file
excel_file_path =r"C:\Users\TusharPatil\Desktop\dairization2.xlsx"

# Save the final DataFrame to an Excel file
final_df.to_excel(excel_file_path, index=False)

# Print a message indicating the file has been saved
print(f"DataFrame saved to Excel file: {excel_file_path}")


DataFrame saved to Excel file: C:\Users\TusharPatil\Desktop\dairization2.xlsx
