In [2]:
import csv
import os
import shutil

# Path to the CSV file
csv_file_path = "Downloads/filenames.csv"

# Path to the folder with 5.8k audio files
audio_folder_path = "OneDrive - Manipal University Jaipur/train"

# Path to the folder where you want to save the selected audio files
output_folder_path = "OneDrive - Manipal University Jaipur/train_500"

# Create the output folder if it doesn't exist
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

# Read the CSV file and extract the audio file names
with open(csv_file_path, 'r') as csvfile:
    csvreader = csv.reader(csvfile)
    audio_files = [row[0] for row in csvreader]

# Iterate through each audio file name and copy it to the output folder if it exists
for audio_file in audio_files:
    src_file_path = os.path.join(audio_folder_path, audio_file)
    if os.path.exists(src_file_path):
        dest_file_path = os.path.join(output_folder_path, audio_file)
        shutil.copy(src_file_path, dest_file_path)
    else:
        print(f"File not found: {audio_file}")

print("Done!")


File not found: file_name
Done!


In [3]:
!pip install SpeechRecognition

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.1-py2.py3-none-any.whl (32.8 MB)
     ---------------------------------------- 32.8/32.8 MB 7.0 MB/s eta 0:00:00
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.1


In [8]:
import os
import pandas as pd
import speech_recognition as sr

# Initialize the recognizer
recognizer = sr.Recognizer()

# Path to the folder containing audio files
folder_path = "OneDrive - Manipal University Jaipur/train_500"

# Initialize an empty list to store the DataFrames for each file
dfs = []

# Iterate over each audio file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".wav"):  # Adjust if your audio files have a different extension
        audio_file = os.path.join(folder_path, filename)
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
        
        # Use Google Web Speech API for transcription
        try:
            text = recognizer.recognize_google(audio_data)
            df = pd.DataFrame({'filename': [filename], 'transcript': [text]})
            dfs.append(df)
        except sr.UnknownValueError:
            print(f"Could not understand audio: {filename}")
        except sr.RequestError as e:
            print(f"Could not request results for audio {filename}: {e}")

# Concatenate all DataFrames into a single DataFrame
result = pd.concat(dfs, ignore_index=True)


# Print the DataFrame with the results
print(result.head(5))


Could not understand audio: 1249120_15004831_74682993.wav
Could not request results for audio 1249120_15965551_20749336.wav: recognition request failed: Bad Request
Could not request results for audio 1249120_15965551_25492795.wav: recognition request failed: Bad Request
Could not request results for audio 1249120_15965551_30586097.wav: recognition request failed: Bad Request
Could not request results for audio 1249120_15965551_35401223.wav: recognition request failed: Bad Request
Could not request results for audio 1249120_15965551_41232322.wav: recognition request failed: Bad Request
Could not request results for audio 1249120_15965551_42589900.wav: recognition request failed: Bad Request
Could not request results for audio 1249120_15965551_44782675.wav: recognition request failed: Bad Request
Could not request results for audio 1249120_15965551_50224784.wav: recognition request failed: Bad Request
Could not request results for audio 1249120_15965551_53555531.wav: recognition request

In [24]:
result.rename(columns={'filename': 'file_name'}, inplace=True)

In [25]:
result

Unnamed: 0,file_name,transcript
0,1249120_13842059_104469105.wav,I have a painful cramp in my feet
1,1249120_13842059_105045085.wav,the pain feels like it's right below the skin
2,1249120_13842059_11964685.wav,I feel suicidal
3,1249120_13842059_12420758.wav,I feel a sharp pain in my ankle joint when I
4,1249120_13842059_13041979.wav,my shoulder hurts me so much
...,...,...
470,1249120_6338946_95779210.wav,my ankle is hurting me
471,1249120_6338946_97962298.wav,I can't walk because I have a great
472,1249120_6338946_98107593.wav,when I try to be warm and wear more clothes I ...
473,1249120_6338946_99675257.wav,I've always been very active but now I just do...


In [27]:
import pandas as pd

# Load the CSV file with actual transcripts into a DataFrame
actual_transcripts_df = pd.read_csv("Downloads/overview-of-recordings.csv")

# Extract only the 'phrase' and 'filename' columns
actual_transcripts_subset = actual_transcripts_df[['phrase', 'file_name']]

# Merge the actual transcripts DataFrame with the result DataFrame based on filenames
result_with_actual_transcripts = pd.merge(result, actual_transcripts_subset, on="file_name", how="left")

# Print the updated DataFrame
print(result_with_actual_transcripts)


                          file_name  \
0    1249120_13842059_104469105.wav   
1    1249120_13842059_105045085.wav   
2     1249120_13842059_11964685.wav   
3     1249120_13842059_12420758.wav   
4     1249120_13842059_13041979.wav   
..                              ...   
470    1249120_6338946_95779210.wav   
471    1249120_6338946_97962298.wav   
472    1249120_6338946_98107593.wav   
473    1249120_6338946_99675257.wav   
474    1249120_6338946_99780649.wav   

                                            transcript  \
0                    I have a painful cramp in my feet   
1        the pain feels like it's right below the skin   
2                                      I feel suicidal   
3         I feel a sharp pain in my ankle joint when I   
4                         my shoulder hurts me so much   
..                                                 ...   
470                             my ankle is hurting me   
471                I can't walk because I have a great   
472  when

In [28]:
result_with_actual_transcripts.rename(columns={'phrase': 'actual_transcript'}, inplace=True)

In [43]:
result_with_actual_transcripts

Unnamed: 0,file_name,transcript,actual_transcript
0,1249120_13842059_104469105.wav,I have a painful cramp in my feet,I have a painful cramp in my feet
1,1249120_13842059_105045085.wav,the pain feels like it's right below the skin,The pain feels like it's right below the skin
2,1249120_13842059_11964685.wav,I feel suicidal,I feel suicidal.
3,1249120_13842059_12420758.wav,I feel a sharp pain in my ankle joint when I,I feel a sharp pain in my ankle joint when I s...
4,1249120_13842059_13041979.wav,my shoulder hurts me so much,My shoulder hurts me so much
...,...,...,...
470,1249120_6338946_95779210.wav,my ankle is hurting me,my ankle is hurting me
471,1249120_6338946_97962298.wav,I can't walk because I have a great,I can't walk because i have a great foot ache
472,1249120_6338946_98107593.wav,when I try to be warm and wear more clothes I ...,When I tried to be warm and wear more clothes ...
473,1249120_6338946_99675257.wav,I've always been very active but now I just do...,I've always been very active but now I just do...


In [30]:
!pip install python-Levenshtein

Collecting python-Levenshtein
  Downloading python_Levenshtein-0.24.0-py3-none-any.whl (9.4 kB)
Collecting Levenshtein==0.24.0
  Downloading Levenshtein-0.24.0-cp39-cp39-win_amd64.whl (98 kB)
     ---------------------------------------- 98.6/98.6 kB 5.9 MB/s eta 0:00:00
Collecting rapidfuzz<4.0.0,>=3.1.0
  Downloading rapidfuzz-3.6.1-cp39-cp39-win_amd64.whl (1.6 MB)
     ---------------------------------------- 1.6/1.6 MB 20.9 MB/s eta 0:00:00
Installing collected packages: rapidfuzz, Levenshtein, python-Levenshtein
Successfully installed Levenshtein-0.24.0 python-Levenshtein-0.24.0 rapidfuzz-3.6.1


In [31]:
import Levenshtein

def wer(reference, hypothesis):
    """
    Calculate Word Error Rate (WER) between reference and hypothesis.
    """
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()

    # Calculate Levenshtein distance
    distance = Levenshtein.distance(reference_words, hypothesis_words)

    # Calculate WER
    wer = distance / len(reference_words)
    return wer

def cer(reference, hypothesis):
    """
    Calculate Character Error Rate (CER) between reference and hypothesis.
    """
    # Calculate Levenshtein distance
    distance = Levenshtein.distance(reference, hypothesis)

    # Calculate CER
    cer = distance / len(reference)
    return cer


In [33]:
x = len(result_with_actual_transcripts['transcript'])
x

475

In [46]:
# Example Usage
wer_scores = []
cer_scores = []
for index, rows in result_with_actual_transcripts.iterrows():
    reference_text = str(result_with_actual_transcripts['actual_transcript'])
    hypothesis_text = str(result_with_actual_transcripts['transcript'])
    
    wer_score = wer(reference_text, hypothesis_text)
    cer_score = cer(reference_text, hypothesis_text)
    
    wer_scores.append(wer_score)
    cer_scores.append(cer_score)

result_with_actual_transcripts['WER_Score'] = wer_scores
result_with_actual_transcripts['CER_Score'] = cer_scores
result_with_actual_transcripts

Unnamed: 0,file_name,transcript,actual_transcript,WER_Score,CER_Score
0,1249120_13842059_104469105.wav,I have a painful cramp in my feet,I have a painful cramp in my feet,0.112245,0.070796
1,1249120_13842059_105045085.wav,the pain feels like it's right below the skin,The pain feels like it's right below the skin,0.112245,0.070796
2,1249120_13842059_11964685.wav,I feel suicidal,I feel suicidal.,0.112245,0.070796
3,1249120_13842059_12420758.wav,I feel a sharp pain in my ankle joint when I,I feel a sharp pain in my ankle joint when I s...,0.112245,0.070796
4,1249120_13842059_13041979.wav,my shoulder hurts me so much,My shoulder hurts me so much,0.112245,0.070796
...,...,...,...,...,...
470,1249120_6338946_95779210.wav,my ankle is hurting me,my ankle is hurting me,0.112245,0.070796
471,1249120_6338946_97962298.wav,I can't walk because I have a great,I can't walk because i have a great foot ache,0.112245,0.070796
472,1249120_6338946_98107593.wav,when I try to be warm and wear more clothes I ...,When I tried to be warm and wear more clothes ...,0.112245,0.070796
473,1249120_6338946_99675257.wav,I've always been very active but now I just do...,I've always been very active but now I just do...,0.112245,0.070796
