<a href="https://colab.research.google.com/github/MK316/Fall2023/blob/main/Phonetics_TranscriptionRead.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🍀 Transcription reading (11/30, Phonetics): record your speech and the app displays the text it recognizes (using Whisper speech-to-text)

# Part I: Recording and recognition

## [0] Install necessary packages

In [None]:
%%capture
!pip install gradio
!pip install openai-whisper
!pip install qrcode
!pip install git+https://github.com/openai/whisper.git

## [2] Implement STT using Gradio: getting a live link

In [None]:
#@markdown App using Whisper: speech to text
import gradio as gr
import whisper

# Loaded Whisper models keyed by model name, so the weights are loaded once per
# kernel session instead of once per transcription request.
_WHISPER_MODELS = {}


def _get_whisper_model(name="tiny"):
    """Return the Whisper model called ``name``, loading it only on first use."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def speech_to_text(audio_file_path):
    """Transcribe a recorded audio file to text with Whisper.

    Args:
        audio_file_path: Path of the audio file to transcribe. The Gradio
            ``Audio`` input is configured with ``type="filepath"``; the value
            is presumably ``None`` when the user submits without recording
            (verify against the installed Gradio version).

    Returns:
        The recognized text, or a human-readable message when no audio was
        supplied or when transcription failed. Errors are returned as text
        (not raised) so they show up in the app's output box.
    """
    # Nothing to transcribe: answer with a clear hint instead of a low-level error.
    if not audio_file_path:
        return "No audio received. Please record your voice and try again."
    try:
        model = _get_whisper_model("tiny")  # Using "tiny" for better compatibility
        audio = whisper.load_audio(audio_file_path)
        result = model.transcribe(audio)
        return result["text"]
    except Exception as e:
        # Deliberate best-effort: keep the web app responsive and show the cause.
        return f"An error occurred: {str(e)}"

# Input widget: the recording is handed to speech_to_text as a file path.
voice_input = gr.Audio(type="filepath", label="Record your voice")

# Text shown at the top of the app page.
app_title = "Speech to Text Converter"
app_description = "Record your voice and convert it to text using Whisper."

# Wire the transcription function to the audio input and a plain-text output.
iface = gr.Interface(
    fn=speech_to_text,
    inputs=voice_input,
    outputs="text",
    title=app_title,
    description=app_description,
)

# Start the app.
iface.launch()


In [None]:
!pip install qrcode

## [3] QR code to generate

In [None]:
#@markdown 🌀 QR code to generate
import qrcode
from IPython.display import display
from PIL import Image

# Paste the URL copied from the Gradio output when prompted.
# Surrounding whitespace is stripped so that a stray space or newline picked up
# while copying is not encoded into the QR code.
url = input("Enter the URL to generate QR code: ").strip()
if not url:
    # Fail early: an empty QR code would be generated and saved silently otherwise.
    raise ValueError("No URL entered; nothing to encode in the QR code.")

# Generate QR code (high error-correction level, 12-pixel boxes, 4-box border)
qr = qrcode.QRCode(
    version=1,
    error_correction=qrcode.constants.ERROR_CORRECT_H,
    box_size=12,
    border=4,
)
qr.add_data(url)
qr.make(fit=True)
img = qr.make_image(fill_color="black", back_color="white")

# Display the QR code in the cell output
display(img)

# Save the QR code image
image_path = '/content/qr_code.png'  # Specify your desired path and file name
img.save(image_path)
print(f"QR code saved at {image_path}")


[QR to paste the recognized text](https://docs.google.com/spreadsheets/d/12_hgfEpoYFlBbq-FHyMtpnv_bPuaxL-_c3jiyemffoU/edit#gid=1549122206)

In [None]:
#@markdown 🌀 QR code to generate (Google sheet)
import qrcode
from IPython.display import display
from PIL import Image

# Link to the shared Google Sheet that the QR code should point to.
url = "https://docs.google.com/spreadsheets/d/12_hgfEpoYFlBbq-FHyMtpnv_bPuaxL-_c3jiyemffoU/edit?usp=sharing"

# Where the generated image is written.
image_path = '/content/qr_code_google.png'  # Specify your desired path and file name

# QR code settings collected in one place, then passed to the builder.
qr_settings = dict(
    version=1,
    error_correction=qrcode.constants.ERROR_CORRECT_H,
    box_size=10,
    border=4,
)
qr = qrcode.QRCode(**qr_settings)

# Encode the link and render it as a black-on-white image.
qr.add_data(url)
qr.make(fit=True)
img = qr.make_image(fill_color="black", back_color="white")

# Show the image in the cell output, then write it to disk.
display(img)
img.save(image_path)
print(f"QR code saved at {image_path}")


# Part II: Analysis of the result

# [1] Install packages

In [None]:
!pip install --upgrade google-auth google-auth-oauthlib google-auth-httplib2

In [None]:
%%capture
!pip install gtts jiwer matplotlib pandas

# [2] Google authentication & File to read

In [None]:
#@markdown 🐳 Google authentication

import pandas as pd
import gspread
from google.auth import default
from google.colab import auth

# Sign in to Google from Colab, then build a gspread client from the
# resulting default credentials.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Name of the Google Sheet holding the class responses.
spreadsheet_name = 'transcription1124'  # Replace with your Google Sheet name

# Open the spreadsheet and take its first worksheet.
spreadsheet = gc.open(spreadsheet_name)
worksheet = spreadsheet.sheet1

# Fetch every cell value; the first row is used as the column header.
rows = worksheet.get_all_values()
header = rows[0]
records = rows[1:]

# Build the DataFrame used by the analysis cells below.
df = pd.DataFrame.from_records(records, columns=header)

# Quick check: show the first few entries of the 'Group' column.
group_preview = df['Group'].head()
print(group_preview)



[text to compare](https://raw.githubusercontent.com/MK316/Fall2023/main/original.md)

# Modification for different numbers of groups (members)

# [3] Calculating WER, finding missing and added words

WER is essentially a ratio of the total number of errors (substitutions, insertions, deletions) to the total number of words in the reference text.

Here is the basic formula for WER:

$$
WER = \frac{\text{Substitutions} + \text{Insertions} + \text{Deletions}}{\text{Number of Words in Reference}}
$$




In [None]:
#@markdown 🐳 WER, Missing words, Added words

from jiwer import wer
import pandas as pd
from collections import Counter

# Function to calculate WER
def calculate_wer(recognized, original=None):
    """Return the word error rate of ``recognized`` against the reference text.

    Args:
        recognized: The recognized (hypothesis) text to score.
        original: The reference text. When omitted, the notebook-level
            ``original_text`` variable is used, so existing calls such as
            ``df['Recognized'].apply(calculate_wer)`` keep working.

    Returns:
        The value returned by ``jiwer.wer(reference, recognized)``.
    """
    # Resolve the reference lazily so the global is only needed when no
    # explicit reference is passed.
    reference = original_text if original is None else original
    return wer(reference, recognized)

# Function to find missing words
def find_missing_words(recognized, original=None):
    """List reference words that occur less often in the recognized text.

    Words are obtained by splitting on whitespace only, so the comparison is
    case- and punctuation-sensitive ("Hello," and "hello" are different words).

    Args:
        recognized: The recognized text.
        original: The reference text. When omitted, the notebook-level
            ``original_text`` variable is used, so existing calls such as
            ``df['Recognized'].apply(find_missing_words)`` keep working.

    Returns:
        A ``', '``-joined string of the missing words, each listed once, in the
        order they first appear in the reference; empty when nothing is missing.
    """
    reference = original_text if original is None else original
    expected = Counter(reference.split())
    heard = Counter(recognized.split())

    # A Counter yields 0 for absent words, so no explicit default is needed.
    missing_words = [word for word, count in expected.items() if count > heard[word]]
    return ', '.join(missing_words)

# Function to find added words
def find_added_words(recognized, original=None):
    """List recognized words that occur more often than in the reference text.

    Counts are compared per word (mirroring ``find_missing_words``), so a word
    that is repeated more times than in the reference is reported as added.
    Words are obtained by splitting on whitespace only, so the comparison is
    case- and punctuation-sensitive.

    Args:
        recognized: The recognized text.
        original: The reference text. When omitted, the notebook-level
            ``original_text`` variable is used, so existing calls such as
            ``df['Recognized'].apply(find_added_words)`` keep working.

    Returns:
        A ``', '``-joined string of the added words, each listed once, in the
        order they first appear in the recognized text; empty when nothing was
        added. The order is deterministic (no set iteration involved).
    """
    reference = original_text if original is None else original
    expected = Counter(reference.split())
    heard = Counter(recognized.split())

    # A Counter yields 0 for absent words, so brand-new words are included too.
    added_words = [word for word, count in heard.items() if count > expected[word]]
    return ', '.join(added_words)

# Prompt the user to enter the original text
original_text = input("Please enter the original text: ")

# Assuming df is your DataFrame and it has a column named 'Recognized'

# Apply the function to calculate WER for each row
df['WER'] = df['Recognized'].apply(calculate_wer)

# Apply the function to find missing words for each row
df['Missing'] = df['Recognized'].apply(find_missing_words)

# Apply the function to find added words for each row
df['Added'] = df['Recognized'].apply(find_added_words)

# Display the updated dataframe
# df
# Calculate and print the lowest WER
lowest_wer = df['WER'].min()
print(f"The lowest WER in the dataset is: {lowest_wer}")

# [4] Say the result

In [None]:
#@markdown Say the result (for groups)

import pandas as pd
from gtts import gTTS
import IPython.display as ipd

# df must have the columns 'Group' and 'WER' (the WER cell adds 'WER').


def _spoken_group_label(group):
    """Return the number embedded in a group label, or the label itself if it has no digits."""
    digits = ''.join(filter(str.isdigit, str(group)))
    # int() drops leading zeros ("G01" -> "1"); labels without digits are read as-is
    # instead of crashing on int('').
    return str(int(digits)) if digits else str(group)


# Mean WER per group
grouped_data = df.groupby('Group')['WER'].mean()

# Lowest mean WER, and every group that shares it (ties are all announced)
min_wer = grouped_data.min()
lowest_wer_groups = grouped_data[grouped_data == min_wer].index.tolist()
if not lowest_wer_groups:
    raise ValueError("No group has a WER value; check that the sheet has data and the WER cell was run.")

# Format the labels for speech: "1", "1 and 2", "1, 2 and 3"
group_labels = [_spoken_group_label(group) for group in lowest_wer_groups]
if len(group_labels) > 1:
    group_numbers = ', '.join(group_labels[:-1]) + ' and ' + group_labels[-1]
else:
    group_numbers = group_labels[0]

# Create the message
message = f"Group number {group_numbers}? You did a great job for this activity!"

# Convert the message to speech and save it
tts = gTTS(message)
tts.save("message.mp3")

# Play the message (the audio player is the cell's output)
ipd.Audio("message.mp3")


In [None]:
#@markdown 🐾 Say the result (for individuals)
import pandas as pd
from gtts import gTTS
import IPython.display as ipd

# Expects df to provide the columns 'Names' and 'WER'.

# Average WER per participant name.
grouped_data = df.groupby('Names')['WER'].mean()

# Best (lowest) average, plus everyone who ties on it.
min_wer = grouped_data.min()
is_lowest = grouped_data == min_wer
lowest_wer_names = grouped_data[is_lowest].index.tolist()

# Join the names for speech: a single name stays as is; several names become
# "A, B and C".
if len(lowest_wer_names) > 1:
    leading_names = lowest_wer_names[:-1]
    final_name = lowest_wer_names[-1]
    names_formatted = ', '.join(leading_names) + ' and ' + final_name
else:
    names_formatted = lowest_wer_names[0]

# Text to be spoken.
message = f"Okay! {names_formatted}? You did a great job for this activity!"

# Synthesize the speech and write it to an MP3 file.
tts = gTTS(message)
tts.save("message.mp3")

# The audio player is the cell's output.
ipd.Audio("message.mp3")


# [5] Plotting the result

In [None]:
#@markdown Barplot (in order)
import matplotlib.pyplot as plt
import pandas as pd

# df must have the columns 'Group' and 'WER'.

# Mean WER per group, ordered from lowest to highest
grouped_data = df.groupby('Group')['WER'].mean()
grouped_data_sorted = grouped_data.sort_values()

# Create the bar plot on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
grouped_data_sorted.plot(kind='bar', color='skyblue', ax=ax)

# Keep the usual 0-1 axis, but extend it when a WER exceeds 1 (insertions count
# as errors, so WER is not capped at 1) so that no bar is cut off.
highest_wer = grouped_data_sorted.max()
y_upper = highest_wer * 1.05 if highest_wer > 1.0 else 1.0
ax.set_ylim(0, y_upper)

# Titles and labels
ax.set_title('Word Error Rate by Group (Low to High)')
ax.set_xlabel('Group')
ax.set_ylabel('WER')

# Show the plot
plt.show()


In [None]:
#@markdown Barplot all
import re

import matplotlib.pyplot as plt
import pandas as pd

# df must have the columns 'Group' and 'WER'.


def _natural_key(label):
    """Sort key that compares digit runs as numbers, so 'G2' comes before 'G10'."""
    # re.split with a capturing group alternates text, digits, text, ...;
    # odd positions are therefore always digit runs.
    parts = re.split(r'(\d+)', str(label))
    return [int(part) if i % 2 else part.lower() for i, part in enumerate(parts)]


# Mean WER per group
grouped_data = df.groupby('Group')['WER'].mean()

# Order the groups naturally (G1, G2, ..., G10); a plain index sort is
# lexicographic and would place G10 before G2.
ordered_groups = sorted(grouped_data.index, key=_natural_key)
grouped_data = grouped_data.loc[ordered_groups]

# Create the bar plot on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
grouped_data.plot(kind='bar', color='skyblue', ax=ax)

# Keep the usual 0-1 axis, but extend it when a WER exceeds 1 (insertions count
# as errors, so WER is not capped at 1) so that no bar is cut off.
highest_wer = grouped_data.max()
y_upper = highest_wer * 1.05 if highest_wer > 1.0 else 1.0
ax.set_ylim(0, y_upper)

# Titles and labels
ax.set_title('Word Error Rate by Group')
ax.set_xlabel('Group')
ax.set_ylabel('WER')

# Show the plot
plt.show()


# Plot for individuals

In [None]:
#@markdown Barplot (in order)
import matplotlib.pyplot as plt
import pandas as pd

# df must have the columns 'Names' and 'WER'.

# Mean WER per participant name, ordered from lowest to highest
grouped_data = df.groupby('Names')['WER'].mean()
grouped_data_sorted = grouped_data.sort_values()

# Create the bar plot on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
grouped_data_sorted.plot(kind='bar', color='skyblue', ax=ax)

# Keep the usual 0-1 axis, but extend it when a WER exceeds 1 (insertions count
# as errors, so WER is not capped at 1) so that no bar is cut off.
highest_wer = grouped_data_sorted.max()
y_upper = highest_wer * 1.05 if highest_wer > 1.0 else 1.0
ax.set_ylim(0, y_upper)

# Titles and labels
ax.set_title('Word Error Rate by Members (Low to High)')
ax.set_xlabel('Names')
ax.set_ylabel('WER')

# Show the plot
plt.show()


# [6] The result table

In [None]:
# Bare expression: the notebook renders the whole DataFrame as this cell's output.
# If the WER cell above has been run, the 'WER', 'Missing' and 'Added' columns are included.
df