# Accent Detection 

In [None]:
import pyaudio

# Initialize PyAudio
audio = pyaudio.PyAudio()

# Open a stream for real-time audio capture
stream = audio.open(format=pyaudio.paInt16, 
                    channels=1,
                    rate=16000,
                    input=True,
                    frames_per_buffer=1024)

print("Recording...")
while True:
    data = stream.read(1024)
    # Process audio data here


In [None]:
import openai

# Set your API key
openai.api_key = "your_openai_api_key"

# Transcribe audio using OpenAI API
audio_file = open("media/recorded_audio.wav", "rb")
transcription = openai.Audio.transcribe("whisper-1", audio_file)
print("Transcription:", transcription["text"])


In [6]:
import pyaudio
from google.cloud import speech

def real_time_transcription():
    """Performs real-time speech-to-text transcription."""
    client = speech.SpeechClient()

    # Configure audio settings
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )
    streaming_config = speech.StreamingRecognitionConfig(config=config, interim_results=True)

    def generator():
        """Generator that yields audio chunks from the microphone."""
        audio = pyaudio.PyAudio()
        stream = audio.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)

        try:
            while True:
                yield speech.StreamingRecognizeRequest(audio_content=stream.read(1024))
        except GeneratorExit:
            stream.stop_stream()
            stream.close()
            audio.terminate()

    # Perform streaming recognition
    requests = generator()
    responses = client.streaming_recognize(config=streaming_config, requests=requests)

    # Print transcriptions
    for response in responses:
        for result in response.results:
            if result.is_final:
                print(f"Transcription: {result.alternatives[0].transcript}")

# Call the function
real_time_transcription()


Transcription: 
Transcription: how much time
Transcription:  hello can you hear me
Transcription:  okay stop listening and give me the output
Transcription:  it's perfect right
Transcription:  so what do you think about it


KeyboardInterrupt: 

In [5]:
import pyaudio
import wave
from google.cloud import speech
from phonemizer import phonemize

# Configure PyAudio
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "user_input.wav"

def record_audio():
    """Record audio from the microphone."""
    audio = pyaudio.PyAudio()
    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)

    print("Recording...")
    frames = []
    for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("Recording stopped.")

    stream.stop_stream()
    stream.close()
    audio.terminate()

    # Save the recorded audio
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

def transcribe_audio():
    """Transcribe audio using Google Speech-to-Text."""
    client = speech.SpeechClient()
    with open(WAVE_OUTPUT_FILENAME, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code="en-US",
    )

    response = client.recognize(config=config, audio=audio)
    if response.results:
        return response.results[0].alternatives[0].transcript
    else:
        return None

def compare_pronunciations(correct_word, user_input, language="en-gb"):
    """Compare the pronunciation of a correct word with user input."""
    correct_pronunciation = phonemize(correct_word, language=language, backend="espeak", strip=True)
    user_pronunciation = phonemize(user_input, language=language, backend="espeak", strip=True)

    if correct_pronunciation == user_pronunciation:
        print("Correct pronunciation!")
    else:
        print(f"Pronunciation mismatch:\n"
              f"  Expected: {correct_pronunciation}\n"
              f"  Got: {user_pronunciation}")

# Main flow
if __name__ == "__main__":
    record_audio()
    print("Transcribing audio...")
    transcribed_text = transcribe_audio()
    if transcribed_text:
        print(f"Transcribed text: {transcribed_text}")
        compare_pronunciations("example", transcribed_text)
    else:
        print("No transcription available.")


Recording...
Recording stopped.
Transcribing audio...
Transcribed text: say something British hello can you hear me
Pronunciation mismatch:
  Expected: ɛɡzampəl
  Got: seɪ sʌmθɪŋ bɹɪtɪʃ hələʊ kan juː hiə miː


In [None]:
from pynput import keyboard
import pyaudio
import wave
import os
import whisper

# Configuration
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
OUTPUT_FOLDER = "media"
OUTPUT_FILENAME = "recorded_audio.wav"

# Create the output folder if it doesn't exist
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
output_path = os.path.join(OUTPUT_FOLDER, OUTPUT_FILENAME)

# Initialize PyAudio
audio = pyaudio.PyAudio()

# Open a stream for real-time audio capture
stream = audio.open(format=FORMAT, 
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)


print("Recording... Press 'S' to stop.")

frames = []
recording = True

# Function to handle key presses
def on_press(key):
    global recording
    try:
        if key.char == 's':  # Stop recording when 'S' is pressed
            print("\n'S' key pressed. Stopping recording...")
            recording = False
            return False  # Stop the listener
    except AttributeError:
        pass

# Start recording
with keyboard.Listener(on_press=on_press) as listener:
    while recording:
        data = stream.read(CHUNK)
        frames.append(data)
    listener.join()

# Stop and close the stream
stream.stop_stream()
stream.close()
audio.terminate()

# Save the recorded data to a WAV file
with wave.open(output_path, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(audio.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

print(f"Audio file saved at: {output_path}")

# Load the Whisper model
print("Loading Whisper model...")
model = whisper.load_model("base")  # Use "base", "small", "medium", or "large"

# Transcribe the audio
print("Transcribing audio...")
result = model.transcribe(output_path)
print("Transcription:")
print(result["text"])