# Audio testing
This notebook is focused on finding the best solution on detecting words from audio

In [6]:
import whisper
import os
import time
from transformers import pipeline
from fuzzywuzzy import process
from dotenv import load_dotenv
import openai
import requests

In [7]:
load_dotenv()

True

In [8]:
OPEN_AI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPEN_AI_API_KEY


## 1. Sending audio to backend and then use whisper

In [9]:
# model = whisper.load_model("medium")

In [3]:
print(os.path.exists("TestAudios/TestAudio1.m4a"))

True


In [15]:
start = time.time()
result = model.transcribe("TestAudios/TestAudio2.m4a")
end = time.time()

end - start

3.3483726978302

## 2. WebSpeech API + correcting in backend

In [38]:
corrector = pipeline('text2text-generation', model='prithivida/grammar_error_correcter_v1')





Device set to use cuda:0


In [21]:
# text from WebSpeech API "I think ChromaDB is essential for RAG implementation
sample_text = "I think Roma debate is essential for rock implementation" 

In [24]:
corrected = corrector(f"fix: {sample_text}")


In [23]:
print(corrected[0]['generated_text'])


I think Roma debate is essential for rock implementation.


In [26]:
transcript = "I am using Jake weary and type script in angular Jess project"
corrected = corrector(f"fix: {transcript}")

print(corrected)

[{'generated_text': 'Fix: I am using Jake weary and type script in angular Jess project.'}]


## 2. FuzzyWuzzy + GPT

In [38]:
TECHNICAL_TERMS = [
    "JSON", "jQuery", "TypeScript", "AngularJS", "Vue.js",
    "C#", ".NET", "GitHub", "Stack Overflow", "HttpClient",
    "async", "await", "REST API", "FastAPI", "Node.js",
    "Python", "NumPy", "Pandas", "TensorFlow", "PyTorch"
]

def fuzzy_correction_phrases(transcript, threshold=80):
    corrected_transcript = transcript
    for phrase in TECHNICAL_TERMS:
        match, score = process.extractOne(phrase, [transcript])
        if score >= threshold:
            corrected_transcript = corrected_transcript.replace(match, phrase)
    return corrected_transcript

class LLMCorrector:
    def __init__(self):
        self.corrector = pipeline(
            'text2text-generation',
            model='prithivida/grammar_error_correcter_v1',
            max_length=128,
            device=0 
        )

    def correct(self, text):
        prompt = f"fix grammar and spelling: {text}"
        corrected = self.corrector(prompt)
        return corrected[0]['generated_text']

class TechnicalTranscriptCorrector:
    def __init__(self):
        self.llm_corrector = LLMCorrector()

    def correct_transcript(self, transcript):
        fuzzy_corrected = fuzzy_correction_phrases(transcript)
        fully_corrected = self.llm_corrector.correct(fuzzy_corrected)
        return fully_corrected

In [39]:
corrector = TechnicalTranscriptCorrector()

Device set to use cuda:0


In [41]:
raw_transcript = "I wrote a sink function in sea sharp and upload to get help"
print("RAW:", raw_transcript)

corrected_transcript = corrector.correct_transcript(raw_transcript)
print("CORRECTED:", corrected_transcript)

RAW: I wrote a sink function in sea sharp and upload to get help
CORRECTED: I wrote a sink function in sea sharp and uploaded to get help.


## 3. GPT 3.5

In [10]:
def correct_transcript_with_tech_terms(transcript):
    prompt = f"""
    You are an expert technical text corrector.  
    Given the input transcript, correct any mistakes related to programming and technical vocabulary.
    The input focuses on LLM application, so keep in my mind that vocabulary is directly related to LLMs.

    Now correct this transcript:
    "{transcript}"
    """

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You correct technical transcripts."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.2
    )

    return response.choices[0].message.content.strip()

In [14]:
start = time.time()
raw_transcript = "modern elements applications heavily rely on python-based Frameworks such as pytorch and Teresa flow utilizing transform architectures like 234 or belt with attention mechanism optimized we are scale dot product calculations I'm fine doing workflows of an integrate techniques like low rack application lower or perimeter efficient fine tuning path within Huggins face Transformers Library production deployments currently utilize Orchestra communication"
corrected = correct_transcript_with_tech_terms(raw_transcript[:len(raw_transcript)//4])
end = time.time()
print(corrected)
end - start

"Modern machine learning applications heavily rely on Python-based frameworks such as PyTorch and TensorFlow."


0.5789055824279785

'"I wrote a `Sink` function in C# and uploaded it to get help."'

## 4. Local Models

In [46]:
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
MODEL = "tinyllama"

In [59]:
raw_transcript = "modern elements applications heavily rely on python-based Frameworks such as pytorch and Teresa flow utilizing transform architectures like 234 or belt with attention mechanism optimized we are scale dot product calculations I'm fine doing workflows of an integrate techniques like low rack application lower or perimeter efficient fine tuning path within Huggins face Transformers Library production deployments currently utilize Orchestra communication"


In [60]:
messages = [
    {"role": "user", "content": """
    You are an expert technical text corrector.  
    Given the input transcript, correct any mistakes related to programming and technical vocabulary.
    The input focuses on LLM application, so keep in my mind that vocabulary is directly related to LLMs.

    Now correct this transcript:
    "{raw_transcript}"
    """}
]

In [61]:
payload = {
        "model": MODEL,
        "messages": messages,
        "stream": False
    }

In [49]:
!ollama pull tinyllama

[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ą [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â Ľ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 2af3b81862c6... 100% â–•â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–Ź 637 MB                         [K
pulling af0ddbdaaa26... 100% â–•â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–Ź   70 B                         [K
pulling c8472cd9daed... 100% â–•â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–Ź   31 B                         [K
pulling fa956ab37b8c... 100% â–•â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–Ź   98 B                         [K
pulling 6331358be52a... 100% â–•â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–�â–Ź  483 B                         [K
verifying sha256 digest [K
writing manifest [K
success [K[?25h[?2026l


In [63]:
response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
print(response.json()['message']['content'])

    "Please correct any mistakes related to program and technical vocabulary in the following text:

      {input_transcript}
      Please keep in mind the specific focus on LLM applications, as discussed in the text. Any corrections that are related to programming or technical vocabulary will be relevant for LLMs."
