In [12]:
# Import all the relevant libraries
from vosk import Model, KaldiRecognizer
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import pyaudio
import spacy
from fpdf import FPDF
import json
import time
import os
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

In [13]:
def transcribe_audio(model_path='./extracted_files/vosk-model-small-en-us-0.15', sample_rate=16000):
    """
    Record from the default microphone and return the first recognized utterance.

    Audio is read in small chunks and fed to a Vosk recognizer. As soon as the
    recognizer finalizes an utterance whose text is non-empty, that text is
    returned. Pressing Ctrl+C stops listening early; whatever the recognizer
    has buffered at that point is flushed and returned instead of being lost.

    Args:
        model_path (str): Directory containing the unpacked Vosk model.
        sample_rate (int): Sample rate in Hz used for both the microphone
            stream and the recognizer.

    Returns:
        str: The transcribed text, stripped of surrounding whitespace
        (an empty string if nothing was recognized before Ctrl+C).
    """
    model = Model(model_path)
    recognizer = KaldiRecognizer(model, sample_rate)

    # Set up microphone input
    p = pyaudio.PyAudio()
    stream = None
    response_text = ""

    try:
        stream = p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=8000,
        )
        stream.start_stream()

        print("Start speaking... (press Ctrl+C to stop)")
        while True:
            data = stream.read(4000, exception_on_overflow=False)
            if recognizer.AcceptWaveform(data):
                # The recognizer has finalized an utterance
                result = recognizer.Result()
                print("Transcribed:", result)
                text = json.loads(result).get("text", "")

                # Collect the transcribed text for further use
                response_text += " " + text
                # Silence produces empty results; keep listening until
                # something was actually said, then hand it back.
                if response_text.strip() != "":
                    return response_text.strip()
    except KeyboardInterrupt:
        print("Stopping transcription...")
        # Flush audio the recognizer has received but not yet finalized so a
        # half-spoken answer is not discarded on Ctrl+C.
        pending = json.loads(recognizer.FinalResult()).get("text", "")
        return (response_text + " " + pending).strip()
    finally:
        # Always release the audio device, on every exit path; otherwise each
        # call leaves an open input stream behind.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()


In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Neo Model
# Hugging Face Hub identifier of the checkpoint used to generate examiner questions.
model_name = "EleutherAI/gpt-neo-125m"

# Load the tokenizer and model
# Both objects are module-level globals that ask_question() reads directly.
# NOTE(review): token=True asks the Hub client to use the locally stored login
# token; presumably not needed for this checkpoint and may fail on a machine
# that has never logged in -- confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # Automatically map layers to available GPUs/CPUs
    torch_dtype="auto",  # Automatically choose the appropriate precision
    token=True,

    )


In [15]:
def ask_question(prompt):
    """
    Generate text from the module-level causal language model for a prompt.

    Uses the global `tokenizer` and `model` loaded earlier in the notebook
    (GPT-Neo 125M). The decoded output contains the prompt followed by the
    model's continuation, exactly as produced by `generate`.

    Args:
        prompt (str): The input prompt for generating a question.

    Returns:
        str: The decoded generated text (special tokens removed).
    """
    # The tokenizer needs a padding token for `padding=True`; fall back to the
    # end-of-sequence token only when none is configured.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Tokenize the input with attention mask and move the tensors to wherever
    # the model actually lives. The model is loaded with device_map="auto", so
    # hard-coding "cpu" would fail whenever it was placed on a GPU.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        return_attention_mask=True
    ).to(model.device)

    # Beam search with an explicit pad_token_id. `temperature` is deliberately
    # not passed: it only applies to sampling (do_sample=True) and is ignored,
    # with a warning, in beam-search decoding.
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        pad_token_id=tokenizer.pad_token_id,
        max_length=50,  # total length in tokens, prompt included
        num_beams=5
    )

    # Decode the generated text and return
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [16]:
# Loaded spaCy pipelines, keyed by model name, so the large model is read from
# disk once instead of on every analyzed response.
_SPACY_PIPELINES = {}


def _get_spacy_pipeline(name):
    """Return the spaCy pipeline called `name`, loading it on first use."""
    if name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[name] = spacy.load(name)
    return _SPACY_PIPELINES[name]


def analyze_response(response):
    """
    Compute simple heuristic feedback metrics for one spoken response.

    All scores are rough heuristics, not real IELTS band descriptors.

    Args:
        response (str): Transcribed text of the candidate's answer.

    Returns:
        dict: Keys "grammar_errors", "corrected_sentences",
        "vocabulary_suggestions", "pronunciation_tips" (lists of str),
        "word_count" (int, number of spaCy tokens), "fluency_score" (int),
        "coherence_score" (float) and "grammar_score" (int).
    """
    import re

    nlp = _get_spacy_pipeline("en_core_web_lg")
    doc = nlp(response)
    grammar_errors = []
    corrected_sentences = []
    vocabulary_suggestions = []
    pronunciation_tips = []

    # Example filler words for fluency analysis. Match whole words only:
    # a plain substring count would see "so" in "also" or "um" in "number".
    filler_words = ["um", "uh", "like", "so", "you know"]
    filler_pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(fw) for fw in filler_words) + r")\b"
    )
    filler_count = len(filler_pattern.findall(response.lower()))

    # Grammar checks and corrections.
    # A sentence root is accepted when it carries any verb tag; VB and VBP are
    # included so ordinary present-tense roots ("I like ...", "I don't
    # understand ...") are not reported as errors.
    accepted_root_tags = {"VB", "VBD", "VBG", "VBN", "VBP", "VBZ"}
    for sent in doc.sents:
        flagged = set()  # document indices of tokens reported in this sentence
        for token in sent:
            if token.dep_ == "ROOT" and token.tag_ not in accepted_root_tags:
                grammar_errors.append(f"Incorrect verb form: {token.text}")
                flagged.add(token.i)

        # Echo the sentence with flagged tokens wrapped in parentheses
        # (simple example, improve with grammar models). Membership is tested
        # on token indices; the error list holds full messages, so comparing
        # token text against it could never match.
        corrected_sentence = " ".join(
            f"({t.text})" if t.i in flagged else t.text for t in sent
        )
        corrected_sentences.append(corrected_sentence)

    # Lexical Resource: Vocabulary analysis and suggestions.
    # Only alphabetic, non-stop words are considered, so punctuation and
    # function words such as "the" do not produce synonym suggestions.
    token_counts = doc.count_by(spacy.attrs.LOWER)
    for lower, count in token_counts.items():
        lexeme = doc.vocab[lower]
        if count > 2 and lexeme.is_alpha and not lexeme.is_stop:
            vocabulary_suggestions.append(f"Try using synonyms for '{lexeme.text}'.")

    # Fluency and coherence scoring
    sentence_count = len(list(doc.sents))
    coherence_score = sentence_count / (filler_count + 1)  # Simplified metric for coherence

    # Pronunciation feedback placeholder (requires phoneme analysis)
    pronunciation_tips.append("Practice clearer enunciation for better scoring.")

    # Word and grammar scores
    word_count = len(doc)
    fluency_score = max(0, 10 - filler_count)  # Example scoring metric
    grammar_score = max(0, 10 - len(grammar_errors))

    return {
        "grammar_errors": grammar_errors,
        "corrected_sentences": corrected_sentences,
        "vocabulary_suggestions": vocabulary_suggestions,
        "pronunciation_tips": pronunciation_tips,
        "word_count": word_count,
        "fluency_score": fluency_score,
        "coherence_score": coherence_score,
        "grammar_score": grammar_score
    }


In [17]:
def generate_report(responses, analyses, filename="IELTS_Feedback.pdf"):
    """
    Write a PDF feedback report for the given responses and their analyses.

    Args:
        responses (list[str]): Transcribed answers, one per test part.
        analyses (list[dict]): Result dicts from analyze_response(), in the
            same order as `responses`.
        filename (str): Path of the PDF file to create.

    Returns:
        None. The report is written to `filename` and a confirmation printed.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # Width 0 stretches the cell to the right margin so the title is centred
    # within the printable area.
    pdf.cell(0, 10, txt="IELTS Speaking Test Feedback", ln=True, align="C")

    def write_line(text):
        # The built-in fonts only cover Latin-1; replace anything else so one
        # unusual character cannot abort the whole report.
        safe_text = str(text).encode("latin-1", "replace").decode("latin-1")
        # multi_cell wraps long text onto further lines; a plain cell would
        # let a long response run off the right edge of the page.
        pdf.multi_cell(0, 10, txt=safe_text, align="L")
        # Return to the left margin explicitly; where the cursor ends up after
        # multi_cell differs between FPDF implementations.
        pdf.set_x(pdf.l_margin)

    def write_bullets(heading, items):
        # Emit a heading followed by one bullet per item; skip empty sections.
        if not items:
            return
        write_line(heading)
        for item in items:
            write_line(f"  * {item}")

    # One section per test part
    for i, (response, analysis) in enumerate(zip(responses, analyses)):
        write_line(f"Part {i+1} Response: {response}")
        write_line("Analysis:")
        write_line(f"- Word Count: {analysis['word_count']}")
        write_line(f"- Fluency Score: {analysis['fluency_score']}")
        write_line(f"- Coherence Score: {analysis['coherence_score']}")
        write_line(f"- Grammar Score: {analysis['grammar_score']}")

        write_bullets("Grammar Errors:", analysis["grammar_errors"])
        write_bullets("Vocabulary Suggestions:", analysis["vocabulary_suggestions"])
        write_bullets("Pronunciation Tips:", analysis["pronunciation_tips"])

    pdf.output(filename)
    print(f"Report saved as {filename}")


In [18]:
def test_mode():
    """
    Run the three-part mock speaking test end to end.

    For each part: print the section banner, generate and print the examiner
    question, record and print the spoken answer, and analyze it. Afterwards
    all answers and analyses are written to 'IELTS_Test_Feedback.pdf'.
    """
    print("You are now in Test Mode.")
    print("This is a full IELTS Speaking Test with 3 parts.")

    # (banner printed before the part, prompt given to the question generator)
    sections = (
        ("\nPart 1: Introduction", 'Ask a simply question'),
        ("\nPart 2: Long Turn (Cue Card Activity)", "Create another long question"),
        ("\nPart 3: Two-Way Discussion", "Create an IELTS-style two-way discussion question."),
    )

    responses, analyses = [], []
    for banner, prompt in sections:
        print(banner)
        examiner_question = ask_question(prompt)
        print("Examiner:", examiner_question)

        answer = transcribe_audio()
        print("Your response:", answer)

        feedback = analyze_response(answer)
        responses.append(answer)
        analyses.append(feedback)

    # Generate a comprehensive feedback report
    generate_report(responses, analyses, filename="IELTS_Test_Feedback.pdf")
    print("Test complete. Feedback report saved as 'IELTS_Test_Feedback.pdf'.")



In [19]:
# Run the program
# Starts the interactive three-part test when this code is executed directly.
# test_mode() records from the microphone and writes a PDF report, so running
# this has side effects on the audio device and the working directory.
if __name__ == "__main__":
    test_mode()

You are now in Test Mode.
This is a full IELTS Speaking Test with 3 parts.

Part 1: Introduction




Examiner: Ask a simply question?

A:

If you want to know the answer to your question, you can use the following code:
public static void main(String[] args) {
    System.out.println("
Start speaking... (press Ctrl+C to stop)
Transcribed: {
  "text" : ""
}
Transcribed: {
  "text" : ""
}
Transcribed: {
  "text" : "i don't understand what you're saying"
}
Your response: i don't understand what you're saying

Part 2: Long Turn (Cue Card Activity)
Examiner: Create another long question.

A:

You can do it like this:
public static void main(String[] args) {
    int i = 0;
    int j = 0;
  
Start speaking... (press Ctrl+C to stop)
Transcribed: {
  "text" : "okay"
}
Your response: okay

Part 3: Two-Way Discussion
Examiner: Create an IELTS-style two-way discussion question.

A:

There are two ways to do this:

Create an IELTS-style two-way discussion question.
Create an IELTS-style
Start speaking... (press Ctrl+C to stop)
Transcribed: {
  "text" : ""
}
Transcribed: {
  "text" : ""
}
Transcribe