# Additional End of week Exercise - week 2

Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in the Week 1 Exercise.

This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!

If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.

I will publish a full solution here soon - unless someone beats me to it...

There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI for a course (like this one!). I can't wait to see your results.

In [None]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import numpy
import tempfile
import scipy.io.wavfile as wavfile
import librosa
import anthropic
import json
from io import BytesIO
from pydub import AudioSegment
from pydub.playback import play

In [None]:
# Load environment; override=True lets values from .env replace existing ones
load_dotenv(override=True)

openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')

# Report whether each key was found, echoing only its first 8 characters
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)
print(
    f"Anthropic API key exists and begins {anthropic_api_key[:8]}"
    if anthropic_api_key
    else "Anthropic API key not set"
)

# Client instances used by the cells below
openai = OpenAI()
claude = anthropic.Anthropic()


In [None]:
# System prompt for the examiner model. The adjacent literals below are joined
# into one single-line string: the pieces are separated by the trailing space
# each one carries, and the text contains no newline characters.
examiner_system_message = (
    "You are an IELTS speaking session examiner that have one job to do conversation with examinee "
    "After the examinee greet you, you should respond their greetings and take them into conversation by choosing only one topic of your choice related to: "
    "1. Hometown & living place "
    "2. Education & Work "
    "3. Hobbies & Free Time "
    "4. Travel & Holidays "
    "5. Technology & Social Media "
    "You should respond in calm & friendly manner when engaging with conversation. Ask & Answer for no more than 1 sentence"
)

In [None]:
# System prompt for the appraiser model: it scores each user answer on
# relevance only (+1 / -1) and asks for the summed final score.
# NOTE(review): the prompt asks for a JSON-only reply, but appraiser() also
# offers the get_category tool and respond_to_message() only acts on a
# tool_use content block - confirm which output channel is intended.
# NOTE(review): the prompt states exactly 5 rounds, while
# respond_to_message() runs the appraisal when its counter reaches 4 -
# confirm the intended number of rounds.
appraiser_system_message = """
You are an IELTS speaking session appraiser that assesses the speaking skill of participants.
For each user response in the conversation history:
- Award +1 point if the response directly addresses the question asked by the assistant
- Award -1 point if the response is irrelevant or unrelated to the question
- Base your assessment purely on relevance, not on grammar, vocabulary, or fluency
- The conversation consists of exactly 5 rounds of question-answer pairs
- Calculate a final score between -5 and +5 based on these assessments
- At the end, sum up the points

Analyze only the content provided in the conversation history. Do not ask for additional information. Answer only with this json format
[{'final_score': score}]
"""

In [None]:
# Tool definition handed to Claude: a single required integer input, "score"
category_function = dict(
    name="get_category",
    description="Assigns a category label based on a numeric score",
    input_schema=dict(
        type="object",
        properties=dict(
            score=dict(
                type="integer",
                description="The numeric score to be categorized",
            ),
        ),
        required=["score"],
    ),
)

In [None]:
def appraiser(history):
    """Ask Claude to score the conversation and report it via the tool call.

    Args:
        history: The conversation so far. It is interpolated into the user
            message using its default string formatting.

    Returns:
        The message object returned by ``claude.messages.create``.
    """
    prompt = f"This is how the conversation is going: {history}"
    return claude.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=200,
        temperature=1,
        system=appraiser_system_message,
        messages=[
            {"role": "user", "content": prompt}
        ],
        tools=[category_function],
        # Force the tool call: the caller only acts on a tool_use content
        # block, whereas the system prompt on its own asks for a plain JSON
        # answer, so without this the score may never reach the tool handler.
        tool_choice={"type": "tool", "name": category_function["name"]},
    )

In [None]:
import tempfile
import subprocess
from io import BytesIO
from pydub import AudioSegment
import time

def play_audio(audio_segment):
    """Play an audio segment by exporting it to a WAV file and running ffplay.

    Args:
        audio_segment: Object exposing ``export(path, format=...)``; the
            caller in this file passes a pydub ``AudioSegment``.

    The temporary WAV file is removed afterwards, whether or not the export
    or the playback succeeded.
    """
    # Use a unique file per call so overlapping calls (or other processes
    # sharing the temp directory) cannot overwrite each other's audio.
    fd, temp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # export() opens the path itself; only the name is needed
    try:
        audio_segment.export(temp_path, format="wav")
        time.sleep(3) # Student Dominic found that this was needed. You could also try commenting out to see if not needed on your PC
        subprocess.call([
            "ffplay",
            "-nodisp",
            "-autoexit",
            "-hide_banner",
            temp_path
        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    finally:
        # Best-effort cleanup: a file that is already gone or still locked
        # must not hide the outcome of the playback itself.
        try:
            os.remove(temp_path)
        except OSError:
            pass
 
def talker(message):
    """Turn ``message`` into speech with the OpenAI TTS endpoint and play it."""
    speech = openai.audio.speech.create(
        model="tts-1",
        voice="onyx",  # Also, try replacing onyx with alloy
        input=message,
    )
    # The response bytes are decoded as MP3 from an in-memory buffer
    mp3_buffer = BytesIO(speech.content)
    play_audio(AudioSegment.from_file(mp3_buffer, format="mp3"))

In [None]:
# Label reported for each score handled by the function call
category = {
    5: "excellent",
    4: "very good",
    3: "good",
    2: "passed",
    1: "practice more",
}


def get_category(score):
    """Return the label for ``score``, or "unknown" when none is defined."""
    if score in category:
        return category[score]
    return "unknown"

In [None]:
# Handle tool call function
def handle_tool_call(message):
    """Extract the score from the first content block and look up its label.

    Returns:
        A ``(score, category)`` tuple, where the category comes from
        ``get_category``.
    """
    first_block = message.content[0]
    score = first_block.input["score"]
    return score, get_category(score)

In [None]:
import base64
from io import BytesIO
from PIL import Image

def artist(score,category):
    """Generate a certificate image for the given score and category.

    Returns:
        A PIL image opened from the base64 payload of the image response.
    """
    certificate_prompt = f"An image of  pop-art style certificate represents of IELTS Speaking Exercise consist of description about the score that users get {score} & and the achievement category {category} that users get"
    generation = openai.images.generate(
        model="dall-e-3",
        prompt=certificate_prompt,
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    raw_image = base64.b64decode(generation.data[0].b64_json)
    return Image.open(BytesIO(raw_image))

In [None]:
def respond_to_message(history_data, counter_value):
    """Produce the examiner's reply to the latest user message.

    Args:
        history_data: Conversation history as a list of role/content dicts.
        counter_value: Number of turns counted so far.

    Returns:
        A ``(history, counter, image)`` tuple. When the history is empty,
        its last entry is not a user message, or any step raises, the
        incoming history and counter are returned unchanged.
    """
    certificate = None

    # Only reply when the most recent entry comes from the user
    awaiting_reply = bool(history_data) and history_data[-1]["role"] == "user"
    if not awaiting_reply:
        return history_data, counter_value, certificate

    messages = [{"role": "system", "content": examiner_system_message}] + history_data

    try:
        completion = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)
        reply = completion.choices[0].message.content

        # Extend the history with the assistant's answer
        updated_history = history_data + [{"role":"assistant", "content":reply}]

        turn = counter_value + 1
        print(f"Conversation turn: {turn}")

        # On the fourth turn, appraise the conversation and restart the count
        if turn == 4:
            appraisal = appraiser(updated_history)
            print(appraisal)
            if appraisal.content[0].type == "tool_use":
                score, label = handle_tool_call(appraisal)
                certificate = artist(score, label)
            turn = 0

        # Speak the reply before handing the new state back
        talker(reply)
    except Exception as e:
        print("Error generating response:", e)
        return history_data, counter_value, certificate

    return updated_history, turn, certificate

In [None]:
def clear_conversation():
    """Return the reset state: an empty history and a counter of zero."""
    empty_history, reset_counter = [], 0
    return empty_history, reset_counter

In [None]:
# Gradio Apps
with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")
        image_output = gr.Image(height=500)
    with gr.Row():
        entry = gr.Microphone(label="Begin by greeting your IELTS Examiner", type="filepath")
    with gr.Row():
        clear = gr.Button("Clear")
    
    # Maintain the conversation history as state
    history = gr.State([])
    counter = gr.State(0)
    
    def process_audio(audio_filepath, history_data):
        """Transcribe a recording and append it to the history as a user turn.

        Args:
            audio_filepath: Path of the recorded audio, or None when there
                is no recording.
            history_data: Conversation history as a list of role/content dicts.

        Returns:
            A new history list ending with the transcribed user message, or
            the incoming history unchanged when there is no recording or the
            transcription fails.
        """
        if audio_filepath is None:
            return history_data
        
        # The context manager closes the recording once the request is done,
        # whether the transcription succeeds or fails.
        with open(audio_filepath, "rb") as audio:
            try:
                transcription = openai.audio.transcriptions.create(
                    model="gpt-4o-mini-transcribe",
                    file=audio
                )
                user_message = transcription.text
                
                # Create a new history by appending the user message
                new_history = history_data + [{"role":"user", "content":user_message}]
                
                return new_history
                
            except Exception as e:
                print("Error transcribing:", e)
                return history_data  # Return unchanged history on error


    # Pipeline for each microphone change: transcribe, generate the reply
    # (and possibly an image), then mirror the history state into the chatbot
    entry.change(
            process_audio, 
            [entry, history], 
            [history]
        ).then(
            respond_to_message,
            [history, counter],
            [history, counter,image_output]
        ).then(
            lambda h: h,  # Pass through function to update UI
            [history],
            [chatbot]
        )
    
    # Clear button resets both the UI and the state
    clear.click(
            clear_conversation, 
            [], 
            [history, counter], 
            queue=False
        ).then(
            lambda: None,  # This updates the UI
            None, 
            chatbot, 
            queue=False
        )

# ui.launch()