In [None]:
# Quantum Tutor: Voice Assistant for Visually Impaired Learners

## Project Overview  

This project extends a previous summarization tool into a **voice-enabled AI assistant** designed with accessibility in mind. It leverages **OpenAI’s GPT models** to summarize Microsoft’s *Quantum Computing Fundamentals* learning path and now supports **hands-free interaction via speech**. Users can navigate, ask questions, and receive answers entirely through **voice input and audio output**, making it especially useful for visually impaired learners.  

## Key Features  

- **Accessible Voice Interaction**  
  - Users speak their questions directly (speech-to-text).  
  - The assistant replies both in text and with synthesized speech (text-to-speech).  
  - Optimized for screen readers: Gradio’s ARIA-label support ensures full keyboard and assistive technology compatibility.  

- **Lesson-level Summaries**: Generates summaries for each lesson inside every module.  
- **Full-course Summaries**: Aggregates lesson insights into a comprehensive course overview.  
- **Customizable Options**: Choose summary length and output language.  
- **Interactive Chatbot Flow**: Clear, repeatable workflow for navigating with keyboard or voice only.  
- **Clean Markdown Output**: Well-structured summaries that remain screen-reader friendly.  

## Tech Stack  

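- **Models**: GPT-4o-mini (chat and summaries), `whisper-1` (speech-to-text), `tts-1` (text-to-speech), with optional local Llama 3.2 via Ollama  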
- **Interface**: Gradio (ARIA-label enabled)  
- **Language**: Python  
- **Libraries**: BeautifulSoup, Requests, python-dotenv, OpenAI Python SDK (chat, speech-to-text, text-to-speech)  

## Purpose  

The assistant demonstrates how AI can **make complex technical education more accessible** by combining summarization with voice-first interaction. It highlights **inclusive design principles**—providing equal access to technical learning for users who rely on screen readers or voice navigation.  


In [None]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI
import gradio as gr

# If you get an error running this cell, then please head over to the troubleshooting notebook!

In [None]:
# Load environment variables from a local .env file (not included)

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# Whitespace is tested before the prefix: a key with a leading space or tab
# would otherwise be reported as having the wrong prefix, which hides the
# real problem.

if not api_key:
    print("No API key was found")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key")
else:
    print("API key found and looks good so far!")


In [None]:
# Shared OpenAI client used by every GPT, Whisper and TTS call in this notebook.
# No key is passed explicitly, so the client relies on its default credential
# lookup (presumably the OPENAI_API_KEY loaded above - confirm if it fails).
openai = OpenAI()

# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.


In [None]:
# ollama api
# Local Ollama chat endpoint and JSON request header. Neither constant is
# referenced by the other cells shown in this notebook.
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
# Model tag passed to every Ollama-backed completion call.
MODEL_LLAMA = "llama3.2"

In [None]:
# Shell command: fetch the llama3.2 model with the Ollama CLI (requires a local Ollama install).
!ollama pull llama3.2

In [None]:
# OpenAI-SDK client pointed at the local Ollama server's /v1 endpoint.
# The api_key value is presumably a placeholder that Ollama ignores - TODO confirm.
ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

In [None]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them.
# This browser-like User-Agent is sent with every request made by Website.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """A fetched web page reduced to its title, visible text and raw links.

    Attributes:
        url: The URL that was requested.
        title: ``soup.title.string`` when a <title> tag exists (this can be
            None for an empty or nested title), otherwise "No title found".
        text: Body text joined with newlines after removing script, style,
            img and input elements; "" when the document has no <body>.
        links: Stripped, non-blank ``href`` values of all <a> elements.
    """

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library
        """
        self.url = url
        # A timeout keeps one unresponsive page from hanging the whole run.
        response = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"

        # Non-HTML or malformed responses may have no <body>; calling
        # soup.body(...) on None would raise a TypeError.
        body = soup.body
        if body is None:
            self.text = ""
        else:
            for irrelevant in body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = body.get_text(separator="\n", strip=True)

        hrefs = (a.get("href") for a in soup.find_all("a", href=True))
        # Drop empty and whitespace-only hrefs so no "" entries reach the prompt.
        self.links = [href.strip() for href in hrefs if href and href.strip()]

In [None]:
# Create a system prompt function that can use different language and length 

def build_system_prompt(language="Spanish", length="short"):
    """Return the system prompt requesting a ``length`` markdown summary in ``language``.

    The text keeps the indentation and trailing whitespace of the original
    triple-quoted template (second line indented by four spaces, final line
    consisting of four spaces).
    """
    first_line = (
        "You are an assistant that analyzes the contents of a website "
        f"and provides a {length} summary, ignoring text that might be navigation related."
    )
    second_line = f"    Respond in markdown, and respond in {language}."
    return "\n".join((first_line, second_line, "    "))
    
    
                        

In [None]:
# Create a function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website, language=None, length=None):
    """Build the user prompt asking for a markdown summary of ``website``.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.
        language: Optional output language to name in the request.
        length: Optional summary length (e.g. "short") to name in the request.

    Returns:
        The prompt: the page title, the summary request, then the page text.
    """
    # Mention length/language only when supplied. The previous plain (non-f)
    # string sent the literal "{length}" and "{language}" placeholders.
    summary_kind = f"{length} summary" if length else "summary"
    language_part = f" in {language}" if language else ""

    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += (
        "\nThe contents of this website is as follows; "
        f"please provide a {summary_kind}{language_part} of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    user_prompt += website.text
    return user_prompt

In [None]:

def messages_for(website, language, length):
    """Return the two-message chat payload (system, then user) for summarizing ``website``."""
    system_turn = dict(role="system", content=build_system_prompt(language, length))
    user_turn = dict(role="user", content=user_prompt_for(website))
    return [system_turn, user_turn]

In [None]:
# System prompt for the link-classification calls (get_links / get_links_ollama):
# it tells the model to keep only lesson/module links of the learning path and
# to answer with a JSON object holding a "links" array of {"type", "url"} items.
link_system_prompt = """
You are provided with a list of links found on a Microsoft Learn training page.

Decide which of the links are lessons within the Quantum Computing Fundamentals training path
(https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/).

- Only include links that are actual lesson/module pages within this training path.
- Replace relative links (like /training/modules/...) with full https://learn.microsoft.com/... URLs.
- Ignore links to navigation, terms of service, privacy, blogs, or anything not part of this course.

You should respond in JSON in this format:

{
    "links": [
        {"type": "lesson", "url": "https://learn.microsoft.com/en-us/training/modules/intro-to-azure-quantum/1-introduction"},
        {"type": "lesson", "url": "https://learn.microsoft.com/en-us/training/modules/intro-to-azure-quantum/2-what-is-quantum-compute"}
    ]
}
"""


In [None]:

def get_links_user_prompt(website):
    """Build the user prompt listing the raw links scraped from ``website``.

    ``website`` must expose ``url`` and an iterable ``links`` of strings; the
    links are appended one per line after the filtering instructions.
    """
    instruction_lines = (
        f"Here are the raw links scraped from {website.url}.",
        "Only include links that are lesson/module pages for the Quantum Computing Fundamentals path.",
        "A valid module URL will contain '/training/modules/'.",
        "Return a JSON object with an array 'links' where each item has type:'lesson' and url: full https URL.",
        "Do not include Terms of Service, Privacy, navigation, blog, external marketing, error pages, 404 pages, or mailto links.",
        "",
        "Links (some may be relative):",
    )
    link_lines = "\n".join(website.links)
    return "\n".join(instruction_lines) + "\n" + link_lines


In [None]:
import json

def get_links(url):
    """Scrape ``url`` and ask gpt-4o-mini which of its links are course lessons.

    Returns the model's JSON answer parsed into Python objects (the prompt
    asks for a dict with a "links" list).
    """
    page = Website(url)  # page.links holds the raw href list
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
        response_format={"type": "json_object"},
    )
    # The message content is JSON text because of response_format above.
    return json.loads(completion.choices[0].message.content)


In [None]:
import json

def get_links_ollama(url):
    """Scrape ``url`` and ask the local Ollama model which links are course lessons.

    Returns the model's JSON answer parsed into Python objects (the prompt
    asks for a dict with a "links" list).
    """
    page = Website(url)  # page.links holds the raw href list
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = ollama_via_openai.chat.completions.create(
        model=MODEL_LLAMA,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    # The message content is expected to be JSON text.
    return json.loads(completion.choices[0].message.content)


In [None]:
#call the OpenAI API. 

def summarize(url, language, length):
    """Fetch ``url`` and return gpt-4o-mini's summary text in the requested language and length."""
    page = Website(url)
    completion = openai.chat.completions.create(
        messages=messages_for(page, language, length),
        model="gpt-4o-mini",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
    

In [None]:
#call ollama 

def summarize_ollama(url, language, length):
    """Fetch ``url`` and return the local Ollama model's summary text in the requested language and length."""
    page = Website(url)
    completion = ollama_via_openai.chat.completions.create(
        messages=messages_for(page, language, length),
        model=MODEL_LLAMA,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
    

In [None]:
#Summarize all the lessons in microsoft quantum computer training
from urllib.parse import urljoin

def summarize_training(path_url, language, length):
    """Summarize every lesson of the training path with gpt-4o-mini, then the whole course.

    Args:
        path_url: URL of the learning-path page to crawl.
        language: Output language for the summaries.
        length: Desired summary length (e.g. "short").

    Returns:
        Markdown text: one "### <lesson title>" section per lesson followed
        by a "## General Course Summary" section, or a short notice when no
        lesson could be summarized.
    """
    base_url = "https://learn.microsoft.com"

    # Entry 0 from get_lesson_choices is the "All Lessons" placeholder, so it
    # is excluded from both the loop and the reported count.
    lessons = get_lesson_choices(path_url)[1:]
    print(f"Found {len(lessons)} lessons")

    all_summaries = []
    for _lesson_num, lesson_title, lesson_url in lessons:
        if not lesson_url:
            # The link model may omit "url"; there is nothing to fetch then.
            print(f"Skipping {lesson_title}: no URL returned")
            continue
        # urljoin resolves protocol-relative ("//host/...") and root-relative
        # ("/path") links and leaves absolute URLs unchanged.
        lesson_url = urljoin(base_url, lesson_url)
        print(f"Summarizing {lesson_title} ({lesson_url})...")
        summary = summarize(lesson_url, language, length)
        all_summaries.append(f"### {lesson_title}\n{summary}\n")

    if not all_summaries:
        return "No lessons were found to summarize."

    lesson_sections = "\n".join(all_summaries)
    combined_prompt = "Here are summaries of each lesson:\n\n" + lesson_sections
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": build_system_prompt(language, length)},
            {"role": "user", "content": "Please summarize the entire training path based on these lesson summaries:\n\n" + combined_prompt}
        ]
    )

    return lesson_sections + "\n\n## General Course Summary\n" + response.choices[0].message.content
    

In [None]:
def get_lesson_choices(path_url):
    """List every lesson of every module found on the learning-path page.

    Returns a list of ``(number, title, url)`` tuples. Entry 0 is always
    ``("0", "All Lessons", path_url)``; lessons are numbered "1", "2", ...
    across all modules and titled "<module title> - <lesson title>".
    """
    # Starting with the placeholder means len(choices) is the next lesson number.
    choices = [("0", "All Lessons", path_url)]

    for module_link in get_links(path_url).get("links", []):
        module_url = module_link.get("url")

        # Fetch the module page itself to read its title.
        module_title = Website(module_url).title or "Untitled Module"

        # Then classify the links inside the module page.
        lesson_links = get_links(module_url).get("links", [])
        for position, lesson_link in enumerate(lesson_links, start=1):
            lesson_title = lesson_link.get("title", f"Lesson {position}")
            choices.append((
                str(len(choices)),
                f"{module_title} - {lesson_title}",
                lesson_link.get("url"),
            ))

    return choices

In [None]:
def get_module_choices(path_url):
    """Return ``(number, title, url)`` tuples for each module linked from ``path_url``.

    Numbers are strings starting at "1"; a module whose page has no usable
    title falls back to "Module <n>".
    """
    module_links = get_links(path_url).get("links", [])
    choices = []
    for position, link in enumerate(module_links, start=1):
        target = link.get("url")
        title = Website(target).title or f"Module {position}"
        choices.append((str(position), title, target))
    return choices

In [None]:
# Smoke test: list the modules of the learning path. This performs live HTTP
# requests and gpt-4o-mini calls through get_links / Website.
get_module_choices("https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/")

In [None]:
def get_lessons_in_module(module_url, module_title=None):
    """Return the lessons linked from a module page as a list of dicts.

    Each dict has "option" (1-based number as a string), "title" (prefixed
    with "<module_title> - " when ``module_title`` is truthy) and "url".
    """
    lessons = []
    for number, link in enumerate(get_links(module_url).get("links", []), start=1):
        title = link.get("title", f"Lesson {number}")
        if module_title:
            title = f"{module_title} - {title}"
        lessons.append(dict(option=str(number), title=title, url=link.get("url")))
    return lessons

In [None]:
# Smoke test for get_lessons_in_module (live HTTP + gpt-4o-mini calls).
# NOTE(review): this passes the learning-path URL rather than a module URL, and
# the integer 1 as module_title, so titles render as "1 - <lesson>" - confirm intended.
get_lessons_in_module("https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/",1)

In [None]:
#Summarize all the lessons in microsoft quantum computer training
from urllib.parse import urljoin

def summarize_training_ollama(path_url, language, length):
    """Summarize every lesson of the training path with the local Ollama model, then the whole course.

    Args:
        path_url: URL of the learning-path page to crawl.
        language: Output language for the summaries.
        length: Desired summary length (e.g. "short").

    Returns:
        Markdown text: one "### <lesson title>" section per lesson followed
        by a "## General Course Summary" section, or a short notice when no
        lesson could be summarized.
    """
    base_url = "https://learn.microsoft.com"

    # Entry 0 from get_lesson_choices is the "All Lessons" placeholder.
    lessons = get_lesson_choices(path_url)[1:]
    print(f"Found {len(lessons)} lessons")

    all_summaries = []
    # get_lesson_choices yields (number, title, url) triples; unpacking only
    # two names here raised "too many values to unpack".
    for _lesson_num, lesson_title, lesson_url in lessons:
        if not lesson_url:
            print(f"Skipping {lesson_title}: no URL returned")
            continue
        # urljoin resolves "//host/..." and "/path" links and leaves absolute URLs unchanged.
        lesson_url = urljoin(base_url, lesson_url)
        print(f"Summarizing {lesson_title} ({lesson_url})...")
        summary = summarize_ollama(lesson_url, language, length)
        all_summaries.append(f"### {lesson_title}\n{summary}\n")

    if not all_summaries:
        return "No lessons were found to summarize."

    lesson_sections = "\n".join(all_summaries)
    combined_prompt = "Here are summaries of each lesson:\n\n" + lesson_sections
    response = ollama_via_openai.chat.completions.create(
        model=MODEL_LLAMA,
        messages=[
            {"role": "system", "content": build_system_prompt(language, length)},
            {"role": "user", "content": "Please summarize the entire training path based on these lesson summaries:\n\n" + combined_prompt}
        ]
    )

    return lesson_sections + "\n\n## General Course Summary\n" + response.choices[0].message.content
    

In [None]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url, language, length):
    """Render the gpt-4o-mini summary of a single page as Markdown in the notebook output."""
    display(Markdown(summarize(url, language, length)))

In [None]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary_all(url, language, length):
    """Render the gpt-4o-mini summary of the whole training path as Markdown in the notebook output."""
    display(Markdown(summarize_training(url, language, length)))

In [None]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary_ollama(url, language, length):
    """Render the Ollama summary of a single page as Markdown in the notebook output."""
    display(Markdown(summarize_ollama(url, language, length)))

In [None]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary_ollama_all(url, language, length):
    """Render the Ollama summary of the whole training path as Markdown in the notebook output."""
    display(Markdown(summarize_training_ollama(url, language, length)))

In [None]:
# def get_lesson_dropdown():
#     try:
#         return ["All Lessons"] + get_lesson_choices("https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/")
#     except:
#         return ["All Lessons"]

In [None]:
def get_lesson_dropdown():
    """Return the selectable options: "All Lessons" followed by the course modules.

    Returns:
        A list of ``(number, title, url)`` tuples whose first entry is
        ``("0", "All Lessons", None)``. If the module list cannot be loaded,
        only that first entry is returned and the error is printed.
    """
    all_lessons_entry = ("0", "All Lessons", None)
    try:
        modules = get_module_choices("https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/")
    except Exception as exc:
        # Best-effort fallback, but report the cause instead of hiding it. A bare
        # "except:" would also swallow KeyboardInterrupt and SystemExit.
        print(f"Could not load the module list: {exc}")
        return [all_lessons_entry]
    return [all_lessons_entry] + modules

In [None]:
# Tool schema advertising get_lesson_dropdown (takes no arguments) to the chat model.
lesson_dropdown_function = dict(
    name="get_lesson_dropdown",
    description="Get the list of lessons available in the course",
    parameters=dict(
        type="object",
        properties={},
        additionalProperties=False,
    ),
)

In [None]:
def summarize_choose_model(model, language, length, lesson):
    """Summarize the whole course or a single lesson with the chosen backend.

    Args:
        model: "GPT" or "Ollama".
        language: Output language for the summary.
        length: Desired summary length (e.g. "short").
        lesson: "0" or "All Lessons" for the whole course, otherwise the
            http(s) URL of a single lesson.

    Returns:
        The summary text, or an "Error: ..." string when ``lesson`` is
        neither the all-lessons marker nor a URL.

    Raises:
        ValueError: If ``model`` is not "GPT" or "Ollama".
    """
    path_url = "https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/"

    # Validate up front so every branch below has a defined result. The
    # parameter is no longer overwritten with "GPT", which made Ollama unreachable.
    if model not in ("GPT", "Ollama"):
        raise ValueError("Please choose a different option")
    use_gpt = model == "GPT"

    # Handle "All Lessons" explicitly
    if lesson == "0" or lesson == "All Lessons":
        if use_gpt:
            return summarize_training(path_url, language, length)
        return summarize_training_ollama(path_url, language, length)

    # Anything else must be a lesson URL (as produced by get_lessons_in_module).
    if not (isinstance(lesson, str) and lesson.startswith("http")):
        return f"Error: Expected a lesson URL but got: {lesson}"

    if use_gpt:
        return summarize(lesson, language, length)
    return summarize_ollama(lesson, language, length)

In [None]:
# Tool schema advertising get_lessons_in_module to the chat model;
# only module_url is required.
get_lessons_function = dict(
    name="get_lessons_in_module",
    description="Get all lessons within a specific module",
    parameters=dict(
        type="object",
        properties=dict(
            module_url=dict(
                type="string",
                description="The URL of the module to get lessons from",
            ),
            module_title=dict(
                type="string",
                description="The title of the module (optional, for display purposes)",
            ),
        ),
        required=["module_url"],
        additionalProperties=False,
    ),
)



In [None]:
# Tool schema advertising get_module_choices (takes no arguments) to the chat model.
get_modules_function = dict(
    name="get_module_choices",
    description="Get the list of available modules in the course",
    parameters=dict(
        type="object",
        properties={},
        additionalProperties=False,
    ),
)

In [None]:
# Tool schema advertising summarize_choose_model to the chat model.
# The "lesson" description must ask for a URL: summarize_choose_model returns
# an error for anything that is neither "All Lessons"/"0" nor an http(s) URL,
# so asking the model for a lesson number made single-lesson requests fail.
summarize_translate_function = {
    "name": "summarize_choose_model",
    "description": "Summarize and/or translate Microsoft Quantum Computing Course",
    "parameters": {
        "type": "object",
        "properties": {
            "lesson": {
                "type": "string",
                "description": "Either 'All Lessons' for the whole course, or the full https URL of the chosen lesson as returned by get_lessons_in_module",
            },
            "language": {
                "type": "string",
                "description": "The language the user wants to use",
            },
            "length": {
                "type": "string",
                "enum": ["short", "medium", "long"],
                "description": "Desired summary length",
            }
        },
        "required": ["lesson", "language", "length"],
        "additionalProperties": False
    }
}

In [None]:
# Tool list passed to the chat completions API; each schema is wrapped in the
# {"type": "function", "function": ...} envelope, in the original order.
tools = [
    {"type": "function", "function": schema}
    for schema in (
        summarize_translate_function,
        lesson_dropdown_function,
        get_lessons_function,
        get_modules_function,
    )
]

In [None]:
# System prompt for the tutor persona and its tool-calling workflow.
# The sentences are joined with single spaces into one string.
system_message = " ".join([
    "You are a helpful, charismatic, PhD professor in quantum computing.",
    "You are amazing at explaining complex subjects in a concise, easy way that even elementary school students can understand.",
    "The user has already been asked if they want the full course or a single lesson.",
    "If they say 'entire course' or 'all lessons': ask for language and length, then call summarize_choose_model with lesson='All Lessons'.",
    "If they say 'single lesson' or 'specific lesson': Step 1) Call get_module_choices. Step 2) After they pick a module, call get_lessons_in_module. Step 3) After they pick a lesson, ask for language and length. Step 4) Call summarize_choose_model with the lesson URL.",
    "Never re-ask if they want full course or single lesson - they already answered that.",
])

In [None]:
# Chat model used by the tool-calling loops in chat() and chat_audio().
MODEL = "gpt-4o-mini"

In [None]:
def handle_tool_call(message):
    """Run the first tool call requested by the model and wrap its result.

    Args:
        message: Assistant message whose ``tool_calls[0]`` names the function
            to run and carries its JSON-encoded arguments.

    Returns:
        A ``{"role": "tool", "content": <json str>, "tool_call_id": ...}``
        dict to append to the conversation. Unknown tool names produce a JSON
        ``{"error": ...}`` payload instead of raising.

    Note:
        Only ``tool_calls[0]`` is answered; any further tool calls in the
        same message are ignored.
    """
    path_url = "https://learn.microsoft.com/en-us/training/paths/quantum-computing-fundamentals/"
    tool_call = message.tool_calls[0]
    function_name = tool_call.function.name
    # Guard against an empty argument string for tools that take no arguments.
    arguments = json.loads(tool_call.function.arguments or "{}")

    # Handle get_module_choices
    if function_name == "get_module_choices":
        modules = get_module_choices(path_url)
        content = json.dumps({"modules": modules})

    # Handle get_lesson_dropdown. It is advertised in `tools`, so it must be
    # answered here; previously `content` stayed unbound for it.
    elif function_name == "get_lesson_dropdown":
        content = json.dumps({"options": get_lesson_dropdown()})

    # Handle get_lessons_in_module
    elif function_name == "get_lessons_in_module":
        module_url = arguments.get("module_url")
        module_title = arguments.get("module_title")
        lessons = get_lessons_in_module(module_url, module_title)
        content = json.dumps({"lessons": lessons})

    # Handle summarize_choose_model
    elif function_name == "summarize_choose_model":
        lesson = arguments.get("lesson")
        language = arguments.get("language")
        length = arguments.get("length")

        summary = summarize_choose_model(
            model="GPT",
            lesson=lesson,
            language=language,
            length=length
        )

        content = json.dumps({
            "lesson": lesson,
            "language": language,
            "length": length,
            "summary": summary
        })

    # Unknown tool: report it to the model rather than crashing.
    else:
        content = json.dumps({"error": f"Unknown tool: {function_name}"})

    return {
        "role": "tool",
        "content": content,
        "tool_call_id": tool_call.id
    }

In [None]:
# This version of the chat has no audio incorporated yet, audio will be added in the block below

def chat(message, history):
    """Text-only chat callback: answer ``message``, running at most one tool call.

    Args:
        message: The user's latest message.
        history: Prior conversation as a list of role/content dicts.

    Returns:
        The assistant's reply text.
    """
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
    # handle_tool_call answers only the first tool call, so parallel tool calls
    # are disabled; an unanswered tool_call_id would make the follow-up request fail.
    response = openai.chat.completions.create(
        model=MODEL,
        messages=messages,
        tools=tools,
        parallel_tool_calls=False,
    )

    if response.choices[0].finish_reason == "tool_calls":
        assistant_message = response.choices[0].message
        tool_response = handle_tool_call(assistant_message)
        messages.append(assistant_message)
        messages.append(tool_response)
        # The second request omits tools, so the model must answer in text.
        response = openai.chat.completions.create(model=MODEL, messages=messages)
    return response.choices[0].message.content


    

In [None]:
 # Launch the text-only chat UI bound to the chat() defined in the previous cell.
 gr.ChatInterface(fn=chat, type="messages").launch()

In [None]:
def talker(message):
    """Speak ``message`` aloud using OpenAI text-to-speech.

    Markdown links are reduced to their visible text and bare URLs are
    removed before synthesis. The audio is written to a uniquely named
    ``output_audio_<hex>.mp3`` in the current directory and autoplayed in the
    notebook output. Nothing is synthesized when no speakable text remains.

    Args:
        message: Text to speak (may contain markdown links and URLs).
    """
    # Local imports, matching this cell's style: uuid and Audio were never
    # imported anywhere in the notebook.
    import glob
    import os
    import re
    import uuid

    from IPython.display import Audio, display

    max_tts_chars = 4096  # input limit documented for the OpenAI speech endpoint

    # Reduce markdown links to their text FIRST. Stripping bare URLs first
    # also eats the closing ")" of a link, so the link pattern never matches.
    spoken_message = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', message or "")
    spoken_message = re.sub(r'https?://[^\s]+', '', spoken_message)
    spoken_message = spoken_message.strip()[:max_tts_chars]
    if not spoken_message:
        return  # nothing speakable; the API rejects empty input

    # Delete old audio files BEFORE creating new one
    for file in glob.glob("output_audio*.mp3"):
        try:
            os.remove(file)
        except OSError:
            pass  # Best effort: the file may be in use or already gone

    response = openai.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=spoken_message
    )
    output_filename = f"output_audio_{uuid.uuid4().hex}.mp3"
    with open(output_filename, "wb") as f:
        f.write(response.content)
    # Display only after the file is closed, so Audio reads the fully flushed
    # file rather than a partially buffered one.
    display(Audio(output_filename, autoplay=True))

In [None]:
# This version of the chat also speaks its replies aloud. The next cells add
# transcription so that the user can give voice input as well.
def chat(message, history):
    """Chat callback that also speaks the reply aloud.

    Args:
        message: The user's latest message.
        history: Prior conversation as a list of role/content dicts.

    Returns:
        The assistant's reply text. gr.ChatInterface appends the reply to the
        conversation itself, so the full history must not be returned.
    """
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
    # handle_tool_call answers only the first tool call, so parallel tool calls
    # are disabled; an unanswered tool_call_id would make the follow-up request fail.
    response = openai.chat.completions.create(
        model=MODEL,
        messages=messages,
        tools=tools,
        parallel_tool_calls=False,
    )

    if response.choices[0].finish_reason == "tool_calls":
        assistant_message = response.choices[0].message
        tool_response = handle_tool_call(assistant_message)
        messages.append(assistant_message)
        messages.append(tool_response)
        response = openai.chat.completions.create(model=MODEL, messages=messages)

    reply = response.choices[0].message.content or ""

    # Same spoken-length cap as chat_audio, to stay under the TTS input limit.
    if len(reply) > 4000:
        spoken_reply = reply[:4000] + "... The full response is shown in the chat."
    else:
        spoken_reply = reply
    if spoken_reply:
        talker(spoken_reply)

    return reply

In [None]:
# Launch the chat UI again, now bound to the speaking chat() defined in the previous cell.
gr.ChatInterface(fn=chat, type="messages").launch()

In [None]:
# add transcription to take recorded audio and convert to text
def transcribe(audio_file_path):
    """Return the whisper-1 transcription text of the audio file at ``audio_file_path``."""
    with open(audio_file_path, "rb") as recording:
        result = openai.audio.transcriptions.create(
            file=recording,
            model="whisper-1",
        )
    return result.text

In [None]:
def chat_audio(audio, history):
    """Voice chat callback: transcribe the recording, answer it, and speak the reply.

    Args:
        audio: File path of the recorded question, or None/empty when there
            is no recording.
        history: Chatbot conversation as a list of role/content dicts.

    Returns:
        The conversation extended with the transcribed user message and the
        assistant reply; ``history`` unchanged when there is no recording.
    """
    # Without a recording path there is nothing to transcribe (open(None) would raise).
    if not audio:
        return history

    user_message = transcribe(audio)
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": user_message}]

    # handle_tool_call answers only the first tool call, so parallel tool calls
    # are disabled; an unanswered tool_call_id would make the follow-up request fail.
    response = openai.chat.completions.create(
        model=MODEL,
        messages=messages,
        tools=tools,
        parallel_tool_calls=False,
    )

    if response.choices[0].finish_reason == "tool_calls":
        assistant_message = response.choices[0].message
        tool_response = handle_tool_call(assistant_message)
        messages.append(assistant_message)
        messages.append(tool_response)
        response = openai.chat.completions.create(model=MODEL, messages=messages)

    reply = response.choices[0].message.content or ""
    # Build a new list rather than mutating the caller's history in place.
    updated_history = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": reply},
    ]

    # Cap the spoken text to stay under the TTS input limit; the chat shows everything.
    if len(reply) > 4000:
        spoken_reply = reply[:4000] + "... The full response is shown in the chat."
    else:
        spoken_reply = reply

    if spoken_reply:
        talker(spoken_reply)

    return updated_history


In [None]:
import gradio as gr
import os
import glob

def clear_all():
    """Delete the generated ``output_audio*.mp3`` files in the current directory.

    Prints one line per file (deleted, or why it could not be deleted) and
    returns None, which the UI uses to reset the component wired as output.
    """
    for path in glob.glob("output_audio*.mp3"):  # TTS output files
        try:
            if not os.path.exists(path):
                continue
            os.remove(path)
            print(f"Deleted: {path}")
        except Exception as error:
            print(f"Could not delete {path}: {error}")

    return None


In [None]:
import gradio as gr

def initial_greeting():
    """Speak the opening question and return it as the chatbot's first message.

    Returns:
        A one-item list holding the assistant greeting as a role/content dict.
    """
    # Adjacent string literals are used instead of backslash continuations: the
    # continuations pulled each following line's indentation into the text.
    # "QuantumComputing" is also now two words.
    greeting = (
        "Hello! I'm your quantum computing assistant. Would you like a summary "
        "of the entire Quantum Computing course, or would you prefer to learn more "
        "about a specific lesson?"
    )
    talker(greeting)
    return [{"role": "assistant", "content": greeting}]
# Voice UI: a chatbot pre-filled with the spoken greeting, a microphone input
# and a Clear button. initial_greeting() runs (and speaks) while the layout is built.
with gr.Blocks() as ui:
    gr.Markdown("""
    ## Quantum Computing Voice Assistant
    
    **For screen reader users:**
    - Use Tab key to navigate between buttons
    - Press Enter or Space to activate buttons
    - Workflow: Click "Record" → Speak your question → Click "Stop recording" → Wait for response → Click "Clear" → Repeat
    """)
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages", value=initial_greeting())
    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Speak to the assistant"
        )
  
        clear = gr.Button("Clear")

    
    # When recording stops, send the audio file path and the current chat
    # history to chat_audio; its return value replaces the chatbot contents.
    audio_input.stop_recording(
        chat_audio,
        inputs=[audio_input, chatbot],
        outputs=[chatbot]
    )    

    # Clear deletes the generated audio files and resets the recorder.
    clear.click(
        clear_all,
        None, 
        [audio_input],  # only the audio input is an output here; the chatbot is not
        queue=False
    )
ui.launch(inbrowser=True)  