In [1]:
!pip install -r requirements.txt
!pip install pdf2image pytesseract
!apt-get install -y poppler-utils

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
poppler-utils is already the newest version (22.02.0-2ubuntu0.8).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [2]:
# Warning control
import warnings
warnings.filterwarnings('ignore')

In [4]:
import os, json
from llama_parse import LlamaParse
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage
)
from llama_index.core.workflow import (
    StartEvent,
    StopEvent,
    Workflow,
    step,
    Event,
    Context,
    InputRequiredEvent,
    HumanResponseEvent
)
from llama_index.utils.workflow import draw_all_possible_flows
from llama_index.readers.whisper import WhisperReader
import gradio as gr
import asyncio
from queue import Queue

import nest_asyncio
# NOTE(review): presumably needed so that synchronous library calls which start
# their own event loop keep working inside the notebook's already-running loop
# -- confirm against the nest_asyncio documentation.
nest_asyncio.apply()

from helper import get_openai_api_key, get_llama_cloud_api_key

# API keys come from the project's `helper` module instead of being hardcoded.
# `llama_cloud_api_key` is handed to LlamaParse in RAGWorkflow.set_up and
# `openai_api_key` to WhisperReader in transcribe_speech.
llama_cloud_api_key = get_llama_cloud_api_key()
openai_api_key = get_openai_api_key()

from pdf2image import convert_from_path
import pytesseract


In [5]:
import shutil

class ParseFormEvent(Event):
    """Emitted by ``RAGWorkflow.set_up`` to hand the application form to ``parse_form``."""

    # Path of the application form; `parse_form` passes it to `convert_from_path`.
    application_form: str

class QueryEvent(Event):
    """Request to answer one application-form field from the resume index.

    Emitted once per field by ``RAGWorkflow.generate_questions`` and consumed
    by ``RAGWorkflow.ask_question``.
    """

    # Full question text that is sent to the query engine.
    query: str
    # Name of the form field the question is about. It was always passed by the
    # producer and read by the consumer, so it is declared explicitly here and
    # a missing value fails at construction time rather than later on access.
    field: str

class ResponseEvent(Event):
    """Answer to a single ``QueryEvent``.

    Emitted by ``RAGWorkflow.ask_question`` and collected by
    ``RAGWorkflow.fill_in_application``.
    """

    # Name of the form field this answer belongs to. It was always passed by
    # the producer and read when building the summary, so it is declared
    # explicitly instead of travelling as an undeclared extra keyword.
    field: str
    # Answer text returned by the query engine.
    response: str

class FeedbackEvent(Event):
    """Emitted by ``RAGWorkflow.get_feedback`` when the human asked for changes.

    It re-triggers ``generate_questions`` so every field is asked again with
    the feedback appended to the question.
    """

    # The human's response text, forwarded unchanged from HumanResponseEvent.
    feedback: str

class GenerateQuestionsEvent(Event):
    """Payload-free signal from ``parse_form`` that the field list is stored in the context."""
    pass


class RAGWorkflow(Workflow):
    """Human-in-the-loop workflow that fills in a job application form from a resume.

    Steps, in the order they normally fire:

    1. ``set_up`` parses and indexes the resume and builds a query engine.
    2. ``parse_form`` OCRs the application form and asks the LLM for its fields.
    3. ``generate_questions`` emits one ``QueryEvent`` per field.
    4. ``ask_question`` answers each query against the resume index.
    5. ``fill_in_application`` waits for all answers, drafts the filled form and
       emits an ``InputRequiredEvent`` to ask a human for feedback.
    6. ``get_feedback`` either stops with the filled form or loops back to
       step 3 with the feedback attached.

    The steps use the async variants of the LLM, parser and query-engine calls
    so that they do not block the event loop the workflow runs on.
    """

    storage_dir = "./storage"
    llm: OpenAI
    # NOTE: despite the annotation, this attribute holds the query engine
    # returned by ``index.as_query_engine(...)``, not the index itself.
    query_engine: VectorStoreIndex

    @staticmethod
    def _extract_fields(raw_text: str) -> list:
        """Parse the LLM reply into the list stored under its ``"fields"`` key.

        A surrounding Markdown code fence (three backticks, optionally followed
        by ``json``) is tolerated, because models add one even when told not to.

        Raises:
            ValueError, KeyError or TypeError when the reply is not a JSON
            object with a non-empty ``"fields"`` list.
        """
        cleaned = raw_text.strip()
        if cleaned.startswith("```"):
            # drop the opening fence line, then everything from the closing fence on
            cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
            cleaned = cleaned.rsplit("```", 1)[0]

        fields = json.loads(cleaned)["fields"]
        if not isinstance(fields, list):
            raise TypeError('"fields" must be a JSON list')
        if not fields:
            # with zero fields no query would ever be sent and the workflow
            # would sit idle until its timeout
            raise ValueError("no fields were found in the form")
        return fields

    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
        """Validate the inputs, index the resume and create the query engine.

        Raises:
            ValueError: if ``resume_file`` or ``application_form`` was not
                passed to ``run()`` or is empty.
        """
        # Reading an argument that was never passed to run() raises
        # AttributeError on the event, which made the ValueError checks below
        # unreachable for that case; read with a default instead.
        resume_file = getattr(ev, "resume_file", None)
        application_form = getattr(ev, "application_form", None)

        if not resume_file:
            raise ValueError("No resume file provided")

        if not application_form:
            raise ValueError("No application form provided")

        # define the LLM to work with
        self.llm = OpenAI(model="gpt-4o-mini")

        # parse and load the resume document
        parser = LlamaParse(
            api_key=llama_cloud_api_key,
            base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
            result_type="markdown",
            content_guideline_instruction="This is a resume, gather related facts together and format it as bullet points with headers"
        )
        documents = await parser.aload_data(resume_file)

        # embed and index the documents
        index = VectorStoreIndex.from_documents(
            documents,
            embed_model=OpenAIEmbedding(model_name="text-embedding-3-small")
        )
        index.storage_context.persist(persist_dir=self.storage_dir)

        # create a query engine
        self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)

        # let's pass the application form to a new step to parse it
        return ParseFormEvent(application_form=application_form)

    # we've separated the form parsing from the question generation
    @step
    async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
        """OCR the application form and store the list of fields to fill in.

        The field list is saved in the context under ``"fields_to_fill"``.

        Raises:
            ValueError: if the LLM reply cannot be turned into a non-empty
                list of fields; the original error is chained as the cause.
        """
        print("Parsing form from path:", ev.application_form)
        pages = convert_from_path(ev.application_form)
        print(f"Found {len(pages)} pages.")

        form_text = "".join(pytesseract.image_to_string(page) for page in pages)

        prompt = f"""
        This is a parsed job application form as plain text.
        Extract a list of all fields that need to be filled in and convert it into:
        {{ "fields": ["Field 1", "Field 2", ...] }}

        <form>{form_text}</form>

        Return JSON ONLY. No markdown or explanation.
        """

        raw_json = await self.llm.acomplete(prompt)
        try:
            fields = self._extract_fields(raw_json.text)
        except Exception as e:
            raise ValueError(f"❌ Failed to parse JSON. Error: {e}\nLLM Output:\n{raw_json.text}") from e

        await ctx.set("fields_to_fill", fields)

        return GenerateQuestionsEvent()

    # this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
    @step
    async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
        """Send one ``QueryEvent`` per form field, including any human feedback.

        Events are dispatched with ``ctx.send_event``; the step itself returns
        ``None``.
        """
        # get the list of fields to fill in
        fields = await ctx.get("fields_to_fill")

        # store the number of fields *before* dispatching anything, so that
        # fill_in_application can never run without it being set
        await ctx.set("total_fields", len(fields))

        # the feedback text is identical for every field, so build it once
        feedback_note = ""
        if isinstance(ev, FeedbackEvent):
            feedback_note = f"""
                    \nWe previously got feedback about how we answered the questions.
                    It might not be relevant to this particular field, but here it is:
                    <feedback>{ev.feedback}</feedback>
                """

        # generate one query for each of the fields, and fire them off
        for position, field in enumerate(fields):
            question = f"How would you answer this question about the candidate? <field>{field}</field>"
            question += feedback_note

            ctx.send_event(QueryEvent(
                field=field,
                query=question,
                # remembered so the answers can be put back in form order even
                # when the queries finish out of order
                position=position
            ))

        return

    @step
    async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
        """Answer a single field question from the indexed resume."""
        print(f"Asking question: {ev.query}")

        response = await self.query_engine.aquery(f"This is a question about the specific resume we have in our database: {ev.query}")

        print(f"Answer was: {str(response)}")

        return ResponseEvent(
            field=ev.field,
            response=response.response,
            position=getattr(ev, "position", 0)
        )

    # we now emit an InputRequiredEvent
    @step
    async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
        """Wait for every answer, draft the filled form and ask a human to review it.

        Returns ``None`` until ``total_fields`` responses have been collected.
        The draft is stored in the context under ``"filled_form"``.
        """
        # get the total number of fields to wait for
        total_fields = await ctx.get("total_fields")

        responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
        if responses is None:
            return None # do nothing if there's nothing to do yet

        # we've got all the responses! restore the order the fields have in the form
        ordered = sorted(responses, key=lambda r: getattr(r, "position", 0))
        response_list = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in ordered)

        result = await self.llm.acomplete(f"""
            You are given a list of fields in an application form and responses to
            questions about those fields from a resume. Combine the two into a list of
            fields and succinct, factual answers to fill in those fields.

            <responses>
            {response_list}
            </responses>
        """)

        # save the result for later
        await ctx.set("filled_form", str(result))

        # Let's get a human in the loop
        return InputRequiredEvent(
            prefix="How does this look? Give me any feedback you have on any of the answers.",
            result=result
        )

    # Accept the feedback.
    @step
    async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
        """Let the LLM decide whether the human accepted the form or wants changes.

        Returns a ``StopEvent`` carrying the stored form when the verdict is
        ``OKAY``; otherwise a ``FeedbackEvent`` that restarts question generation.
        """
        result = await self.llm.acomplete(f"""
            You have received some human feedback on the form-filling task you've done.
            Does everything look good, or is there more work to be done?
            <feedback>
            {ev.response}
            </feedback>
            If everything is fine, respond with just the word 'OKAY'.
            If there's any other feedback, respond with just the word 'FEEDBACK'.
        """)

        verdict = result.text.strip()

        print(f"LLM says the verdict was {verdict}")
        # Accept harmless variations such as "Okay", "'OKAY'" or "OKAY." so a
        # cosmetic difference does not trigger a full, costly re-run.
        if verdict.strip("'\"`.! \n").upper() == "OKAY":
            return StopEvent(result=await ctx.get("filled_form"))
        else:
            return FeedbackEvent(feedback=ev.response)



In [6]:
# Render every possible path through RAGWorkflow to an HTML file so the step
# graph can be inspected.
WORKFLOW_FILE = "lesson_6.html"
draw_all_possible_flows(RAGWorkflow, filename=WORKFLOW_FILE)

lesson_6.html


# Getting voice feedback

In [7]:
def transcribe_speech(filepath):
    """Transcribe a recorded audio file to text with ``WhisperReader``.

    Args:
        filepath: Path of the audio file, or ``None`` when nothing was recorded.

    Returns:
        The text of the first document returned by the reader, or a warning
        message when ``filepath`` is ``None``.
    """
    if filepath is None:
        return "⚠️ No audio file uploaded. Skipping voice transcription."

    # The reader is given the path directly. The previous version also opened
    # the file itself, never used the handle and never closed it.
    reader = WhisperReader(
        model="whisper-1",
        api_key=openai_api_key,
    )
    documents = reader.load_data(filepath)
    return documents[0].text

In [8]:
def store_transcription(output):
    """Remember ``output`` in the module-level ``transcription_value`` and echo it back.

    Returning the value unchanged lets the function sit between a transcriber
    and whatever displays the result.
    """
    globals()["transcription_value"] = output
    return output

In [9]:
# Stand-alone microphone interface: the recording is saved to a file, the file
# path is passed to transcribe_speech, and the text is kept in the global
# `transcription_value` (via store_transcription) as well as shown in a textbox.
mic_transcribe = gr.Interface(
    fn=lambda x: store_transcription(transcribe_speech(x)),
    inputs=gr.Audio(sources="microphone",
                    type="filepath"),
    outputs=gr.Textbox(label="Transcription"))

In [10]:
# New! Transcription handler.
# New! Transcription handler.
class TranscriptionHandler:
    """Collect one spoken response through a temporary Gradio microphone UI.

    Transcriptions are handed from the Gradio callback to the awaiting
    coroutine through a queue instead of a global variable.

    Args:
        share: Passed to ``launch``; ``True`` (the default) asks Gradio for a
            public share link.
        poll_interval: Seconds to sleep between checks of the queue.
    """

    # we create a queue to hold transcription values
    def __init__(self, share=True, poll_interval=1.5):
        self.transcription_queue = Queue()
        self.interface = None
        self.share = share
        self.poll_interval = poll_interval

    # every time we record something we put it in the queue
    def store_transcription(self, output):
        """Queue ``output`` for ``get_transcription`` and return it unchanged."""
        self.transcription_queue.put(output)
        return output

    def _transcribe_and_store(self, filepath):
        """Gradio callback: transcribe the recording and queue the text.

        When no audio was recorded (``filepath`` is ``None``) the warning from
        ``transcribe_speech`` is shown in the UI but NOT queued; otherwise the
        warning text itself would be delivered to the workflow as if it were
        the human's feedback.
        """
        text = transcribe_speech(filepath)
        if filepath is None:
            return text
        return self.store_transcription(text)

    # This is the same interface and transcription logic as before
    # except it stores the result in a queue instead of a global
    def create_interface(self):
        """Build the Gradio UI, remember it on ``self.interface`` and return it."""
        mic_transcribe = gr.Interface(
            fn=self._transcribe_and_store,
            inputs=gr.Audio(sources="microphone", type="filepath"),
            outputs=gr.Textbox(label="Transcription")
        )
        self.interface = gr.Blocks()
        with self.interface:
            gr.TabbedInterface(
                [mic_transcribe],
                ["Transcribe Microphone"]
            )
        return self.interface

    # we launch the transcription interface
    async def get_transcription(self):
        """Launch the UI, wait for one transcription, then shut the UI down.

        Returns:
            The first value placed on the queue.
        """
        self.interface = self.create_interface()
        self.interface.launch(
            share=self.share,
            prevent_thread_lock=True
        )

        try:
            # we poll (every 1.5 seconds by default) until something is queued
            while self.transcription_queue.empty():
                await asyncio.sleep(self.poll_interval)
            return self.transcription_queue.get_nowait()
        finally:
            # always stop the server, including when the wait is cancelled
            # (for example by a workflow timeout) or fails
            if self.interface is not None:
                self.interface.close()


In [11]:
w = RAGWorkflow(timeout=600, verbose=False)

handler = w.run(
    resume_file="SemyonTsyrenov_Resume_MachineLearning.pdf",
    application_form="job_form.pdf"
)

async for event in handler.stream_events():
  if isinstance(event, InputRequiredEvent):
      # Get transcription
      transcription_handler = TranscriptionHandler()
      response = await transcription_handler.get_transcription()

      handler.ctx.send_event(
          HumanResponseEvent(
              response=response
          )
      )

response = await handler
print("Agent complete! Here's your final result:")
print(str(response))

Started parsing the file under job_id 345f3cd5-3bd5-47bf-8359-8ac46f838c78
Parsing form from path: job_form.pdf
Found 2 pages.
Asking question: How would you answer this question about the candidate? <field>Legal First Name</field>
Answer was: Semyon
Asking question: How would you answer this question about the candidate? <field>Legal Last Name</field>
Answer was: The legal last name of the candidate is Tsyrenov.
Asking question: How would you answer this question about the candidate? <field>Email</field>
Answer was: tsyrenovsemyon@gmail.com
Asking question: How would you answer this question about the candidate? <field>Phones</field>
Answer was: The candidate's phone number is 470-442-5949.
Asking question: How would you answer this question about the candidate? <field>Addresses</field>
Answer was: The candidate is located in Kansas City, MO.
Asking question: How would you answer this question about the candidate? <field>City</field>
Answer was: Kansas City, MO
Asking question: How wo

Closing server running on port: 7860
LLM says the verdict was FEEDBACK
Asking question: How would you answer this question about the candidate? <field>Legal First Name</field>
                    
We previously got feedback about how we answered the questions.
                    It might not be relevant to this particular field, but here it is:
                    <feedback>For the title, please provide the title of the job experiences that I had, not my name. Also for the start date and end date, provide the dates of those experiences.
</feedback>
                
Answer was: The legal first name of the candidate is Semyon.
Asking question: How would you answer this question about the candidate? <field>Legal Last Name</field>
                    
We previously got feedback about how we answered the questions.
                    It might not be relevant to this particular field, but here it is:
                    <feedback>For the title, please provide the title of the job experienc

Closing server running on port: 7860
LLM says the verdict was OKAY
Agent complete! Here's your final result:
Here is the combined list of fields and succinct, factual answers based on the provided responses:

1. **Legal First Name:** Semyon
2. **Legal Last Name:** Tsyrenov
3. **Email:** tsyrenovsemyon@gmail.com
4. **Phones:** 470-442-5949
5. **Addresses:** Kansas City, MO
6. **City:** Kansas City, MO
7. **Postal Code:** Not provided
8. **Country/Region/Location:** Kansas City, MO
9. **Region (State, Province, County):** Kansas City, MO
10. **Education:** Bachelor of Science in Computer Science at Rockhurst University (August 2021 - May 2025), enrolled in Joint Engineering Program at University of Missouri-Kansas City (August 2022 - May 2025).
11. **School:** Rockhurst University and University of Missouri-Kansas City
12. **Degree:** Bachelor of Science in Computer Science
13. **Major / Program of Study:** Computer Science
14. **Did You Graduate?:** Currently pursuing, expected graduati