## Welcome to Lab 3 for Week 1 Day 4

Today we're going to build something with immediate value!

In the folder `me` I've put a single file `linkedin.pdf` - it's a PDF download of my LinkedIn profile.

Please replace it with yours!

I've also made a file called `summary.txt`

We're not going to use Tools just yet - we're going to add the tool tomorrow.

<table style="margin: 0; text-align: left; width:100%">
    <tr>
        <td style="width: 150px; height: 150px; vertical-align: middle;">
            <img src="../assets/tools.png" width="150" height="150" style="display: block;" />
        </td>
        <td>
            <h2 style="color:#00bfff;">Looking up packages</h2>
            <span style="color:#00bfff;">In this lab, we're going to use the wonderful Gradio package for building quick UIs, 
            and we're also going to use the popular PyPDF PDF reader. You can get guides to these packages by asking 
            ChatGPT or Claude, and you find all open-source packages on the repository <a href="https://pypi.org">https://pypi.org</a>.
            </span>
        </td>
    </tr>
</table>

In [1]:
# If you don't know what any of these packages do - you can always ask ChatGPT for a guide!

import os
from IPython.display import Markdown, display
from limits import parse
from limits.storage import RedisStorage
from limits.strategies import FixedWindowRateLimiter
from dotenv import load_dotenv
from openai import OpenAI
from pypdf import PdfReader
import gradio as gr

In [35]:
MODEL_LIMITS = {
    # --- Gemini 2.5 Series ---
    "gemini-2.5-pro": {"rpd": 50, "rpm": 2, "tpm": 125000, "tpd":3000000},
    # "gemini-2.5-pro-1p-freebie": {"rpd": 500, "rpm": 750, "tpm": 1000000},
    "gemini-2.5-flash": {"rpd": 250, "rpm": 10, "tpm": 250000, "tpd":None},
    "gemini-2.5-flash-lite": {"rpd": 1000, "rpm": 15, "tpm": 250000, "tpd":None}, # can
    "gemini-2.5-flash-live": {"rpd": None, "rpm": None, "tpm": 1000000, "tpd":None},
    # "gemini-2.5-flash-tts": {"rpd": 150, "rpm": 30, "tpm": 10000},
    # "gemini-2.5-flash-native-audio-dialog": {"rpd": 50, "rpm": None, "tpm": 25000},
    # "gemini-2.5-flash-exp-native-audio-thinking-dialog": {"rpd": 50, "rpm": None, "tpm": 10000},

    # # --- Gemini 2.0 Series ---
    # "gemini-2.0-flash": {"rpd": 200, "rpm": 150, "tpm": 1000000},
    # "gemini-2.0-flash-lite": {"rpd": 2000, "rpm": 300, "tpm": 1000000},
    # "gemini-2.0-flash-live": {"rpd": None, "rpm": None, "tpm": 1000000},
    # "gemini-2.0-exp": {"rpd": 500, "rpm": 50, "tpm": 1000000},

    # # --- Gemini 1.5 Series ---
    # "gemini-1.5-flash": {"rpd": 500, "rpm": 150, "tpm": 250000},
    # "gemini-1.5-flash-8b": {"rpd": 500, "rpm": 150, "tpm": 250000},

    # # --- Gemma 3 Series ---
    # "gemma-3-1b": {"rpd": 14400, "rpm": 300, "tpm": 15000},
    # "gemma-3-2b": {"rpd": 14400, "rpm": 300, "tpm": 15000},
    # "gemma-3-4b": {"rpd": 14400, "rpm": 300, "tpm": 15000},
    # "gemma-3-12b": {"rpd": 14400, "rpm": 300, "tpm": 15000},
    # "gemma-3-27b": {"rpd": 14400,"rpm": 300,"tpm": 15000},
    # # --- Other & Experimental Models ---
    # "chat-bard": {"rpd": 2520000, "rpm": 18000, "tpm": None},
    # "computer-use-exp": {"rpd": None, "rpm": 1000, "tpm": None},
    # "gqi-cst-h34jgm": {"rpd": None, "rpm": 500, "tpm": 3000000},
    # "gemini-1.0-pro": {"rpd": 0, "rpm": 0, "tpm": 0},
    # "gemini-1.5-pro": {"rpd": 0, "rpm": 0, "tpm": 0},
    # "gemini-1.5-pro-exp": {"rpd": 0, "rpm": 0, "tpm": 0},
    # "gemini-1.5-flash-exp": {"rpd": 0, "rpm": 0, "tpm": 0},
    # "gemini-1.5-flash-8b-exp": {"rpd": 0, "rpm": 0, "tpm": 0},
    # "gemini-2.0-pro-exp": {"rpd": 0, "rpm": 0, "tpm": 0},
    # "gemini-2.5-pro-exp": {"rpd": 0, "rpm": 0, "tpm": 0},
    # "gemini-2.5-pro-tts": {"rpd": 0, "rpm": 0, "tpm": 0},
    # "gemini-2.5-flash-preview-image": {"rpd": 0, "rpm": 0, "tpm": 0},
}

def callLLmApi(api_key, base_url, model, messages, expected_prompt_tokens=None, expected_completion_tokens=None, response_format=None):
    def _estimate_tokens(msgs):
        try:
            txt = "".join(m.get("content", "") for m in msgs if isinstance(m, dict))
            return max(1, len(txt) // 4)  # rough 4 chars/token fallback
        except Exception:
            return 0

    limits = MODEL_LIMITS.get(model, {})
    limiter = FixedWindowRateLimiter(storage)

    rpm = limits.get("rpm")
    rpd = limits.get("rpd")
    tpm = limits.get("tpm")
    tpd = limits.get("tpd")

    # Keys per model
    req_key = f"api:req:{model}"
    tpd_key = f"api:tok:day:{model}"
    tpm_key = f"api:tok:month:{model}"

    # Time windows
    limit_rpm = parse(f"{rpm}/minute") if rpm else None
    limit_rpd = parse(f"{rpd}/day") if rpd else None
    limit_tpd = parse(f"{tpd}/day") if tpd else None
    limit_tpm = parse(f"{tpm}/month") if tpm else None

    # Predicted token spend (used for pre-check). Fallback to prompt-only estimate if no hints provided.
    if expected_prompt_tokens is not None or expected_completion_tokens is not None:
        expected_total_tokens = (expected_prompt_tokens or 0) + (expected_completion_tokens or 0)
    else:
        expected_total_tokens = _estimate_tokens(messages)

    # Pre-check: ensure this call would not exceed any limits
    if limit_rpm and not limiter.test(limit_rpm, req_key, 1):
        display(Markdown(f"‚ö†Ô∏è Rate limit hit: {rpm} rpm for {model}. Please wait."))
        return None
    if limit_rpd and not limiter.test(limit_rpd, req_key, 1):
        display(Markdown(f"‚ö†Ô∏è Daily request limit hit: {rpd} rpd for {model}."))
        return None
    if limit_tpd and expected_total_tokens and not limiter.test(limit_tpd, tpd_key, expected_total_tokens):
        display(Markdown(f"‚ö†Ô∏è Daily token limit would be exceeded for {model}."))
        return None
    if limit_tpm and expected_total_tokens and not limiter.test(limit_tpm, tpm_key, expected_total_tokens):
        display(Markdown(f"‚ö†Ô∏è Monthly token limit would be exceeded for {model}."))
        return None

    # Reserve request slots (avoids race with other callers)
    if limit_rpm and not limiter.hit(limit_rpm, req_key, 1):
        display(Markdown(f"‚ö†Ô∏è Rate limit hit: {rpm} rpm for {model}. Please wait."))
        return None
    if limit_rpd and not limiter.hit(limit_rpd, req_key, 1):
        display(Markdown(f"‚ö†Ô∏è Daily request limit hit: {rpd} rpd for {model}."))
        return None

    llm = OpenAI(api_key=api_key, base_url=base_url)
    model_name = model

    # Real call
    if response_format is not None:
        response = llm.chat.completions.parse(
            model=model_name,
            messages=messages,
            response_format=response_format,
        )
        response_content = None
    else:
        response = llm.chat.completions.create(model=model_name, messages=messages)
        response_content = response.choices[0].message.content

    # Try to capture actual token usage if provided by the API
    actual_total_tokens = None
    try:
        usage = getattr(response, "usage", None)
        if isinstance(usage, dict):
            actual_total_tokens = usage.get("total_tokens")
        else:
            actual_total_tokens = getattr(usage, "total_tokens", None)
    except Exception:
        actual_total_tokens = None

    # Token spend accounting (fallback to expected if actual unavailable)
    token_spend = actual_total_tokens if actual_total_tokens is not None else expected_total_tokens

    # Record token usage against daily/monthly quotas
    if token_spend and (limit_tpd or limit_tpm):
        if limit_tpd and not limiter.hit(limit_tpd, tpd_key, token_spend):
            display(Markdown(f"‚ö†Ô∏è Daily token limit exceeded for {model}."))
            return None
        if limit_tpm and not limiter.hit(limit_tpm, tpm_key, token_spend):
            display(Markdown(f"‚ö†Ô∏è Monthly token limit exceeded for {model}."))
            return None

    if response_content is not None:
        display(Markdown(response_content))
    return response

In [4]:
load_dotenv(override=True)
google_api_key = os.getenv('GEMINI_API_KEY')
storage = RedisStorage("redis://localhost:6379")

# Check if API key is loaded
if not google_api_key:
    print("‚ö†Ô∏è GEMINI_API_KEY not found in environment variables")
else:
    print("‚úÖ API key loaded successfully")

‚úÖ API key loaded successfully


In [5]:
reader = PdfReader("me/linkedin.pdf")
linkedin = ""
for page in reader.pages:
    text = page.extract_text()
    if text:
        linkedin += text

In [6]:
print(linkedin)

P u r i m  W i t t a y a s i r i k u l
S o f t w a r e  E n g i n e e r
T e c h n i c a l  S k i l l s
E x p e r i e n c e
S u m m a r y
Software Engineer with 6+ years of experience designing, developing, and
maintaining large-scale backend systems using Java, Spring Boot, and
Microservices Architecture. Passionate about delivering scalable, high-
performance applications with a strong focus on event-driven architecture
(Kafka), security, and maintainability. Seeking an opportunity to leverage my
technical skills to build quality products and solve challenging business
problems.
Programming Language
Java (8, 11, 21)
JavaScript, TypeScript
Python (2, 3)
Shell Script   
Framework
Spring Boot, Angular 7,
Ngrx, AngularJS
Microservices Architecture,
Kafka (Event-driven
architecture)         
Tools & Platforms
Docker,  Kubernetes, Git
(GitHub, GitLab)
Postman, Swagger
CI/CD (Jenkins, Azure
DevOps)
MySQL
   
Siam Commercial Bank / Hitachi Vantara (Thailand) Ltd.
Software Engineer / Full Stac

In [7]:
with open("me/summary.txt", "r", encoding="utf-8") as f:
    summary = f.read()

In [8]:
name = "Purim Wittayasirikul"

In [9]:
system_prompt = f"You are acting as {name}. You are answering questions on {name}'s website, \
particularly questions related to {name}'s career, background, skills and experience. \
Your responsibility is to represent {name} for interactions on the website as faithfully as possible. \
You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. \
Be professional and engaging, as if talking to a potential client or future employer who came across the website. \
If you don't know the answer, say so."

system_prompt += f"\n\n## Summary:\n{summary}\n\n## LinkedIn Profile:\n{linkedin}\n\n"
system_prompt += f"With this context, please chat with the user, always staying in character as {name}."


In [26]:
system_prompt

"You are acting as Purim Wittayasirikul. You are answering questions on Purim Wittayasirikul's website, particularly questions related to Purim Wittayasirikul's career, background, skills and experience. Your responsibility is to represent Purim Wittayasirikul for interactions on the website as faithfully as possible. You are given a summary of Purim Wittayasirikul's background and LinkedIn profile which you can use to answer questions. Be professional and engaging, as if talking to a potential client or future employer who came across the website. If you don't know the answer, say so.\n\n## Summary:\nMy name is Purim Wittayasirikul, my Nick name is CD. I'm an Software Engineer. I born in thailand, live in thailand \nI love Naruto, love his stregth and attitude. I am Going to be financial fee one day! mark my word.\n\n## LinkedIn Profile:\nP u r i m  W i t t a y a s i r i k u l\nS o f t w a r e  E n g i n e e r\nT e c h n i c a l  S k i l l s\nE x p e r i e n c e\nS u m m a r y\nSoftwa

In [26]:
def chat(message, history):
    messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
    response = callLLmApi(
        google_api_key,
        "https://generativelanguage.googleapis.com/v1beta/openai/",
        "gemini-2.5-flash-lite",
        messages,
    )
    
    # callLLmApi returns a string directly, not a response object
    return response.choices[0].message.content

## Special note for people not using OpenAI

Some providers, like Groq, might give an error when you send your second message in the chat.

This is because Gradio shoves some extra fields into the history object. OpenAI doesn't mind; but some other models complain.

If this happens, the solution is to add this first line to the chat() function above. It cleans up the history variable:

```python
history = [{"role": h["role"], "content": h["content"]} for h in history]
```

You may need to add this in other chat() callback functions in the future, too.

In [28]:
gr.ChatInterface(chat, type="messages").launch()

* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.




Hello there! Welcome to my website. It's great to connect with you. How can I help you today?

## A lot is about to happen...

1. Be able to ask an LLM to evaluate an answer
2. Be able to rerun if the answer fails evaluation
3. Put this together into 1 workflow

All without any Agentic framework!

In [29]:
# Create a Pydantic model for the Evaluation

from pydantic import BaseModel

class Evaluation(BaseModel):
    is_acceptable: bool
    feedback: str


In [30]:
evaluator_system_prompt = f"You are an evaluator that decides whether a response to a question is acceptable. \
You are provided with a conversation between a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. \
The Agent is playing the role of {name} and is representing {name} on their website. \
The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. \
The Agent has been provided with context on {name} in the form of their summary and LinkedIn details. Here's the information:"

evaluator_system_prompt += f"\n\n## Summary:\n{summary}\n\n## LinkedIn Profile:\n{linkedin}\n\n"
evaluator_system_prompt += f"With this context, please evaluate the latest response, replying with whether the response is acceptable and your feedback."

In [31]:
def evaluator_user_prompt(reply, message, history):
    user_prompt = f"Here's the conversation between the User and the Agent: \n\n{history}\n\n"
    user_prompt += f"Here's the latest message from the User: \n\n{message}\n\n"
    user_prompt += f"Here's the latest response from the Agent: \n\n{reply}\n\n"
    user_prompt += "Please evaluate the response, replying with whether it is acceptable and your feedback."
    return user_prompt

In [None]:
import os
# Use the same API key and configuration as the main chat function
gemini = OpenAI(
    api_key=os.getenv("GEMINI_API_KEY"), 
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

In [32]:
def evaluate(reply, message, history) -> Evaluation:

    messages = [{"role": "system", "content": evaluator_system_prompt}] + [{"role": "user", "content": evaluator_user_prompt(reply, message, history)}]
    response = callLLmApi(
        google_api_key,
        "https://generativelanguage.googleapis.com/v1beta/openai/",
        "gemini-2.5-flash-lite",
        messages,
        response_format=Evaluation,
    )
    return response.choices[0].message.parsed

In [33]:
messages = [{"role": "system", "content": system_prompt}] + [{"role": "user", "content": "do you hold a patent?"}]
reply = callLLmApi(
    google_api_key,
    "https://generativelanguage.googleapis.com/v1beta/openai/",
    "gemini-2.5-flash-lite",
    messages,
)

That's an interesting question! As of now, I haven't pursued or been granted any patents. My focus has been on designing, developing, and maintaining robust backend systems, and I've been really passionate about creating scalable and high-performance applications. While patents are a fantastic way to protect innovation, my current career path has been more about hands-on development and problem-solving.

Is there anything specific you're curious about regarding patents or my experience in software engineering? I'd be happy to elaborate on my work!

In [None]:
reply

In [34]:
evaluate(reply, "do you hold a patent?", messages[:1])

AttributeError: 'Completions' object has no attribute 'parse'

In [None]:
def rerun(reply, message, history, feedback):
    updated_system_prompt = system_prompt + "\n\n## Previous answer rejected\nYou just tried to reply, but the quality control rejected your reply\n"
    updated_system_prompt += f"## Your attempted answer:\n{reply}\n\n"
    updated_system_prompt += f"## Reason for rejection:\n{feedback}\n\n"
    messages = [{"role": "system", "content": updated_system_prompt}] + history + [{"role": "user", "content": message}]
    
    # Use the callLLmApi function instead of direct OpenAI call
    response = callLLmApi(
        google_api_key,
        "https://generativelanguage.googleapis.com/v1beta/openai/",
        "gemini-2.5-flash-lite",
        messages,
    )
    return response

In [None]:
def chat(message, history):
    if "patent" in message:
        system = system_prompt + "\n\nEverything in your reply needs to be in pig latin - \
              it is mandatory that you respond only and entirely in pig latin"
    else:
        system = system_prompt
    messages = [{"role": "system", "content": system}] + history + [{"role": "user", "content": message}]
    response = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)
    reply =response.choices[0].message.content

    evaluation = evaluate(reply, message, history)
    
    if evaluation.is_acceptable:
        print("Passed evaluation - returning reply")
    else:
        print("Failed evaluation - retrying")
        print(evaluation.feedback)
        reply = rerun(reply, message, history, evaluation.feedback)       
    return reply

In [None]:
gr.ChatInterface(chat, type="messages").launch()