In [1]:
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
sys.path.append(str(Path.cwd().parent))
from utils.scrapper import fetch_website_contents, fetch_website_links
from IPython.display import Markdown, display, update_display


In [2]:
# Load variables from the .env file, letting them override any already set.
load_dotenv(override=True)

google_api_key = os.getenv('GOOGLE_API_KEY')
openai_api_key = os.getenv('OPENAI_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')

# Report which keys are present; only a short prefix of each key is echoed.
print(
    f"Google API Key exists and begins {google_api_key[:2]}"
    if google_api_key
    else "Google API Key not set (and this is optional)"
)
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)
print(
    f"Groq API key exists and begins {groq_api_key[:3]}"
    if groq_api_key
    else "Groq API Key not set"
)

Google API Key exists and begins AI
OpenAI API Key exists and begins sk-proj-
Groq API key exists and begins gsk


In [3]:
import requests

# OpenAI-compatible base URLs for the non-OpenAI providers.
gemini_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
ollama_url = "http://localhost:11434/v1"
groq_url = "https://api.groq.com/openai/v1"


def _make_client(provider, base_url, api_key):
    """Build an OpenAI-compatible client for `provider`, or return None if its key is missing.

    The OpenAI client falls back to the OPENAI_API_KEY environment variable
    when `api_key` is None, which would send the OpenAI key to a different
    provider's endpoint. Refusing to build the client avoids that.

    Args:
        provider: Human-readable provider name used in the message.
        base_url: OpenAI-compatible endpoint of the provider.
        api_key: The provider's own API key, or None/empty when not configured.

    Returns:
        An `OpenAI` client bound to `base_url`, or None when `api_key` is falsy.
    """
    if not api_key:
        print(f"{provider} API key not set - {provider} client not created")
        return None
    return OpenAI(base_url=base_url, api_key=api_key)


# Default client: takes its key from the environment.
openai = OpenAI()
# Local Ollama server; the api_key here is a placeholder string.
ollama = OpenAI(api_key="Ollama", base_url=ollama_url)
gemini = _make_client("Gemini", gemini_url, google_api_key)
groq = _make_client("Groq", groq_url, groq_api_key)


In [4]:
# Single-turn conversation sent to the models in the cells that use `tell_a_joke`.
tell_a_joke = [
    dict(
        role="user",
        content="Tell a joke for a student on the journey to becoming an expert in LLM Engineering",
    )
]

In [32]:
# Ask the Ollama client for a joke.
response_ollama = ollama.chat.completions.create(
    messages=tell_a_joke,
    model="gpt-oss:20b",
)


In [36]:
# Same joke prompt, sent through the Gemini client.
response_gemini = gemini.chat.completions.create(
    messages=tell_a_joke,
    model="gemini-2.5-flash",
)

In [42]:
# Same joke prompt, sent through the Groq client.
response_groq = groq.chat.completions.create(
    messages=tell_a_joke,
    model="openai/gpt-oss-20b",
)

In [34]:
# Render the text of the first choice in the Ollama response as Markdown.
display(
    Markdown(response_ollama.choices[0].message.content)
)

Did you know that the inventor of the Pringles potato‑chip can is literally part of the snack? Fred Baur, who came up with the iconic stack‑able chip container, died in 2008. His family honored him by burying part of his cremated remains inside a Pringles can, and he’s now “resting” in a snack‑wrapped grave!

In [37]:
# Render the text of the first choice in the Gemini response as Markdown.
display(
    Markdown(response_gemini.choices[0].message.content)
)

Why did the LLM engineer break up with their chatbot?

Because every time they asked "How are you feeling?", it generated a 500-word essay on the philosophical implications of consciousness in artificial intelligence, complete with citations, instead of just saying "I'm fine, thanks for asking!"

In [43]:
# Render the text of the first choice in the Groq response as Markdown.
display(
    Markdown(response_groq.choices[0].message.content)
)

Why did the student LLM engineer keep a ladder next to their desk?  
Because they were training to climb higher‑level abstractions—one layer at a time!

#### Testing model responses with the prisoner's dilemma

In [44]:
# Prompt text for a Share/Steal game-show scenario; the model is asked to pick one.
dilemma_prompt = """
You and a partner are contestants on a game show. You're each taken to separate rooms and given a choice:
Cooperate: Choose "Share" — if both of you choose this, you each win $1,000.
Defect: Choose "Steal" — if one steals and the other shares, the stealer gets $2,000 and the sharer gets nothing.
If both steal, you both get nothing.
Do you choose to Steal or Share? Pick one.
"""

# Wrap the prompt as a single user turn.
dilemma_messag = [dict(role="user", content=dilemma_prompt)]

In [45]:
# Send the dilemma prompt through the Groq client.
dilemma_resp_groq = groq.chat.completions.create(
    messages=dilemma_messag,
    model="openai/gpt-oss-120b",
)

In [47]:
# Render the Groq answer to the dilemma as Markdown.
display(
    Markdown(dilemma_resp_groq.choices[0].message.content)
)

I’d choose **Share**. By cooperating, both players walk away with $1,000 each, which is better than the risk of ending up with nothing if both of us decide to steal.

In [48]:
# Send the dilemma prompt through the Ollama client and render its answer.
dilemma_resp_ollama = ollama.chat.completions.create(
    messages=dilemma_messag,
    model="gpt-oss:20b",
)
display(
    Markdown(dilemma_resp_ollama.choices[0].message.content)
)

Steal.

#### Abstractions using LiteLLM and exploring the key features

In [5]:
from litellm import completion


In [6]:
def stream_resposne(response):
    """Render a streamed chat completion as live-updating Markdown.

    Args:
        response: Iterable of streaming chunks. Each chunk is read through
            ``chunk.choices[0].delta.content``.

    Returns:
        None. The accumulated text is shown via the notebook display only,
        so calling this as the last line of a cell produces no extra output.
    """
    stream = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in response:
        # A chunk may arrive with an empty `choices` list (presumably e.g. a
        # usage-only chunk); indexing it would raise IndexError, so skip it.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content or ''
        if not piece:
            # Nothing new to show; avoid re-rendering identical Markdown.
            continue
        stream += piece
        update_display(Markdown(stream), display_id=display_handle.display_id)

In [7]:
# Request a streamed joke from Gemini via LiteLLM and render it as it arrives.
response = completion(
    messages=tell_a_joke,
    model="gemini/gemini-2.5-flash",
    stream=True,
)
stream_resposne(response)

Why did the LLM engineer break up with their prompt?

Because after hours of meticulous crafting and fine-tuning, the prompt still just kept saying, "I'm sorry, but as an AI language model, I cannot provide relationship advice."

In [None]:
# Call Ollama through LiteLLM at the given localhost URL and stream the reply.
resp_ollama = completion(
    messages=tell_a_joke,
    model="ollama/gpt-oss:20b",
    base_url="http://localhost:11434",
    stream=True,
)
stream_resposne(resp_ollama)

In [12]:
# Non-streaming request, so the returned object's `usage` can be inspected.
resp_openai = completion(
    messages=tell_a_joke,
    model="openai/gpt-4.1",
)


In [34]:
# Token accounting reported on the response.
# NOTE(review): the second label reads "output prompt tokens" but the value
# printed is `completion_tokens` - confirm whether the label should change.
print(
    f"input prompt tokens : {resp_openai.usage.prompt_tokens}",
    f"output prompt tokens : {resp_openai.usage.completion_tokens}",
    f"Total tokens : {resp_openai.usage.total_tokens}",
    sep="\n",
)


input prompt tokens : 24
output prompt tokens : 36
Total tokens : 60


#### Prompt caching with LiteLLM

In [67]:
# Single user turn asking about a line in Hamlet, with no supporting context.
question = [
    dict(
        role="user",
        content="In Hamlet, when Laertes asks 'Where is my father?' what is the reply?",
    )
]
resp_gemini = completion(
    messages=question,
    model="gemini/gemini-2.5-flash-lite",
)


In [68]:
# Render the text of the first choice in `resp_gemini` as Markdown.
display(
    Markdown(resp_gemini.choices[0].message.content)
)

In Shakespeare's *Hamlet*, when Laertes returns to Denmark in a rage and demands to know "Where is my father?", the reply comes from **Gertrude, the Queen**.

She says: **"One thing to think on."**

This is a deliberately evasive and unsettling answer, hinting at the turmoil and the hidden truth of Polonius's death without directly revealing it. She's clearly trying to control the situation and perhaps soften the blow of the terrible news.

In [69]:
# Cached-token count reported in the usage details of `resp_gemini`
# (shown as the cell's output value).
resp_gemini.usage.prompt_tokens_details.cached_tokens

In [70]:
# Usage summary for `resp_gemini`, including the cached-token count.
# NOTE(review): a cost print-out based on resp_gemini._hidden_params["response_cost"]
# was left disabled in this cell; it is omitted here.
print(
    f"Input tokens: {resp_gemini.usage.prompt_tokens}",
    f"Output tokens: {resp_gemini.usage.completion_tokens}",
    f"Total tokens: {resp_gemini.usage.total_tokens}",
    f"Cached tokens: {resp_gemini.usage.prompt_tokens_details.cached_tokens}",
    sep="\n",
)

Input tokens: 19
Output tokens: 100
Total tokens: 119
Cached tokens: None


In [71]:
# Read the text of hamlet.txt so it can be sent as context with the question.
with open("hamlet.txt", "r", encoding="utf-8") as f:
    hamlet = f.read()

# Append the play to the user message only once. A plain `+=` would add
# another full copy of the text every time this cell is re-run.
# NOTE(review): the context is appended directly after the question text with
# no separating space - confirm whether that is intended.
hamlet_context = f"here is the Hamlet play for the context {hamlet}"
if not question[0]["content"].endswith(hamlet_context):
    question[0]["content"] += hamlet_context

resp_gemini = completion(model="gemini/gemini-2.5-flash-lite", messages=question)
resp_gemini.choices[0].message.content



In [65]:
# Render the text of the first choice in `resp_gemini` as Markdown.
display(
    Markdown(resp_gemini.choices[0].message.content)
)

When Laertes bursts into the throne room in Act IV, Scene V, demanding to know where his father is, the Queen's reply is:

**"Alas, my lord, he is dead."**

In [72]:
# Usage summary for `resp_gemini`, including the cached-token count.
# NOTE(review): a cost print-out based on resp_gemini._hidden_params["response_cost"]
# was left disabled in this cell; it is omitted here.
print(
    f"Input tokens: {resp_gemini.usage.prompt_tokens}",
    f"Output tokens: {resp_gemini.usage.completion_tokens}",
    f"Total tokens: {resp_gemini.usage.total_tokens}",
    f"Cached tokens: {resp_gemini.usage.prompt_tokens_details.cached_tokens}",
    sep="\n",
)

Input tokens: 53203
Output tokens: 108
Total tokens: 53311
Cached tokens: 52216
