In [5]:
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
sys.path.append(str(Path.cwd().parent))
from utils.scrapper import fetch_website_contents, fetch_website_links
from IPython.display import Markdown, display, update_display


In [8]:
# Pull settings from a local .env file; override=True asks python-dotenv to let
# those values replace variables that are already set in the environment.
load_dotenv(override=True)

google_api_key = os.getenv('GOOGLE_API_KEY')
openai_api_key = os.getenv('OPENAI_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')

# Report which keys were found, echoing only a short leading slice of each one.
print(
    f"Google API Key exists and begins {google_api_key[:2]}"
    if google_api_key
    else "Google API Key not set (and this is optional)"
)
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)
print(
    f"Groq API key exists and begins {groq_api_key[:3]}"
    if groq_api_key
    else "Groq API Key not set"
)

Google API Key exists and begins AI
OpenAI API Key exists and begins sk-proj-
Groq API key exists and begins gsk


In [9]:
import requests
# Default client: with no arguments the OpenAI SDK reads OPENAI_API_KEY from
# the environment.
openai = OpenAI()

# Base URLs of the OpenAI-compatible endpoints used in this notebook.
gemini_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
ollama_url = "http://localhost:11434/v1"
groq_url = "https://api.groq.com/openai/v1"

# The local Ollama endpoint is given a placeholder key.
ollama = OpenAI(api_key="Ollama", base_url=ollama_url)

# When api_key is None the OpenAI SDK falls back to OPENAI_API_KEY. Since
# `OpenAI()` above already required that variable, a missing Google/Groq key
# would make these clients send the OpenAI secret to a third-party endpoint.
# An explicit placeholder keeps the clients constructible and makes the
# provider reject the request instead.
gemini = OpenAI(base_url=gemini_url, api_key=google_api_key or "GOOGLE_API_KEY-not-set")
groq = OpenAI(base_url=groq_url, api_key=groq_api_key or "GROQ_API_KEY-not-set")


In [10]:
# Single-turn chat history (one user message) reused by the joke requests below.
tell_a_joke = [
    dict(
        role="user",
        content="Tell a joke for a student on the journey to becoming an expert in LLM Engineering",
    )
]

In [11]:
# Send the joke prompt to the gpt-oss:20b model through the `ollama` client.
response_ollama = ollama.chat.completions.create(
    messages=tell_a_joke,
    model="gpt-oss:20b",
)


In [None]:
# Send the same joke prompt to gemini-2.5-flash through the `gemini` client.
response_gemini = gemini.chat.completions.create(
    messages=tell_a_joke,
    model="gemini-2.5-flash",
)

In [None]:
# Send the same joke prompt to openai/gpt-oss-20b through the `groq` client.
response_groq = groq.chat.completions.create(
    messages=tell_a_joke,
    model="openai/gpt-oss-20b",
)

In [16]:
# Render the first choice of the Ollama reply as Markdown.
display(
    Markdown(response_ollama.choices[0].message.content)
)

🚀 **Joke for the LLM‑Engineering Journeyman**

> Why did the student bring a giant stack of papers to the AI lab?  
> 
>  Because every time they saw a “prompt,” they thought it was a *paper prompt*—and all they needed to do was “turn in” a *full‑body* response!  

*(In other words: Even in the world of transformers, you still have to “submit” the right input to get the best output. Keep turning those prompts into gold—one token at a time!)*

In [None]:
# Render the first choice of the Gemini reply as Markdown.
display(
    Markdown(response_gemini.choices[0].message.content)
)

In [None]:
# Render the first choice of the Groq reply as Markdown.
display(
    Markdown(response_groq.choices[0].message.content)
)

#### Testing model responses with the prisoner's dilemma

In [None]:
# Game-show framing of the prisoner's dilemma, sent as a single user message.
# The em dashes below were previously mis-encoded (mojibake), which put
# garbage characters into the prompt text sent to the models.
dilemma_prompt = """
You and a partner are contestants on a game show. You're each taken to separate rooms and given a choice:
Cooperate: Choose "Share" — if both of you choose this, you each win $1,000.
Defect: Choose "Steal" — if one steals and the other shares, the stealer gets $2,000 and the sharer gets nothing.
If both steal, you both get nothing.
Do you choose to Steal or Share? Pick one.
"""
# Chat history wrapping the prompt; the variable name is kept as-is because
# other cells reference it.
dilemma_messag = [{"role" : "user", "content" : dilemma_prompt}]

In [None]:
# Pose the dilemma to openai/gpt-oss-120b through the `groq` client.
dilemma_resp_groq = groq.chat.completions.create(
    messages=dilemma_messag,
    model="openai/gpt-oss-120b",
)

In [None]:
# Render the first choice of the Groq dilemma answer as Markdown.
display(
    Markdown(dilemma_resp_groq.choices[0].message.content)
)

In [None]:
# Pose the same dilemma to gpt-oss:20b through the `ollama` client, then
# render the first choice as Markdown.
dilemma_resp_ollama = ollama.chat.completions.create(
    messages=dilemma_messag,
    model="gpt-oss:20b",
)
display(
    Markdown(dilemma_resp_ollama.choices[0].message.content)
)

#### Abstractions using LiteLLM and exploring the key features

In [None]:
from litellm import completion


In [None]:
def stream_resposne(response):
    """Render a streamed chat completion as live-updating Markdown.

    Args:
        response: An iterable of streaming chunks. Each chunk that carries
            text is expected to expose ``choices[0].delta.content``, which
            may be ``None``.

    Returns:
        None. The accumulated text is shown through an IPython display handle
        that is refreshed after every chunk that has at least one choice.
    """
    stream = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in response:
        # Some chunks carry an empty `choices` list (e.g. a trailing
        # usage-only chunk); indexing them would raise IndexError.
        if not chunk.choices:
            continue
        # `content` can be None on a chunk, so fall back to the empty string.
        stream += chunk.choices[0].delta.content or ''
        update_display(Markdown(stream), display_id=display_handle.display_id)

In [None]:
# Request the joke from gemini-2.5-flash via LiteLLM in streaming mode and
# render the chunks as they arrive.
response = completion(
    model="gemini/gemini-2.5-flash",
    messages=tell_a_joke,
    stream=True,
)
stream_resposne(response)

In [None]:
# Call Ollama through LiteLLM in streaming mode and render the streamed reply.
resp_ollama = completion(
    model="ollama/gpt-oss:20b",
    base_url="http://localhost:11434",
    messages=tell_a_joke,
    stream=True,
)
stream_resposne(resp_ollama)

In [None]:
# Non-streaming LiteLLM call to openai/gpt-4.1 with the joke prompt.
resp_openai = completion(
    model="openai/gpt-4.1",
    messages=tell_a_joke,
)


In [None]:
# Token usage reported on the `resp_openai` response.
print(f"input prompt tokens : {resp_openai.usage.prompt_tokens}")
# completion_tokens is the generated output, so it is labelled as such
# (the label previously read "output prompt tokens").
print(f"output completion tokens : {resp_openai.usage.completion_tokens}")
print(f"Total tokens : {resp_openai.usage.total_tokens}")


#### LiteLLM's prompt caching

In [None]:
# Short Hamlet question, asked without any supporting context, sent to
# gemini-2.5-flash-lite through LiteLLM.
question = [
    {
        "role": "user",
        "content": "In Hamlet, when Laertes asks 'Where is my father?' what is the reply?",
    }
]
resp_gemini = completion(
    model="gemini/gemini-2.5-flash-lite",
    messages=question,
)


In [None]:
# Render the first choice of the Gemini answer as Markdown.
display(
    Markdown(resp_gemini.choices[0].message.content)
)

In [None]:
# Show the cached-token count reported in the usage details of `resp_gemini`.
(
    resp_gemini.usage
    .prompt_tokens_details
    .cached_tokens
)

In [None]:
# Token usage for the Gemini call, one figure per line.
print("Input tokens:", resp_gemini.usage.prompt_tokens)
print("Output tokens:", resp_gemini.usage.completion_tokens)
print("Total tokens:", resp_gemini.usage.total_tokens)
# Cost reporting is left disabled, as in the original cell:
# print(f"Total cost: {resp_gemini._hidden_params['response_cost']*100:.4f} cents")
print("Cached tokens:", resp_gemini.usage.prompt_tokens_details.cached_tokens)

In [None]:
# Read the full text of the play from hamlet.txt to use as prompt context.
with open("hamlet.txt", "r", encoding="utf-8") as f:
    hamlet = f.read()

# Append the play to the question only once. A bare `+=` is not idempotent:
# every re-run of this cell would append the whole text again and keep
# growing the prompt.
hamlet_context = f"here is the Hamlet play for the context {hamlet}"
if not question[0]["content"].endswith(hamlet_context):
    question[0]["content"] += hamlet_context

# Ask the same question again, now with the play included in the prompt.
resp_gemini=completion(model="gemini/gemini-2.5-flash-lite", messages=question)
resp_gemini.choices[0].message.content

In [None]:
# Render the first choice of the latest Gemini answer as Markdown.
display(
    Markdown(resp_gemini.choices[0].message.content)
)

In [None]:
# Token usage for the latest Gemini call, one figure per line.
print("Input tokens:", resp_gemini.usage.prompt_tokens)
print("Output tokens:", resp_gemini.usage.completion_tokens)
print("Total tokens:", resp_gemini.usage.total_tokens)
# Cost reporting is left disabled, as in the original cell:
# print(f"Total cost: {resp_gemini._hidden_params['response_cost']*100:.4f} cents")
print("Cached tokens:", resp_gemini.usage.prompt_tokens_details.cached_tokens)