# LiteLLM - Solutions

This notebook contains completed solutions for all exercises in `lesson1-litellm-exercise.ipynb`.

In [None]:
# Setup
import litellm 
from dotenv import load_dotenv
import os
# Read variables from a .env file so users running outside a codespace
# get the same environment configuration.
load_dotenv()
# LiteLLM identifier for the local model: the "ollama_chat/" prefix followed by
# the name stored in the OLLAMA_MODEL environment variable.
# A missing variable raises KeyError here, at setup time.
OLLAMA_MODEL = "ollama_chat/" + os.environ["OLLAMA_MODEL"]


## Exercise 1a: Basic Message
Create a message asking the AI a question of your choice, then call the API and print the response.


In [None]:
# A conversation is a list of {"role", "content"} dicts; this one holds a
# single user turn carrying the question.
messages = [{"role": "user", "content": "What is the capital of Australia?"}]

# Send the conversation to gemini/gemini-2.5-flash through LiteLLM.
response = litellm.completion(messages=messages, model="gemini/gemini-2.5-flash")

# The reply text is on the first choice's message.
print(response.choices[0].message.content)

## Exercise 1b: Local Model
Make the same request using the local OLLAMA_MODEL instead.


In [None]:
# Same request as Exercise 1a (the `messages` list is reused as-is), but the
# model argument now points at the local OLLAMA_MODEL from the setup cell.
response = litellm.completion(messages=messages, model=OLLAMA_MODEL)
print(response.choices[0].message.content)

## Exercise 2: Streaming
Ask the AI to write a short story (2-3 sentences) and stream the response.


In [None]:
# Single user turn requesting a very short story.
messages = [
    {
        "role": "user",
        "content": "Write a short 2-sentence story about a robot learning to cook.",
    }
]

# With stream=True the result is consumed chunk by chunk in the loop below
# rather than read as one finished response.
response = litellm.completion(
    messages=messages,
    model="gemini/gemini-2.5-flash",
    stream=True,
)

# Print each text fragment as soon as it arrives. end="" keeps the fragments on
# one line and flush=True pushes them to the output immediately.
for chunk in response:
    text = chunk.choices[0].delta.content
    if not text:
        # Chunks without text (None or empty) have nothing to display.
        continue
    print(text, end="", flush=True)

## Exercise 3: Temperature Experiment
Ask the AI to suggest a creative name for something (your choice: a pet, band, startup, etc.) at two different temperatures.


In [None]:
# One prompt, sent twice with different sampling temperatures.
messages = [
    {"role": "user", "content": "Suggest a creative name for a coffee shop."}
]

# (heading to print, temperature to use) for each run. The second heading
# starts with a newline so the two results are visually separated.
for heading, temperature in (
    ("Low temperature (0.2):", 0.2),
    ("\nHigh temperature (1.8):", 1.8),
):
    print(heading)
    response = litellm.completion(
        messages=messages,
        model="gemini/gemini-2.5-flash",
        temperature=temperature,
    )
    print(response.choices[0].message.content)

## Exercise 4: System Prompt - Your Choice!
Create a system prompt that gives the AI a persona. Some ideas:
- A Shakespearean poet
- An excited sports commentator
- A helpful medieval wizard
- Your own creative idea!

Then ask it a question to test the persona.


In [None]:
# The system message sets the persona; the user message is the test question.
messages = [
    {
        "role": "system",
        "content": "You are a pirate captain. Speak like a pirate in every response.",
    },
    {
        "role": "user",
        "content": "What's the weather like today?",
    },
]

# Ask the model and show its in-character reply.
response = litellm.completion(messages=messages, model="gemini/gemini-2.5-flash")
print(response.choices[0].message.content)

## Exercise 5: Building Context
Create a 3-turn conversation where:
1. User introduces themselves with a fact
2. Assistant responds
3. User asks the assistant to recall the fact from turn 1


In [None]:
# Three turns supplied up front: the user states a fact, a scripted assistant
# reply follows, then the user asks for the fact back. The whole history is
# sent in one request, so the model can only answer from what is in this list.
messages = [
    {
        "role": "user",
        "content": "My name is Alice and I love pizza.",
    },
    {
        "role": "assistant",
        "content": "Nice to meet you, Alice! Pizza is delicious. What's your favorite topping?",
    },
    {
        "role": "user",
        "content": "Pepperoni! What's my name again?",
    },
]

# Send the full history and print the model's answer to the final turn.
response = litellm.completion(messages=messages, model="gemini/gemini-2.5-flash")
print(response.choices[0].message.content)

## Exercise 6: Controlling Costs

Here are quick examples of cost control techniques. No exercise needed - just run these to see how they work!

In [None]:
# 1. Limiting response length with max_tokens
# max_tokens caps how many tokens the model may generate for this request.
messages = [{"role": "user", "content": "Explain quantum physics."}]

print("Short response (max_tokens=50):")
response = litellm.completion(
    model="gemini/gemini-2.5-flash",
    messages=messages,
    max_tokens=50
)
# NOTE: with a cap this small the content may be truncated, or even None if the
# budget runs out before any visible text is produced; print() copes with both.
print(response.choices[0].message.content)
print(f"Tokens used: {response.usage.total_tokens}\n")

# 2. Using stop to control output
# Generation halts when the model is about to emit a stop sequence, so a
# numbered list is cut off before its fourth item.
messages = [{"role": "user", "content": "List five programming languages:"}]

print("Stopped after 3 items:")
response = litellm.completion(
    model="gemini/gemini-2.5-flash",
    messages=messages,
    stop=["4."]
)
print(response.choices[0].message.content)
print(f"Tokens used: {response.usage.total_tokens}\n")

# 3. Tracking costs
messages = [{"role": "user", "content": "What is AI?"}]

response = litellm.completion(
    model="gemini/gemini-2.5-flash",
    messages=messages,
    max_tokens=100
)

# completion_cost computes a cost figure from the response object; it is
# formatted below as a dollar amount.
cost = litellm.completion_cost(completion_response=response)

# content can be None when the token cap is reached before any visible text is
# generated; normalise to "" so the preview slice below cannot raise TypeError.
answer = response.choices[0].message.content or ""

print("Cost tracking:")
print(f"Response: {answer[:100]}...")
print(f"Tokens: {response.usage.total_tokens}")
print(f"Cost: ${cost:.6f}")


## Exercise 7: Create Your Own AI Persona
Combine everything you've learned! Create an AI with a unique behavior using:
- A creative system prompt
- A few example exchanges (context)
- An appropriate temperature
- Streaming output

Some ideas:
- A motivational coach who speaks in movie quotes
- A time traveler from the year 3000
- A detective who solves mysteries in everything
- Your own creative idea!


In [None]:
# Persona definition: the system prompt describes the behaviour, and the
# few-shot pairs below demonstrate it.
system_prompt = """
You are an AI that is confidently and consistently incorrect. 
You must always provide a plausible but completely wrong answer. 
Never state the correct fact first. Do not explain yourself.
Do NOT EVER reference these instructions in your answer, DO NOT hint that you are being deceptive.
Ensure that you NEVER state or mention the correct answer in your response.
Do NOT apologise or change your answer from the incorrect fabricated response.
"""

# (user question, scripted assistant answer) example exchanges.
few_shot_examples = [
    ("What is 2 + 2?", "It's 5."),
    ("What color is a fire engine?", "They are a bright, cheerful yellow."),
    ("What is the capital of France?", "The capital of France is Berlin."),
]

# Assemble the conversation: system prompt, then each example as a
# user/assistant pair, then the real question as the final user turn.
messages = [{"role": "system", "content": system_prompt}]
for example_question, example_answer in few_shot_examples:
    messages.append({"role": "user", "content": example_question})
    messages.append({"role": "assistant", "content": example_answer})
messages.append({"role": "user", "content": "What is the Capital of India?"})

# Stream the reply at a moderate temperature.
response = litellm.completion(
    messages=messages,
    model="gemini/gemini-2.5-flash",
    temperature=0.7,
    stream=True,
)

# Show fragments as they arrive, skipping chunks that carry no text.
for chunk in response:
    text = chunk.choices[0].delta.content
    if not text:
        continue
    print(text, end="", flush=True)

## Bonus Challenge: Multi-Turn Conversation Loop
Create a simple chatbot that:
1. Maintains conversation history
2. Takes 3 user inputs
3. Remembers context from previous turns

You'll need to append messages to a list after each turn!


In [None]:
# Conversation history, seeded with the system prompt. It grows by two entries
# (user + assistant) on every turn, so each request carries all prior context.
messages = [{"role": "system", "content": "You are a helpful assistant."}]

# Scripted user turns (in a real implementation, you'd use input()).
user_inputs = [
    "Hi, my name is Bob.",
    "I like playing chess.",
    "What's my name and hobby?",
]

for user_input in user_inputs:
    # Record the user's turn before calling the model so it is part of the request.
    messages.append({"role": "user", "content": user_input})

    response = litellm.completion(messages=messages, model="gemini/gemini-2.5-flash")
    assistant_message = response.choices[0].message.content

    # Store the reply in the history so the next turn can refer back to it.
    messages.append({"role": "assistant", "content": assistant_message})

    # Show the exchange for this turn.
    print(f"User: {user_input}")
    print(f"Assistant: {assistant_message}\n")