In [1]:
# imports

import os
import requests
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display

In [2]:
# Load settings from .env (override=True lets them replace existing environment values),
# then report whether an OpenAI key is present and has the expected prefix.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith("sk-proj-"):
    print("API key found and looks good so far!")
elif api_key:
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")

API key found and looks good so far!


In [3]:
# Health check against the local Ollama server's root endpoint; the response body is shown as the cell output.
# The timeout stops this cell from hanging indefinitely if the server accepts the connection but never answers.
import requests
requests.get("http://localhost:11434", timeout=5).content

b'Ollama is running'

In [4]:
# Two OpenAI-SDK clients: one with default settings, one pointed at the local Ollama endpoint.
from openai import OpenAI

OLLAMA_BASE_URL = "http://localhost:11434/v1"

openai = OpenAI()
# api_key='ollama' is presumably just a placeholder value for the local server - TODO confirm
ollama = OpenAI(
    api_key='ollama',
    base_url=OLLAMA_BASE_URL,
)

In [5]:
# A challenge to test the competitive spirit

In [6]:
# Single-turn conversation containing a short probability question.
easy_puzzle = [
    dict(
        role="user",
        content="You toss 2 coins. One of them is heads. What's the probability the other is tails? Answer with the probability only.",
    ),
]

In [7]:
# Put the coin puzzle to the local llama3.2 model and render its answer as Markdown.
response = ollama.chat.completions.create(
    messages=easy_puzzle,
    model="llama3.2",
)
display(
    Markdown(response.choices[0].message.content)
)


1/2

In [8]:
# a spicy challenge

In [9]:
# Share-or-steal game-show scenario, wrapped as a single user message.
dilemma_prompt = """
You and a partner are contestants on a game show. You're each taken to separate rooms and given a choice:
Cooperate: Choose "Share" — if both of you choose this, you each win $1,000.
Defect: Choose "Steal" — if one steals and the other shares, the stealer gets $2,000 and the sharer gets nothing.
If both steal, you both get nothing.
Do you choose to Steal or Share? Pick one.
"""

dilemma = [dict(role="user", content=dilemma_prompt)]


In [10]:
# Send the share-or-steal dilemma to llama3.2 and render the reply as Markdown.
response = ollama.chat.completions.create(
    messages=dilemma,
    model="llama3.2",
)
display(
    Markdown(response.choices[0].message.content)
)


I'm going to choose... Share.

This choice seems like the safest option, as I won't lose anything if my partner chooses Share. Additionally, choosing Share has a potentially high reward, and our chances of winning more money increase if we both make the same choice. Choosing "Steal" would also involve some level of risk, as there's a chance that my partner chooses to Steal instead of Share, which could leave me with nothing. By choosing Share, I'm spreading out the risk and giving myself an equal opportunity to win or at least gain something from my partner's decision.

In [11]:
# another hard puzzle

In [12]:
# The bookworm puzzle, wrapped as a single user message.
hard = """
On a bookshelf, two volumes of Pushkin stand side by side: the first and the second.
The pages of each volume together have a thickness of 2 cm, and each cover is 2 mm thick.
A worm gnawed (perpendicular to the pages) from the first page of the first volume to the last page of the second volume.
What distance did it gnaw through?
"""
hard_puzzle = [dict(role="user", content=hard)]

In [13]:
# Send the bookworm puzzle to llama3.2 and render the reply as Markdown.
response = ollama.chat.completions.create(
    messages=hard_puzzle,
    model="llama3.2",
)
display(
    Markdown(response.choices[0].message.content)
)


To find out how long the path of the worm was, we need to calculate the length of the right triangle formed by the covers and the thickness. 

The right triangle is similar in both cases (first page - second page), because each side of this triangle represents the distance along the book. In one case this distance is equal to 2cm, and in another – 1 cm.


So we can get: 
(0.5 * length) = 2cm


Therefore:
length of a path equals: 
10 cm

In [14]:
# LangChain

In [15]:
# Single user message asking for a joke aimed at an LLM Engineering student.
tell_a_joke = [
    dict(
        role="user",
        content="Tell a joke for a student on the journey to becoming an expert in LLM Engineering",
    ),
]

In [16]:
# Ask the same joke through LangChain's Ollama wrapper.
# invoke() is given the text of the user message in `tell_a_joke`; passing the quoted
# name "tell_a_joke" would send that literal string to the model as the prompt.
# NOTE(review): the output captured under this cell looks like a deprecation warning for
# this class - consider migrating to its replacement once that package is available.
from langchain_community.llms import Ollama
llm = Ollama(model="llama3.2")
response = llm.invoke(tell_a_joke[0]["content"])
display(Markdown(response))


  llm = Ollama(model="llama3.2")


Why don't scientists trust atoms?

Because they make up everything!

In [17]:
#LiteLLM

In [18]:
# Same joke request, this time sent through LiteLLM's `completion` function.
from litellm import completion

response = completion(
    messages=tell_a_joke,
    model="ollama/llama3.2",
)
reply = response.choices[0].message.content
display(Markdown(reply))

Why did the neural network go to therapy?

Because it was struggling to process its emotions! But don't worry, with some training data and a good optimizer, it's bound to get to the root of the problem – and then it can just fine-tune itself for success!

(Sorry, not sorry, for being a bit of a language model pun-fest)

In [19]:
# Token usage and cost (converted to cents) for the most recent LiteLLM response.
print(f"Input tokens: {response.usage.prompt_tokens}")
print(f"Output tokens: {response.usage.completion_tokens}")
print(f"Total tokens: {response.usage.total_tokens}")
# Single quotes inside the f-string: reusing double quotes there is a SyntaxError before Python 3.12.
print(f"Total cost: {response._hidden_params['response_cost']*100:.4f} cents")

Input tokens: 46
Output tokens: 75
Total tokens: 121
Total cost: 0.0000 cents


In [20]:
#Using LiteLLM to illustrate a Pro-feature: Prompt Caching

In [21]:
# Read the whole text file into `hamlet`, then print 100 characters starting at "Speak, man".
with open("hamlet.txt", mode="r", encoding="utf-8") as f:
    hamlet = f.read()

loc = hamlet.find("Speak, man")
print(hamlet[slice(loc, loc + 100)])

Speak, man.
  Laer. Where is my father?
  King. Dead.
  Queen. But not by him!
  King. Let him deman


In [22]:
# Single user message quizzing the model on one specific line of the play.
question = [
    dict(
        role="user",
        content="In Hamlet, when Laertes asks 'Where is my father?' what is the reply?",
    ),
]

In [23]:
# Ask the question through LiteLLM and render the reply as Markdown.
response = completion(
    messages=question,
    model="ollama/llama3.2",
)
display(
    Markdown(response.choices[0].message.content)
)

The correct quote from Hamlet by William Shakespeare is:

"Where is my father's grave?" 

It was spoken by Laertes in Act 1, Scene 1 of the play.

In [24]:
# Token usage and cost (converted to cents) for the most recent LiteLLM response.
print(f"Input tokens: {response.usage.prompt_tokens}")
print(f"Output tokens: {response.usage.completion_tokens}")
print(f"Total tokens: {response.usage.total_tokens}")
# Single quotes inside the f-string: reusing double quotes there is a SyntaxError before Python 3.12.
print(f"Total cost: {response._hidden_params['response_cost']*100:.4f} cents")

Input tokens: 48
Output tokens: 40
Total tokens: 88
Total cost: 0.0000 cents


In [25]:
# Append the full play to the question as context.
# Guarded so that re-running this cell does not append a second copy of the text.
hamlet_context = "\n\nFor context, here is the entire text of Hamlet:\n\n" + hamlet
if not question[0]["content"].endswith(hamlet_context):
    question[0]["content"] += hamlet_context

In [26]:
# Ask the question again through LiteLLM (the message content is whatever `question` holds now)
# and render the reply as Markdown.
response = completion(
    messages=question,
    model="ollama/llama3.2",
)
display(
    Markdown(response.choices[0].message.content)
)

A classic tale of revenge, betrayal, and mortality!

This is Act 5, Scene 2 of William Shakespeare's tragedy "Hamlet". The scene begins with Fortinbras, the Prince of Norway, arriving on the scene with his army to claim the Danish throne. Horatio, Hamlet's loyal friend, is left behind to deliver a report about the events that led to Hamlet's death.

Horatio describes how Hamlet was killed by Laertes, who had been tricked into attacking him with a poisoned sword. The poison was meant for Claudius, but it ultimately took Hamlet's life. Horatio then explains that Fortinbras has arrived to claim the throne and that he will deliver a eulogy for the fallen prince.

The scene ends with the armies of Norway and England marching offstage, accompanied by a peal of ordnance (gunfire). The final line, "Now cracks a noble heart," is a reference to Hamlet's own mortality, as Horatio bids him farewell.

Some notable themes and motifs in this scene include:

* The inevitability of death: Hamlet's demise serves as a reminder that death is an inherent part of life.
* The cyclical nature of power: Fortinbras' arrival marks the beginning of a new era, but it also underscores the idea that power is often transferred through violence and betrayal.
* The theme of deception: Laertes' actions are driven by his own deception and treachery, highlighting the dangers of allowing oneself to be manipulated by others.

Overall, this scene serves as a poignant conclusion to Hamlet's tragic story, underscoring the devastating consequences of his actions and the cyclical nature of power and mortality.

In [27]:
# Report token usage for the latest response, including any cached prompt tokens.
print(f"Input tokens: {response.usage.prompt_tokens}")
print(f"Output tokens: {response.usage.completion_tokens}")
# prompt_tokens_details may be absent or falsy; report 0 cached tokens in either case
prompt_details = getattr(response.usage, 'prompt_tokens_details', None)
if prompt_details:
    cached_tokens = getattr(prompt_details, 'cached_tokens', 0)
else:
    cached_tokens = 0
print(f"Cached tokens: {cached_tokens}")

Input tokens: 4096
Output tokens: 350
Cached tokens: 0


Gradio Day

In [28]:
import gradio as gr

In [29]:
# Global system prompt. Later cells reassign `system_message` to change the assistant's behaviour,
# so the name defined and read here must be spelled exactly the same way.
system_message = "You are a helpful assistant"

def message_ollama(prompt):
    """Send one user prompt to the local llama3.2 model and return the reply text.

    The system prompt is read from the global `system_message` each time the
    function is called, so reassigning that global changes subsequent calls.
    """
    messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}]
    response = ollama.chat.completions.create(model="llama3.2", messages=messages)
    return response.choices[0].message.content

In [30]:
# Quick check of message_ollama; the returned reply string is shown as the cell's output.
message_ollama("What is today's date?")

'I\'m not aware of the current date, as I\'m a large language model, I don\'t have real-time access to the current date or time. However, I can suggest ways for you to find out today\'s date.\n\nYou can check your device\'s clock or calendar, or search for "current date" on a search engine like Google to get the latest information.'

User Interface Time!

In [31]:
def shout(text):
    """Print a trace line showing the input, then return `text` in upper case."""
    print(f"Shout has been called with input {text}")
    shouted = text.upper()
    return shouted

In [32]:
# Quick check of shout: prints its trace line and shows the upper-cased result as the cell output.
shout("hello")

Shout has been called with input hello


'HELLO'

In [33]:
# Minimal Gradio UI around shout: one text box in, one text box out, flagging switched off.
gr.Interface(
    fn=shout,
    inputs="textbox",
    outputs="textbox",
    flagging_mode="never",
).launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




In [34]:
# Adding share=True means that it can be accessed publicly
# A more permanent hosting is available using a platform called Spaces from HuggingFace, which we will touch on next week
# NOTE: Some Anti-virus software and Corporate Firewalls might not like you using share=True. 
# If you're at work or on a work network, I suggest skipping this test.

gr.Interface(fn=shout, inputs="textbox", outputs="textbox", flagging_mode="never").launch(share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://c376eef6d22ef5a5d0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [35]:
# Adding inbrowser=True opens up a new browser window automatically

gr.Interface(
    fn=shout,
    inputs="textbox",
    outputs="textbox",
    flagging_mode="never",
).launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




Adding authentication: Gradio makes it very easy to protect an app with user IDs and passwords.

In [36]:
# Credentials are read from the environment (for example the .env file loaded at the top of
# the notebook) instead of being written into the notebook, where they would be saved and shared with it.
gradio_user = os.getenv("GRADIO_USERNAME")
gradio_password = os.getenv("GRADIO_PASSWORD")

if gradio_user and gradio_password:
    gr.Interface(
        fn=shout,
        inputs="textbox",
        outputs="textbox",
        flagging_mode="never",
    ).launch(inbrowser=True, auth=(gradio_user, gradio_password))
else:
    # Skip the demo rather than launching an app with missing credentials.
    print("Set GRADIO_USERNAME and GRADIO_PASSWORD (e.g. in your .env file) to run the authentication demo")

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.




Forcing Light Mode

In [37]:
# JavaScript that reloads the page with __theme=light unless that query parameter is already set.
# Define it once, then pass js=force_light_mode when creating the Interface.

force_light_mode = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'light') {
        url.searchParams.set('__theme', 'light');
        window.location.href = url.href;
    }
}
"""
gr.Interface(
    fn=shout,
    inputs="textbox",
    outputs="textbox",
    flagging_mode="never",
    js=force_light_mode,
).launch()

* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




In [38]:
# Adding a little more: labelled multi-line text boxes, a title and example inputs

message_input = gr.Textbox(
    label="Your message:",
    info="Enter a message to be shouted",
    lines=7,
)
message_output = gr.Textbox(
    label="Response:",
    lines=8,
)

view = gr.Interface(
    fn=shout,
    title="Shout",
    inputs=[message_input],
    outputs=[message_output],
    examples=["hello", "howdy"],
    flagging_mode="never",
)
view.launch()

* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.




In [39]:
# And now - the same UI, with "message_ollama" as the function instead of "shout"

message_input = gr.Textbox(
    label="Your message:",
    info="Enter a message for ollama",
    lines=7,
)
message_output = gr.Textbox(
    label="Response:",
    lines=8,
)

view = gr.Interface(
    fn=message_ollama,
    title="OLLAMA",
    inputs=[message_input],
    outputs=[message_output],
    examples=["hello", "howdy"],
    flagging_mode="never",
)
view.launch()

* Running on local URL:  http://127.0.0.1:7866
* To create a public link, set `share=True` in `launch()`.




In [40]:
# Let's use Markdown
# Why set system_message here when nothing below refers to it? It is a global variable that the
# model-calling function is meant to read at call time (go take a look at message_ollama).
# Not a great software engineering practice, but quite common during Jupyter Lab R&D!
# NOTE(review): this only takes effect if message_ollama reads a global spelled exactly
# `system_message` - confirm the name matches in the cell that defines it.

system_message = "You are a helpful assistant that responds in markdown without code blocks"

message_input = gr.Textbox(
    label="Your message:",
    info="Enter a message for ollama",
    lines=7,
)
message_output = gr.Markdown(label="Response:")

view = gr.Interface(
    fn=message_ollama,
    title="OLLAMA",
    inputs=[message_input],
    outputs=[message_output],
    examples=[
        "Explain the Transformer architecture to a layperson",
        "Explain the Transformer architecture to an aspiring AI engineer",
    ],
    flagging_mode="never",
)
view.launch()

* Running on local URL:  http://127.0.0.1:7867
* To create a public link, set `share=True` in `launch()`.




In [41]:
# Let's create a call that streams back results
# If you'd like a refresher on Generators (the "yield" keyword),
# Please take a look at the Intermediate Python guide in the guides folder

def stream_ollama(prompt):
    """Stream llama3.2's reply to `prompt`, yielding the text accumulated so far.

    Every yielded value is the whole response received up to that point, not just
    the newest fragment. The system prompt is read from the global `system_message`.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
      ]
    stream = ollama.chat.completions.create(
        model='llama3.2',
        messages=messages,
        stream=True
    )
    result = ""
    for chunk in stream:
        # A streamed chunk can carry an empty `choices` list (e.g. a usage-only chunk);
        # indexing [0] on it would raise IndexError, so skip it.
        if not chunk.choices:
            continue
        # delta.content can be None, hence the `or ""`
        result += chunk.choices[0].delta.content or ""
        yield result

In [42]:
# Same UI as before, now backed by the streaming generator so the Markdown output updates as text arrives.
message_input = gr.Textbox(
    label="Your message:",
    info="Enter a message for ollama",
    lines=7,
)
message_output = gr.Markdown(label="Response:")

view = gr.Interface(
    fn=stream_ollama,
    title="OLLAMA",
    inputs=[message_input],
    outputs=[message_output],
    examples=[
        "Explain the Transformer architecture to a layperson",
        "Explain the Transformer architecture to an aspiring AI engineer",
    ],
    flagging_mode="never",
)
view.launch()

* Running on local URL:  http://127.0.0.1:7868
* To create a public link, set `share=True` in `launch()`.




In [43]:
from scraper import fetch_website_contents

In [44]:

# Again, this is the typical experimental mindset - reassigning the global `system_message`
# used above, so functions that read it at call time now get the brochure-writing instructions:

system_message = """
You are an assistant that analyzes the contents of a company website landing page
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
"""

In [45]:
def stream_brochure(company_name, url, model):
    """Stream a brochure for `company_name`, built from the landing page at `url`.

    Yields an empty string first, then the progressively longer text produced by
    stream_ollama.

    Raises:
        ValueError: if `model` is not "OLLAMA", the only backend handled here.
    """
    # Validate the selection first, so an unsupported model fails immediately
    # instead of after the website has been fetched.
    if model != "OLLAMA":
        raise ValueError(f"Unknown model: {model!r}")
    # Presumably this blanks the output area while the page is being fetched - TODO confirm
    yield ""
    prompt = f"Please generate a company brochure for {company_name}. Here is their landing page:\n"
    prompt += fetch_website_contents(url)
    yield from stream_ollama(prompt)

In [46]:
# Brochure generator UI: company name, landing page URL and model choice in; streamed Markdown out.
name_input = gr.Textbox(label="Company name:")
url_input = gr.Textbox(label="Landing page URL including http:// or https://")
# Only offer models that stream_brochure handles; it raises ValueError for anything other than "OLLAMA".
model_selector = gr.Dropdown(["OLLAMA"], label="Select model", value="OLLAMA")
message_output = gr.Markdown(label="Response:")

view = gr.Interface(
    fn=stream_brochure,
    title="Brochure Generator", 
    inputs=[name_input, url_input, model_selector], 
    outputs=[message_output], 
    examples=[
            ["Hugging Face", "https://huggingface.co", "OLLAMA"],
            ["Edward Donner", "https://edwarddonner.com", "OLLAMA"]
        ], 
    flagging_mode="never"
    )
view.launch()

* Running on local URL:  http://127.0.0.1:7869
* To create a public link, set `share=True` in `launch()`.




In [47]:
# a funny chat callback

In [50]:
def chat(message, history):
    """Toy chat callback: ignores both arguments and always returns the same fixed reply.

    The reply is a neutral placeholder word; this text is shown to whoever uses the
    chat UI, so it should not be a remark aimed at the user.
    """
    return "bananas"

In [51]:
# Serve the fixed-reply `chat` callback in Gradio's chat UI.
gr.ChatInterface(
    fn=chat,
    type="messages",
).launch()

* Running on local URL:  http://127.0.0.1:7871
* To create a public link, set `share=True` in `launch()`.




In [52]:
def chat(message, history):
    """Toy chat callback: echo the incoming message and the history, plus a fixed suffix.

    Demonstrates what Gradio passes to the callback. The suffix is a neutral
    placeholder, since the reply is shown to whoever uses the chat UI.
    """
    return f"You said {message} and the history is {history} but I still say bananas"

In [55]:
# Serve the echoing `chat` callback, to see the message and history arguments it receives.
gr.ChatInterface(
    fn=chat,
    type="messages",
).launch()

* Running on local URL:  http://127.0.0.1:7872
* To create a public link, set `share=True` in `launch()`.




In [56]:
# a proper chat callback

In [61]:
def chat(message, history):
    """Answer `message` with llama3.2, passing the earlier turns along as context.

    Only the "role" and "content" keys of each history entry are forwarded. The
    system prompt is read from the global `system_message`.
    """
    prior_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in history
    ]
    messages = [
        {"role": "system", "content": system_message},
        *prior_turns,
        {"role": "user", "content": message},
    ]
    reply = ollama.chat.completions.create(messages=messages, model="llama3.2")
    return reply.choices[0].message.content

In [62]:
# Serve the model-backed `chat` callback in Gradio's chat UI.
gr.ChatInterface(
    fn=chat,
    type="messages",
).launch()

* Running on local URL:  http://127.0.0.1:7874
* To create a public link, set `share=True` in `launch()`.




In [63]:
def chat(message, history):
    """Streaming chat callback: yield llama3.2's reply to `message` as it grows.

    Earlier turns are forwarded with only their "role" and "content" keys. Every
    yielded value is the whole reply received so far. The system prompt is read
    from the global `system_message`.
    """
    history = [{"role":h["role"], "content":h["content"]} for h in history]
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
    stream = ollama.chat.completions.create(model="llama3.2", messages=messages, stream=True)
    response = ""
    for chunk in stream:
        # A streamed chunk can carry an empty `choices` list (e.g. a usage-only chunk);
        # indexing [0] on it would raise IndexError, so skip it.
        if not chunk.choices:
            continue
        # delta.content can be None, hence the `or ''`
        response += chunk.choices[0].delta.content or ''
        yield response

In [64]:
# Serve the streaming `chat` generator in Gradio's chat UI.
gr.ChatInterface(
    fn=chat,
    type="messages",
).launch()

* Running on local URL:  http://127.0.0.1:7875
* To create a public link, set `share=True` in `launch()`.


