In [1]:
#Pulling
from tqdm import tqdm
from ollama import pull


current_digest, bars = '', {}
for progress in pull('llama2', stream=True):
  digest = progress.get('digest', '')
  if digest != current_digest and current_digest in bars:
    bars[current_digest].close()

  if not digest:
    print(progress.get('status'))
    continue

  if digest not in bars and (total := progress.get('total')):
    bars[digest] = tqdm(total=total, desc=f'pulling {digest[7:19]}', unit='B', unit_scale=True)

  if completed := progress.get('completed'):
    bars[digest].update(completed - bars[digest].n)

  current_digest = digest

pulling manifest


pulling 8934d96d3f08: 100%|██████████| 3.83G/3.83G [00:00<00:00, 3.82TB/s]
pulling 8c17c2ebb0ea: 100%|██████████| 7.02k/7.02k [00:00<?, ?B/s]
pulling 7c23fb36d801: 100%|██████████| 4.77k/4.77k [00:00<?, ?B/s]
pulling 2e0493f67d0c: 100%|██████████| 59.0/59.0 [00:00<?, ?B/s]
pulling fa304d675061: 100%|██████████| 91.0/91.0 [00:00<?, ?B/s]
pulling 42ba7f8a01dd: 100%|██████████| 557/557 [00:00<?, ?B/s] 

verifying sha256 digest
writing manifest
success





In [2]:
#PS

from ollama import ps, pull, chat

response = pull('llama2', stream=True)
progress_states = set()
for progress in response:
  if progress.get('status') in progress_states:
    continue
  progress_states.add(progress.get('status'))
  print(progress.get('status'))

print('\n')

response = chat('llama2', messages=[{'role': 'user', 'content': 'Hello!'}])
print(response['message']['content'])

print('\n')

response = ps()

name = response['models'][0]['name']
size = response['models'][0]['size']
size_vram = response['models'][0]['size_vram']

if size == size_vram:
  print(f'{name}: 100% GPU')
elif not size_vram:
  print(f'{name}: 100% CPU')
else:
  size_cpu = size - size_vram
  cpu_percent = round(size_cpu / size * 100)
  print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU')

pulling manifest
pulling 8934d96d3f08
pulling 8c17c2ebb0ea
pulling 7c23fb36d801
pulling 2e0493f67d0c
pulling fa304d675061
pulling 42ba7f8a01dd
verifying sha256 digest
writing manifest
success


Hello there! It's nice to meet you. How are you today?


llama2:latest: 38% CPU/62% GPU


In [3]:
#Chat_stream
from ollama import chat


messages = [
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
]

for part in chat('llama2', messages=messages, stream=True):
  print(part['message']['content'], end='', flush=True)

# end with a newline
print()


The sky appears blue because of a phenomenon called Rayleigh scattering. When sunlight enters Earth's atmosphere, it encounters tiny molecules of gases such as nitrogen and oxygen. These molecules scatter the light in all directions, but they scatter shorter (blue) wavelengths more than longer (red) wavelengths. This is known as Rayleigh scattering.

As a result of this scattering, the blue light is dispersed throughout the atmosphere, giving the sky its blue appearance. The blue light can travel much farther through the atmosphere than the red light, so even when the sun is behind the horizon, the blue light can still reach our eyes and give us a blue sky.

In addition to Rayleigh scattering, there are other factors that can contribute to the color of the sky, such as dust particles, water vapor, and pollution. However, these factors generally have less of an impact on the overall appearance of the sky than Rayleigh scattering.

It's worth noting that the color of the sky can vary de

In [4]:
#Chat 
import ollama
import os
from ollama import create
from ollama import chat

messages = [
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
]

response = chat('llama2', messages=messages)
print(response['message']['content'])



The sky appears blue because of a phenomenon called Rayleigh scattering, which occurs when sunlight enters Earth's atmosphere. The sunlight encounters tiny molecules of gases in the air, such as nitrogen and oxygen, which scatters the light in all directions.

The shorter, blue wavelengths of light are scattered more than the longer, red wavelengths, resulting in a blue sky. This is known as Rayleigh scattering, named after the English physicist Lord Rayleigh, who first described the phenomenon in the late 19th century.

The blue color of the sky can also be affected by other factors such as air pollution, dust, and water vapor. However, these effects are generally not as pronounced as the Rayleigh scattering effect.

It's worth noting that the blue color of the sky is not always constant and can vary depending on the time of day, weather conditions, and altitude. For example, during sunrise and sunset, the sky can take on hues of red, orange, and pink due to the angle of the sunlight

In [5]:
#Gerenate-stream 
from ollama import generate


for part in generate('llama2', 'Why is the sky blue?', stream=True):
  print(part['response'], end='', flush=True)
  



The sky appears blue because of a phenomenon called Rayleigh scattering. When sunlight enters Earth's atmosphere, it encounters tiny molecules of gases such as nitrogen and oxygen. These molecules scatter the light in all directions, but they scatter shorter (blue) wavelengths more than longer (red) wavelengths. This is known as Rayleigh scattering.

As a result of this scattering, the blue light is distributed throughout the atmosphere, giving the sky its blue color. The red light, on the other hand, passes through the atmosphere mostly unscattered and reaches our eyes directly, appearing redder than the blue light. This is why the sky can appear different shades of blue during different times of day and in different conditions, depending on the amount of sunlight and the amount of particles in the air.

It's worth noting that the color of the sky can also be affected by other factors such as pollution, dust, and water vapor, which can scatter light in different ways and produce a ra

In [8]:
#Generate
from ollama import generate


response = generate('llama2', 'Why is the sky blue?')
print(response['response'])


The sky appears blue because of a phenomenon called Rayleigh scattering. When sunlight enters Earth's atmosphere, it encounters tiny molecules of gases such as nitrogen and oxygen. These molecules scatter the light in all directions, but they scatter shorter (blue) wavelengths more than longer (red) wavelengths. This is known as Rayleigh scattering.

As a result of this scattering, the blue light is dispersed throughout the atmosphere, giving the sky its blue color. The red light, on the other hand, travels through the atmosphere without being scattered, so it appears more direct and reaches our eyes as red light. This is why the sky can sometimes appear to be a different color depending on the time of day and atmospheric conditions.

It's worth noting that the blue color of the sky is not the only reason why we see colors in nature. The colors we see in flowers, leaves, and other objects are also due to the way light interacts with their surfaces and the pigments they contain.


In [6]:
#Multimodal 
import random
import httpx
from ollama import generate

# Fetch the latest XKCD comic metadata
latest = httpx.get('https://xkcd.com/info.0.json')
latest.raise_for_status()

# You can manually provide a number for the comic, or use a random comic number.
# Instead of using `sys.argv`, manually set the comic number here:
num = random.randint(1, latest.json().get('num'))  # Random comic number

# Fetch the specific comic based on the number
comic = httpx.get(f'https://xkcd.com/{num}/info.0.json')
comic.raise_for_status()

# Print out comic details
print(f'xkcd #{comic.json().get("num")}: {comic.json().get("alt")}')
print(f'link: https://xkcd.com/{num}')
print('---')

# Fetch the image from the comic URL
raw = httpx.get(comic.json().get('img'))
raw.raise_for_status()

# Generate explanation for the comic using the 'llava' model
for response in generate('llama2', 'explain this comic:', images=[raw.content], stream=True):
    print(response['response'], end='', flush=True)

print()

xkcd #570: Somewhere out there is a company that has actually figured out how to enlarge penises, and it's helpless to reach potential customers.
link: https://xkcd.com/570
---

The comic you provided is a classic example of a "before and after" joke. It shows two contrasting scenarios, with the first one depicting a messy and disorganized space, and the second one showing a neat and organized space. The punchline of the comic is that the before and after scenes are the same room, implying that the change in organization is superficial and does not actually solve any underlying problems.

Here's a breakdown of the comic:

1. The first panel shows a cluttered and disorganized room with clothes scattered all over the floor, empty food boxes and trash cans, and general chaos.
2. The second panel shows the same room, but now it is neat and organized. There are clean clothes in the closet, food boxes are closed and put away, and the trash cans are emptied.
3. The punchline of the comic is r

In [10]:
#Fill-middle
from ollama import generate

prompt = '''def remove_non_ascii(s: str) -> str:
    """ '''

suffix = """
    return result
"""

response = generate(
  model='llama2',
  prompt=prompt,
  suffix=suffix,
  options={
    'num_predict': 128,
    'temperature': 0,
    'top_p': 0.9,
    'stop': ['<EOT>'],
  },
)

print(response['response'])

ResponseError: llama2 does not support insert

In [14]:
#Create
import sys

from ollama import create


args = sys.argv[1:]
if len(args) == 2:
  # create from local file
  path = args[1]
else:
  print('usage: python main.py <name> <filepath>')
  sys.exit(1)

# TODO: update to real Modelfile values
modelfile = f"""
FROM {path}
"""

for response in create(model=args[0], modelfile=modelfile, stream=True):
  print(response['status'])

usage: python main.py <name> <filepath>


SystemExit: 1

In [15]:
#Async chat-stream
import shutil
import asyncio
import argparse

import ollama


async def speak(speaker, content):
  if speaker:
    p = await asyncio.create_subprocess_exec(speaker, content)
    await p.communicate()


async def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--speak', default=False, action='store_true')
  args = parser.parse_args()

  speaker = None
  if not args.speak:
    ...
  elif say := shutil.which('say'):
    speaker = say
  elif (espeak := shutil.which('espeak')) or (espeak := shutil.which('espeak-ng')):
    speaker = espeak

  client = ollama.AsyncClient()

  messages = []

  while True:
    if content_in := input('>>> '):
      messages.append({'role': 'user', 'content': content_in})

      content_out = ''
      message = {'role': 'assistant', 'content': ''}
      async for response in await client.chat(model='mistral', messages=messages, stream=True):
        if response['done']:
          messages.append(message)

        content = response['message']['content']
        print(content, end='', flush=True)

        content_out += content
        if content in ['.', '!', '?', '\n']:
          await speak(speaker, content_out)
          content_out = ''

        message['content'] += content

      if content_out:
        await speak(speaker, content_out)
      print()


try:
  asyncio.run(main())
except (KeyboardInterrupt, EOFError):
  ...

RuntimeError: asyncio.run() cannot be called from a running event loop

In [16]:
#Tools
import json
import ollama
import asyncio


# Simulates an API call to get flight times
# In a real application, this would fetch data from a live database or API
def get_flight_times(departure: str, arrival: str) -> str:
  flights = {
    'NYC-LAX': {'departure': '08:00 AM', 'arrival': '11:30 AM', 'duration': '5h 30m'},
    'LAX-NYC': {'departure': '02:00 PM', 'arrival': '10:30 PM', 'duration': '5h 30m'},
    'LHR-JFK': {'departure': '10:00 AM', 'arrival': '01:00 PM', 'duration': '8h 00m'},
    'JFK-LHR': {'departure': '09:00 PM', 'arrival': '09:00 AM', 'duration': '7h 00m'},
    'CDG-DXB': {'departure': '11:00 AM', 'arrival': '08:00 PM', 'duration': '6h 00m'},
    'DXB-CDG': {'departure': '03:00 AM', 'arrival': '07:30 AM', 'duration': '7h 30m'},
  }

  key = f'{departure}-{arrival}'.upper()
  return json.dumps(flights.get(key, {'error': 'Flight not found'}))


async def run(model: str):
  client = ollama.AsyncClient()
  # Initialize conversation with a user query
  messages = [{'role': 'user', 'content': 'What is the flight time from New York (NYC) to Los Angeles (LAX)?'}]

  # First API call: Send the query and function description to the model
  response = await client.chat(
    model=model,
    messages=messages,
    tools=[
      {
        'type': 'function',
        'function': {
          'name': 'get_flight_times',
          'description': 'Get the flight times between two cities',
          'parameters': {
            'type': 'object',
            'properties': {
              'departure': {
                'type': 'string',
                'description': 'The departure city (airport code)',
              },
              'arrival': {
                'type': 'string',
                'description': 'The arrival city (airport code)',
              },
            },
            'required': ['departure', 'arrival'],
          },
        },
      },
    ],
  )

  # Add the model's response to the conversation history
  messages.append(response['message'])

  # Check if the model decided to use the provided function
  if not response['message'].get('tool_calls'):
    print("The model didn't use the function. Its response was:")
    print(response['message']['content'])
    return

  # Process function calls made by the model
  if response['message'].get('tool_calls'):
    available_functions = {
      'get_flight_times': get_flight_times,
    }
    for tool in response['message']['tool_calls']:
      function_to_call = available_functions[tool['function']['name']]
      function_response = function_to_call(tool['function']['arguments']['departure'], tool['function']['arguments']['arrival'])
      # Add function response to the conversation
      messages.append(
        {
          'role': 'tool',
          'content': function_response,
        }
      )

  # Second API call: Get final response from the model
  final_response = await client.chat(model=model, messages=messages)
  print(final_response['message']['content'])


# Run the async function
asyncio.run(run('llama2'))

RuntimeError: asyncio.run() cannot be called from a running event loop