# Test prompting local open models through Ollama Python

## Imports

In [53]:
import ollama
from ollama import chat

## Open models installed on local machine

In [54]:
def installed_models():
    """Return the names of the models reported by ``ollama.list()``.

    Returns
    -------
    list
        The ``model`` attribute of every entry in ``ollama.list().models``,
        in the order they are reported.
    """
    names = []
    for entry in ollama.list().models:
        names.append(entry.model)
    return names

In [55]:
# Query the installed models once; later cells pick a model from this list
# by index. The bare name on the last line displays the list as cell output.
local_models = installed_models()
local_models

['phi4:latest',
 'deepseek-r1:latest',
 'deepseek-r1:32b',
 'llama3.3:latest',
 'deepseek-coder-v2:16b']

## Stream Chat Responses from a model

In [None]:
def get_stream(model_name: str, prompt_msg: str):
    """Start a streaming chat that contains a single user message.

    Parameters
    ----------
    model_name: str
        Name of the model handed to ``chat``.
    prompt_msg: str
        Text used as the content of the only ``user`` message.

    Returns
    -------
    Whatever ``chat`` returns when called with ``stream=True``.
    """
    user_message = {'role': 'user', 'content': prompt_msg}
    return chat(model=model_name, messages=[user_message], stream=True)

In [None]:
# Stage-one prompt: asks only for a SimPy arrivals generator, code-only output.
# NOTE(review): "explantory" in the last line looks like a typo for
# "explanatory"; left unchanged because the text is sent to the model as-is.
prompt_1 = """
I would like to code a simpy 4.1 model in Python of an M/M/s queuing system. 
I would like to code this in stages. The first stage is to create a simpy generator function
for arrivals. I would like to be able to set the run length and the arrival rate. Use numpy for sampling.
Do not output any explantory text. Only provide python code."""

In [None]:
# use coder first.
# NOTE(review): in the model listing shown earlier, index 3 is
# 'llama3.3:latest' and 'deepseek-coder-v2:16b' is index 4 — confirm which
# model is intended here.
stream = get_stream(local_models[3], prompt_1)

In [None]:
# Echo the streamed reply to the notebook and save the same text to disk.
with open('response_1.txt', 'w') as writer:
    for chunk in stream:
        text = chunk['message']['content']
        print(text, end='', flush=True)
        writer.write(text)

# critique with 2nd model

In [None]:
def read_response(file_name):
    """Return the complete text of a saved model response.

    The file is returned exactly as stored, including line breaks. Saved
    responses contain Python code, and dropping the newlines would fuse
    consecutive statements into one unreadable line before the text is
    handed to another model for critique.

    Parameters
    ----------
    file_name: str
        Path of the text file to read.

    Returns
    -------
    str
        The file content; an empty string for an empty file.
    """
    with open(file_name, 'r') as reader:
        return reader.read()

In [None]:
# Build the critique prompt: the instruction first, then the saved reply.
# (Despite its name, prompt_2_postfix is placed in front of the response.)
prompt_2_postfix = "Please critique the simpy and python code included in the text below. Check the code for bugs and that it uses simpy correctly. Improve the code."
last_response = read_response("response_1.txt")
# A blank line keeps the instruction from running straight into the first
# line of the quoted response.
prompt_2 = prompt_2_postfix + "\n\n" + last_response

In [None]:
# Send the critique prompt to the model at index 1 of local_models.
stream = get_stream(local_models[1], prompt_2)

In [None]:
# Show the critique as it streams in; this cell does not save it to disk.
for chunk in stream:
    piece = chunk['message']['content']
    print(piece, end='', flush=True)

In [1]:
def format_message(history: list, role: str, content: str):
    """Append one chat message to ``history`` and return that same list.

    Parameters
    ----------
    history: list
        Conversation so far; it is modified in place.
    role: str
        Value stored under the ``'role'`` key.
    content: str
        Value stored under the ``'content'`` key.

    Returns
    -------
    list
        The ``history`` object that was passed in, now one entry longer.
    """
    history.append({'role': role, 'content': content})
    return history

In [20]:
def get_stream(model_name: str, chat_history: list):
    """Start a streaming chat from an existing message history.

    Parameters
    ----------
    model_name: str
        Name of the model handed to ``chat``.
    chat_history: list
        Messages passed unchanged as the ``messages`` argument.

    Returns
    -------
    Whatever ``chat`` returns when called with ``stream=True``.
    """
    return chat(model=model_name, messages=chat_history, stream=True)

In [91]:
# Start a fresh conversation; messages are added to it via format_message.
msg_history = []

# NOTE(review): the prompt below contains a garbled sentence ("only the
# arrival want to model the arrival processes") and the typo "explantory";
# left unchanged because the text is sent to the model as-is.
prompt_1 = """
Main command: 
Code a Python and SimPy 4.1 discrete-event simulation model of a MMs
The code will be added in stages. In this first stage only the arrival want to model the arrival processes. 
I would like to be able to set the run length and the arrival rate. Use a numpy Generator for sampling.
Do not output any introductory or explantory text. Only provide python code."""

# update history: the prompt becomes the first 'user' message
msg_history = format_message(msg_history, 'user', prompt_1)

In [92]:
# Follow-up instruction that the iteration loop appends as a 'user' message
# after each model reply.
# NOTE(review): the visible loop never checks a reply for the
# "model stage complete" phrase requested here.
critique_prompt = """
Analyse the last version of the python model produced in our discussion.  Identify bugs, mistakes and hallucinations in the simpy and
python code.  Fix them and provide a new version of the model that is an improvement. If there are no mistakes 
or problems then write "model stage complete".
"""

In [93]:
# Number of generate/critique rounds and the index of the model to use.
ITERATIONS = 2
model_id = 0


for i in range(ITERATIONS):
    print(f"********* ITERATION {i} *******************")
    print("")
    stream = get_stream(local_models[model_id], msg_history)
    pieces = []
    with open(f'response_{i}.txt', 'w') as writer:
        for chunk in stream:
            text = chunk['message']['content']
            print(text, end='', flush=True)
            writer.write(text)
            pieces.append(text)
    # Chunks are fragments of one continuous reply, so they are joined with
    # no separator. Adding a newline after each chunk would store a reply in
    # the history that differs from what was printed and written to file.
    response = "".join(pieces)
    # store the response in history, then ask the model to critique it
    msg_history = format_message(msg_history, 'assistant', response)
    msg_history = format_message(msg_history, 'user', critique_prompt)

********* ITERATION 0 *******************

python
import simpy
import numpy as np

class MMSSimulation:
    def __init__(self, env, arrival_rate):
        self.env = env
        self.arrival_rate = arrival_rate
        self.process = env.process(self.run())
    
    def run(self):
        # Initialize the random number generator for exponential distribution
        rng = np.random.default_rng()
        
        while True:
            # Sample inter-arrival time using an exponential distribution
            inter_arrival_time = rng.exponential(1.0 / self.arrival_rate)
            
            # Schedule the next arrival
            yield self.env.timeout(inter_arrival_time)

def main():
    # Parameters for the simulation
    run_length = 100  # Simulation run length in time units
    arrival_rate = 5  # Arrival rate (lambda) of customers per unit time
    
    # Create a SimPy environment
    env = simpy.Environment()
    
    # Initialize the MMSSimulation with the given environment 

In [48]:
import simpy
import numpy as np

class MMkModel:
    """Arrival stage of a multi-server queueing model.

    Parameters
    ----------
    env:
        Simulation environment; must provide ``timeout`` and be accepted by
        ``simpy.Resource``.
    num_servers: int
        Capacity of the ``simpy.Resource`` stored in ``self.server``.
    arr_rate: float
        Arrival rate; inter-arrival times are exponential with mean
        ``1 / arr_rate``.
    rng: numpy.random.Generator, optional
        Generator used for sampling. When omitted a new
        ``np.random.default_rng()`` is created, so existing three-argument
        calls keep working. Pass a seeded generator for reproducible runs.
    """
    def __init__(self, env, num_servers, arr_rate, rng=None):
        self.env = env
        self.num_servers = num_servers
        self.arr_rate = arr_rate
        # Sample from a Generator rather than the legacy global numpy state.
        self.rng = np.random.default_rng() if rng is None else rng
        self.server = simpy.Resource(env, capacity=num_servers)
        # Inter-arrival times whose timeout has already elapsed.
        self.inter_arrival_times = []

    def arrival_process(self):
        """Generate arrivals forever, recording each elapsed inter-arrival time."""
        while True:
            inter_arrival_time = self.rng.exponential(1 / self.arr_rate)
            yield self.env.timeout(inter_arrival_time)
            # Recorded only after the wait, so a sample still pending when
            # the run stops is not included.
            self.inter_arrival_times.append(inter_arrival_time)

# Setup and run
# NOTE(review): np_rng is created here but is not passed to MMkModel below.
np_rng = np.random.default_rng()
env = simpy.Environment()
model = MMkModel(env, num_servers=2, arr_rate=5.0)
# Register the arrival generator, then simulate up to time 1000.
env.process(model.arrival_process())
env.run(until=1000.0)

In [51]:
# Mean of the recorded inter-arrival times; samples are drawn with scale
# 1 / arr_rate, i.e. 0.2 for the arr_rate=5.0 used above.
np.asarray(model.inter_arrival_times).mean()

np.float64(0.20461854058668488)

In [62]:
import simpy
import numpy as np

def arrivals(env, arrival_rate):
    """Endless arrival process with exponential gaps.

    Each cycle draws a gap with mean ``1 / arrival_rate``, yields
    ``env.timeout(gap)``, and on resumption prints ``env.now``.
    """
    mean_gap = 1 / arrival_rate
    while True:
        gap = np.random.exponential(mean_gap)
        # Hand control back to the environment until the gap has elapsed.
        yield env.timeout(gap)
        print(f"Arrival at {env.now:.2f}")

def run_simulation(run_length, arrival_rate):
    """Build a new environment, start ``arrivals`` in it and run to ``run_length``."""
    sim_env = simpy.Environment()
    arrival_proc = arrivals(sim_env, arrival_rate)
    sim_env.process(arrival_proc)
    sim_env.run(until=run_length)

# Example usage: run until time 100 with a mean inter-arrival gap of
# 1 / arrival_rate = 1 time unit.
run_simulation(run_length=100.0, arrival_rate=1.0)

Arrival at 0.15
Arrival at 0.20
Arrival at 0.25
Arrival at 0.52
Arrival at 1.17
Arrival at 1.92
Arrival at 2.31
Arrival at 2.57
Arrival at 2.60
Arrival at 2.65
Arrival at 3.09
Arrival at 6.17
Arrival at 9.36
Arrival at 9.58
Arrival at 10.18
Arrival at 11.57
Arrival at 12.54
Arrival at 14.48
Arrival at 15.81
Arrival at 16.54
Arrival at 17.63
Arrival at 17.80
Arrival at 20.32
Arrival at 23.73
Arrival at 26.58
Arrival at 28.44
Arrival at 30.08
Arrival at 30.94
Arrival at 31.29
Arrival at 31.38
Arrival at 32.13
Arrival at 32.95
Arrival at 34.18
Arrival at 34.36
Arrival at 35.97
Arrival at 36.00
Arrival at 36.14
Arrival at 38.12
Arrival at 38.13
Arrival at 39.20
Arrival at 39.75
Arrival at 44.17
Arrival at 44.18
Arrival at 44.19
Arrival at 47.78
Arrival at 50.10
Arrival at 50.22
Arrival at 51.14
Arrival at 51.42
Arrival at 51.85
Arrival at 51.91
Arrival at 53.13
Arrival at 53.61
Arrival at 54.87
Arrival at 56.15
Arrival at 58.88
Arrival at 59.96
Arrival at 61.11
Arrival at 61.71
Arrival at 

In [76]:
import simpy
import numpy as np

class MMSystem:
    """Arrival process that counts how many arrivals have occurred.

    Parameters
    ----------
    env:
        Simulation environment; must provide ``process`` and ``timeout``.
    arrival_rate: float
        Arrival rate; inter-arrival times are exponential with mean
        ``1.0 / arrival_rate``.

    Attributes
    ----------
    n_arrivals: int
        Number of arrivals whose inter-arrival wait has completed.
    """
    def __init__(self, env, arrival_rate):
        self.env = env
        self.arrival_rate = arrival_rate
        self.n_arrivals = 0

        # Generator for inter-arrival times
        self.inter_arrival_times = self._generate_inter_arrival_times()

        # Process to handle arrivals
        self.env.process(self.run())

    def _generate_inter_arrival_times(self):
        """Generator function for exponentially distributed inter-arrival times."""
        while True:
            yield np.random.exponential(1.0 / self.arrival_rate)

    def run(self):
        """Main process to simulate arrivals in the system."""
        while True:
            # Get next inter-arrival time
            inter_arrival_time = next(self.inter_arrival_times)

            # Wait for the inter-arrival time duration
            yield self.env.timeout(inter_arrival_time)

            # Count the arrival only once its wait has elapsed. Counting
            # before the timeout would include the arrival that is still
            # pending when the run stops, inflating the total by one.
            self.n_arrivals += 1

def simulate_mms(run_length, arrival_rate):
    """Run one ``MMSystem`` up to ``run_length`` and return its arrival count."""
    sim_env = simpy.Environment()
    system = MMSystem(sim_env, arrival_rate)

    # MMSystem registers its own process on construction, so the
    # environment only needs to be advanced here.
    sim_env.run(until=run_length)

    return system.n_arrivals

# Example usage: runs only when this code is executed as the main module.
if __name__ == "__main__":
    run_length = 100.0  # Time units
    arrival_rate = 5    # Arrivals per time unit
    
    # Run one simulation and report the count it returns.
    total_arrivals = simulate_mms(run_length, arrival_rate)
    
    print(f"Total arrivals: {total_arrivals}")

Total arrivals: 526
