In [1]:
# 1. Install required dependencies
!pip install huggingface_hub
!pip install transformers
!pip install accelerate
!pip install einops
!apt-get update && apt-get install wget curl git -y

Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.8.0
Get:1 https://packages.cloud.google.com/apt gcsfuse-focal InRelease [1227 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1581 B]
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease              
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]        
Get:6 https://packages.cloud.google.com/apt cloud-sdk InRelease [1618 B]       
Hit:7 https://packages.cloud.google.com/apt google-fast-socket InRelease       
Get:8 https://packages.cloud.google.com/apt gcsfuse-focal/main amd64 Packages [28.6 kB]
Get:9 https://developer.

In [4]:
# 2. Download and Install Ollama
!curl -fsSL https://ollama.com/install.sh | sh

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> NVIDIA GPU installed.
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [5]:
# 3. Create a helper function to manage Ollama processes
def start_ollama_server():
    import subprocess
    import time
    
    # Start Ollama server in the background
    process = subprocess.Popen(['ollama', 'serve'], 
                             stdout=subprocess.PIPE, 
                             stderr=subprocess.PIPE)
    time.sleep(5)  # Wait for server to start
    return process

def stop_ollama_server(process):
    process.terminate()
    process.wait()

# 4. Function to pull and run models
def setup_ollama_model(model_name="llama2"):
    import subprocess
    
    # Pull the model
    subprocess.run(['ollama', 'pull', model_name])

# 5. Create a function for interacting with the model
def query_ollama(prompt, model_name="llama2"):
    import subprocess
    import json
    
    # Run the model with the prompt
    result = subprocess.run(
        ['ollama', 'run', model_name, prompt],
        capture_output=True,
        text=True
    )
    
    return result.stdout

In [7]:
# 6. Main execution
if __name__ == "__main__":
    # Start Ollama server
    server_process = start_ollama_server()
    
    try:
        # Setup model
        model_name = "llama2"  # You can change this to other models
        print(f"Setting up {model_name}...")
        setup_ollama_model(model_name)
        
        # Example prompts
        prompts = [
            "What is machine learning?",
            "Explain the concept of neural networks briefly.",
            "What is the difference between supervised and unsupervised learning?"
        ]
        
        # Run queries
        for prompt in prompts:
            print("\nPrompt:", prompt)
            print("Response:", query_ollama(prompt, model_name))
            
    finally:
        # Stop Ollama server
        stop_ollama_server(server_process)

Setting up llama2...


[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest 
pulling 8934d96d3f08... 100% ▕████████████████▏ 3.8 GB                         
pulling 8c17c2ebb0ea... 100% ▕████████████████▏ 7.0 KB                         [?25h[?25l[2K[1G[A[2K[1G[A[2K[1Gpulling manifest 
pulling 8934d96d3f08... 100% ▕████████████████▏ 3.8 GB                         
pulling 8c17c2ebb0ea... 100% ▕████████████████▏ 7.0 KB                         
pulling 7c23fb36d801... 100% ▕████████████████▏ 4.8 KB                         
pulling 2e0493f67d0c... 100% ▕████████████████▏   59 B                         
pulling fa304d675061... 100% ▕████████████████▏   91 B                         
pulling 42ba7f8a01dd... 100% ▕████████████████▏  557 B                         
verifying sha256 digest 
writing manifest 
success [?25h



Prompt: What is machine learning?
Response: 
Machine learning is a subfield of artificial intelligence (AI) that involves the use of algorithms and statistical models to enable machines to learn from data, make decisions, and improve their performance on a specific task over time.

In traditional programming, a computer is given a set of rules to follow to perform a task. In contrast, machine learning enables a computer to learn from experience and improve its performance without being explicitly programmed. This is achieved by using algorithms that can analyze data and identify patterns, making it possible for the computer to make decisions or take actions without being explicitly told how to do so.

Machine learning has many applications, including:

1. Image recognition: Machine learning algorithms can be trained on a large dataset of images to recognize objects, faces, and other patterns.
2. Natural language processing: Machine learning can be used to analyze and understand natura

In [8]:
# 7. Additional utility functions for model management
def list_available_models():
    import subprocess
    result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
    return result.stdout

def remove_model(model_name):
    import subprocess
    subprocess.run(['ollama', 'rm', model_name])

# Example usage of utility functions
print("\nAvailable models:")
print(list_available_models())

# 8. Advanced usage with streaming responses
def stream_ollama_response(prompt, model_name="llama2"):
    import subprocess
    import sys
    
    process = subprocess.Popen(
        ['ollama', 'run', model_name, prompt],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True
    )
    
    # Stream the output
    for line in process.stdout:
        sys.stdout.write(line)
        sys.stdout.flush()
    
    process.wait()

# Example of streaming response
print("\nStreaming response example:")
stream_ollama_response("Write a short poem about AI")

# 9. Error handling wrapper
def safe_ollama_query(prompt, model_name="llama2"):
    try:
        return query_ollama(prompt, model_name)
    except Exception as e:
        return f"Error occurred: {str(e)}"

# 10. Model performance monitoring
def benchmark_model(model_name="llama2", num_queries=5):
    import time
    
    total_time = 0
    test_prompt = "What is artificial intelligence?"
    
    for i in range(num_queries):
        start_time = time.time()
        query_ollama(test_prompt, model_name)
        end_time = time.time()
        total_time += (end_time - start_time)
    
    avg_time = total_time / num_queries
    print(f"Average response time for {model_name}: {avg_time:.2f} seconds")

# Run benchmark
print("\nRunning benchmark:")
benchmark_model()



Available models:


Streaming response example:

Running benchmark:
Average response time for llama2: 0.02 seconds
