## Ollama 
* Generation and chat completion
    * [Ollama completion reference](https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion).

In [6]:
import requests
import json

# URL of the local Ollama text-generation endpoint
generation_api_url = "http://localhost:11434/api/generate"

# Model tag sent to Ollama in the request body
model = "llama3.2:latest"

# Input text whose tone, intent and structure the model is asked to summarize
text = "Hello, how are you?"

# POST the generation request. Sampling settings live under "options" and must
# use Ollama's own parameter names.
response = requests.post(
    generation_api_url,
    json={
        "model": model,
        "prompt": f"Summarize the tone, intent, and structure of the following text in 3 bullet points: {text}",  # Task prompt with the input text appended
        "stream": False,  # Ask for a single JSON body rather than streamed chunks
        "options": {
            "temperature": 0.7,
            "top_p": 0.9,
            "top_k": 50,
            # Ollama calls this option "repeat_penalty". The Hugging Face style
            # "repetition_penalty" key is not an Ollama option, so the penalty
            # was never applied under the old name.
            "repeat_penalty": 1.2,
        },
    },
    # Bound the wait so a stalled server cannot hang the kernel; raise this on slow hardware.
    timeout=120,
)

# Surface HTTP errors here instead of parsing an error body as if it were a result
response.raise_for_status()
response.json()

{'model': 'llama3.2:latest',
 'created_at': '2025-05-02T15:11:08.327216473Z',
 'response': 'Here is a summary of the tone, intent, and structure of the given text:\n\n• **Tone:** The tone of this sentence is informal, friendly, and welcoming. It sets a casual and approachable tone.\n• **Intent:** The intent of this sentence is to initiate a conversation or establish a connection with the person being addressed. It\'s a greeting that invites the other person to respond.\n• **Structure:** The structure of this sentence is simple and straightforward. It consists of a single sentence with a short question ("how are you?") followed by a polite expression of inquiry.',
 'done': True,
 'done_reason': 'stop',
 'context': [128006,
  9125,
  128007,
  271,
  38766,
  1303,
  33025,
  2696,
  25,
  6790,
  220,
  2366,
  18,
  271,
  128009,
  128006,
  882,
  128007,
  271,
  9370,
  5730,
  553,
  279,
  16630,
  11,
  7537,
  11,
  323,
  6070,
  315,
  279,
  2768,
  1495,
  304,
  220,
  18,

In [8]:
# Display only the 'response' field (the generated text) of the parsed JSON body
response.json()['response']

'Here is a summary of the tone, intent, and structure of the given text:\n\n• **Tone:** The tone of this sentence is informal, friendly, and welcoming. It sets a casual and approachable tone.\n• **Intent:** The intent of this sentence is to initiate a conversation or establish a connection with the person being addressed. It\'s a greeting that invites the other person to respond.\n• **Structure:** The structure of this sentence is simple and straightforward. It consists of a single sentence with a short question ("how are you?") followed by a polite expression of inquiry.'

In [9]:
from pprint import pprint

# Pretty-print the generated text so the long string is wrapped across lines
pprint(response.json()['response'])

('Here is a summary of the tone, intent, and structure of the given text:\n'
 '\n'
 '• **Tone:** The tone of this sentence is informal, friendly, and welcoming. '
 'It sets a casual and approachable tone.\n'
 '• **Intent:** The intent of this sentence is to initiate a conversation or '
 "establish a connection with the person being addressed. It's a greeting that "
 'invites the other person to respond.\n'
 '• **Structure:** The structure of this sentence is simple and '
 'straightforward. It consists of a single sentence with a short question '
 '("how are you?") followed by a polite expression of inquiry.')


##### chat completion

In [13]:
import json
import requests

# URL of the local Ollama chat endpoint
chat_api_url = "http://localhost:11434/api/chat"

# Model tag sent to Ollama in the request body
model = "llama3.2:latest"

# User message that opens the conversation
text = "Hello! what are the beautiful things in egypt?"

# POST the chat request. The conversation is passed as a list of role/content
# messages; sampling settings live under "options" and must use Ollama's own
# parameter names.
response = requests.post(
    chat_api_url,
    json={
        "model": model,
        "messages": [
            {"role": "user", "content": text},
        ],
        "stream": False,  # Ask for a single JSON body rather than streamed chunks
        "options": {
            "temperature": 0.7,
            "top_p": 0.9,
            "top_k": 50,
            "seed": 101,
            # Ollama calls this option "repeat_penalty". The Hugging Face style
            # "repetition_penalty" key is not an Ollama option, so the penalty
            # was never applied under the old name.
            "repeat_penalty": 1.2,
            # Limit on generated tokens; the recorded run stopped with
            # done_reason 'length' at eval_count 100.
            "num_predict": 100,
        },
    },
    # Bound the wait so a stalled server cannot hang the kernel; raise this on slow hardware.
    timeout=120,
)

# Surface HTTP errors here instead of parsing an error body as if it were a result
response.raise_for_status()
response.json()

{'model': 'llama3.2:latest',
 'created_at': '2025-05-02T15:29:00.117193313Z',
 'message': {'role': 'assistant',
  'content': 'Egypt is a country rich in history, culture, and natural beauty. Here are some of the most beautiful things in Egypt:\n\n1. **Pyramids of Giza**: No list of Egyptian wonders would be complete without these iconic pyramids, one of the Seven Wonders of the Ancient World.\n2. **Nile River**: The lifeblood of ancient Egypt, the Nile is a stunning river that winds its way through the heart of the country, offering breathtaking views and opportunities for boat rides and fel'},
 'done_reason': 'length',
 'done': True,
 'total_duration': 5736191793,
 'load_duration': 23456757,
 'prompt_eval_count': 36,
 'prompt_eval_duration': 14679662,
 'eval_count': 100,
 'eval_duration': 5697555450}

In [14]:
from pprint import pprint

# Pretty-print the assistant message text so the long string is wrapped across lines
pprint(response.json()['message']['content'])

('Egypt is a country rich in history, culture, and natural beauty. Here are '
 'some of the most beautiful things in Egypt:\n'
 '\n'
 '1. **Pyramids of Giza**: No list of Egyptian wonders would be complete '
 'without these iconic pyramids, one of the Seven Wonders of the Ancient '
 'World.\n'
 '2. **Nile River**: The lifeblood of ancient Egypt, the Nile is a stunning '
 'river that winds its way through the heart of the country, offering '
 'breathtaking views and opportunities for boat rides and fel')


In [None]:
from helpers import get_settings
from openai import OpenAI
import os 

# Settings object returned by the local `helpers.get_settings` function
setting = get_settings()

# OpenAI client built with the API key read from settings rather than a literal in the notebook
client = OpenAI(api_key=setting.OPENAI_API_KEY)

# Model name read from the same settings object
llm_model = setting.LLM_MODEL

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
import torch

from langchain_huggingface import HuggingFacePipeline

In [None]:
# Model and tokenizer setup
# NOTE(review): `env_values` is not defined in any cell visible here — confirm an
# earlier cell creates it, otherwise this line raises NameError on a fresh kernel.
model_id = env_values['MODEL_ID']

# Quantization settings requesting 8-bit loading
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Tokenizer for the chosen checkpoint, with use_fast=False
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)

# Causal language model for the same checkpoint, loaded with the quantization
# settings above and device_map='auto'
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map='auto',
)

In [None]:
# Build a text-generation pipeline around the loaded model and tokenizer.
# The sampling settings are fixed here, so every call through `pipe` shares them.
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=256,
    truncation=True,  # Explicitly enable truncation
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2,
)

# Wrap the pipeline in LangChain's HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Instruction-style prompt: a fixed preamble, an "### Instruction:" section holding
# the task, and a trailing "Answer:" cue for the model to continue from.
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Suggest 2 ways to lose my weight.

Answer:"""

# Send the prompt through the LangChain wrapper and print what it returns
print(llm.invoke(template))

In [None]:
# Two prompts to send in a single batch call
prompt_1 = "Suggest 2 ways to lose my weight."
prompt_2 = "Tell me a joke"

# Pass both prompts to generate() as one list
llm_results = llm.generate([prompt_1, prompt_2])

# Show the text of the first generation for the second prompt
llm_results.generations[1][0].text

In [None]:
# Call the transformers pipeline directly, bypassing the LangChain wrapper,
# passing return_full_text=False
pipeline_output = pipe(template, return_full_text=False)
print(f"Pipeline Output: {pipeline_output[0]['generated_text']}")