sudo docker run --runtime nvidia --gpus all     -v ~/.cache/huggingface:/root/.cache/huggingface     --env "HUGGING_FACE_HUB_TOKEN=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"     -p 8000:8000     --ipc=host --     vllm/vllm-openai:latest  --model mistralai/Pixtral-12B-2409     --tokenizer_mode mistral     --load_format mistral     --config_format mistral  --max_model_len 12000   --tensor-parallel-size 4  --limit-mm-per-prompt image=3  --gpu-memory-utilization 0.9 


### 0.Requirement and Importation

In [2]:
import tqdm
from tqdm import tqdm
from PIL import Image
import os

In [None]:
# Colpali Model
from colpali_engine.models import ColPali

# Colpali queries and images preprocessing
from colpali_engine.models.paligemma.colpali.processing_colpali import ColPaliProcessor

# Retruever Processor
from colpali_engine.utils.processing_utils import BaseVisualRetrieverProcessor

# Accelerate calculations
from colpali_engine.utils.torch_utils import ListDataset, get_torch_device

# Pytorch Data Loader Object
from torch.utils.data import DataLoader

# Pytorch Library
import torch

# Type Validation
from typing import List, cast

# cosine similarity
from sklearn.metrics.pairwise import cosine_similarity

# 
import re

#
from IPython.display import display, Markdown

### 1. Embedding Processus

#### 1.0. Downloading and Configuring the Colpali Model 

In [None]:
device = get_torch_device('cuda')
model_name = "vidore/colpali-v1.2"
model = ColPali.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map=device,
).eval()


#### 1.1. Query Embeddings

##### a. function

In [5]:
# Used to process queries and images to fit the model's input requirements beforehand
processor = cast(ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name))


In [74]:
# Embed the queries.
def queries_embedding(queries : list):
  # Create a DataLoader to iterate over the list of queries, processing each query
  # individually to fit model input requirements
  dataloader = DataLoader(
      dataset=ListDataset[str](queries),
      batch_size=1,
      shuffle=False,
      collate_fn=lambda x: processor.process_queries(x),
  )
  qs: List[torch.Tensor] = []

  for batch_query in dataloader:
      with torch.no_grad():
          batch_query = {k: v.to(model.device) for k, v in batch_query.items()}
          embeddings_query = model(**batch_query)
        #   print(embeddings_query)
      qs.extend(list(torch.unbind(embeddings_query.to("cpu"))))
  return qs

##### b. query embedding


In [75]:
query = ["Explain the business definition 03?"] 

In [None]:
query

<span style="color: brown;">The query should be a list, not a string!</span>

In [77]:
qs = queries_embedding(query)
# qs[0].shape

In [91]:
type(qs[0])

torch.Tensor

#### 1.2. Document Embedding Process

In [24]:
from pymilvus import connections, Collection, utility
# from pdf2jpg import pdf2jpg
from pdf2image import convert_from_path
import os

##### 1.2.1. Convert PDF to PNG images.

In [6]:
import os

def list_files_in_directory(directory_path):
    """
    Lists all file paths in the specified directory and its subdirectories.

    :param directory_path: Path to the target directory
    :return: List of file paths
    """
    file_paths = []
    for root, _, files in os.walk(directory_path):
        for file in files:
            file_paths.append(os.path.join(root, file))
    return file_paths

# Example usage
directory = 'input/phase_01_documents_02'
all_files = list_files_in_directory(directory)
for file_path in all_files:
    print(file_path)

In [12]:
all_files = list_files_in_directory("input_quick_ingestion")

In [None]:
all_files

In [14]:
# Document Path
pdfs_paths = "input_quick_ingestion"
# The Output Folder Path
output_folder = "output_quick_ingestion"

In [15]:
def conver_pdf2image(pdf_path, output_folder):
    # Images List
    images = convert_from_path(pdf_path=pdf_path, dpi=150, fmt='png')

    # Extract Path
    basename = os.path.splitext(os.path.basename(pdf_path))
    for page_num, page in enumerate(images, start=1):
        image_name = f"{basename[0]}_page_{page_num}.png"
        image_path = os.path.join(output_folder,image_name)
        page.save(image_path)

In [16]:
for pdf_path in all_files :
    conver_pdf2image(pdf_path,output_folder)

##### 1.2.2. pages embedding

In [17]:
image_list = []
for filename in os.listdir(output_folder):
    if filename.lower().endswith(".png"):
        file_path = os.path.join(output_folder, filename)
        image = Image.open(file_path)
        image_list.append(image)
image_list

[<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngImagePlugin.PngImageFile image mode=RGB size=1275x1651>,
 <PIL.PngI

In [20]:
# Ensure that all pages are converted, and none are missed
import os
from PyPDF2 import PdfReader
folder_path = "output_quick_ingestion"
def calculate_total_pages(folder_path):
    total_pages = 0
    for filename in os.listdir(folder_path):
        if filename.lower().endswith(".pdf"):
            pdf_path = os.path.join(folder_path, filename)
            try:
                reader = PdfReader(pdf_path)
                total_pages += len(reader.pages)
            except Exception as e:
                print(f"Error reading {filename}: {e}")
    return total_pages

# Example usage
folder_path = folder_path  # Replace with the path to your folder
total_pages = calculate_total_pages(folder_path)
print(f"Total number of pages in all PDFs: {total_pages}")

Total number of pages in all PDFs: 0


In [21]:
dataloader = DataLoader(
    dataset=ListDataset[str](image_list),
    batch_size=1,
    shuffle=False,
    collate_fn=lambda x: processor.process_images(x),
)

In [22]:
ds: List[torch.Tensor] = []
for batch_doc in tqdm(dataloader):
    with torch.no_grad():
        # Move all tensors in batch_doc to the same device as the model weights,
        # but keep 'input_ids' as Long or Int.
        batch_doc = {
            k: v.to(dtype=torch.bfloat16, device=device) if k != "input_ids" else v.to(device=device)
            for k, v in batch_doc.items()
        }
        embeddings_doc = model(**batch_doc)
    ds.extend(list(torch.unbind(embeddings_doc.to("cpu"))))

100%|██████████| 108/108 [05:03<00:00,  2.81s/it]


In [23]:
len(ds)

108

In [24]:
ds[0].shape

torch.Size([1030, 128])

#### 2. Retrieval Process

#### 2.1. Create a collection

In [6]:
from pymilvus import MilvusClient, DataType, connections, utility
import numpy as np
import concurrent.futures
client = MilvusClient(uri="http://localhost:19530")

from pymilvus import Collection, connections
connections.connect("default", host="localhost", port="19530")

sudo docker run --runtime nvidia --gpus all     -v ~/.cache/huggingface:/root/.cache/huggingface     --env "HUGGING_FACE_HUB_TOKEN=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"     -p 8000:8000     --ipc=host --     vllm/vllm-openai:latest  --model mistralai/Pixtral-12B-2409     --tokenizer_mode mistral     --load_format mistral     --config_format mistral  --max_model_len 12000   --tensor-parallel-size 4  --limit-mm-per-prompt image=3  --gpu-memory-utilization 0.9 
n("phase01") 

In [None]:
client.list_collections()

In [None]:
# client.rename_collection("OperationsITDemandandDelivery", 
#                          "AdvanceSupportBusinessGovernmentSolutions")

In [26]:
collection_name = "CosineTest"

In [29]:
# Initialize the retriever with a Milvus client, collection name, and dimensionality of the vector embeddings.
# If the collection exists, load it.
# if client.has_collection(collection_name=collection_name):
#     client.load_collection(collection_name)
dim = 128

if client.has_collection(collection_name=collection_name):
    client.drop_collection(collection_name=collection_name)

schema = client.create_schema(
    auto_id=True,
    enable_dynamic_fields=True,
)
schema.add_field(field_name="pk", datatype=DataType.INT64, is_primary=True)
schema.add_field(
    field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=dim
)
schema.add_field(field_name="seq_id", datatype=DataType.INT16)
schema.add_field(field_name="doc_id", datatype=DataType.INT64)

# The page path is stored here.
schema.add_field(field_name="doc", datatype=DataType.VARCHAR, max_length=65535)

client.create_collection(
    collection_name=collection_name, schema=schema
)

In [None]:
client.list_collections()

In [9]:
collection = Collection("CosineTest")

In [None]:
collection.num_entities

#### 2.2. Create an index

In [None]:
collection_name

In [35]:
# Release from Memory
client.release_collection(collection_name=collection_name)
# 
client.drop_index(
    collection_name=collection_name, index_name="vector"
)

[Index Algorithms](https://milvus.io/docs/index.md?tab=floating)

In [36]:
index_params = client.prepare_index_params()

index_params.add_index(
    field_name="vector",
    index_name="vector_index",
    index_type="HNSW",  # or any other index type you want
    metric_type="COSINE",  # or the appropriate metric type : IP is the default value
    params={
        "M": 16,
        "efConstruction": 500,
    },  # adjust these parameters as needed
)

In [37]:
client.create_index(
    collection_name=collection_name, index_params=index_params, sync=True #to be certain that the indexing process has completed before performing further operations
)

In [38]:
collection.indexes

[<pymilvus.orm.index.Index at 0x7f6640cbcfd0>]

#### 2.3. Insert Data 

In [None]:
filepaths = []
image_folder = "output_quick_ingestion"

for filename in os.listdir(image_folder):
    if filename.lower().endswith(".png"):
        file_path = os.path.join(image_folder, filename)
        filepaths.append(file_path)
filepaths

for i in filepaths:
    i.replace("output_quick_ingestion/","output/phase01")

In [32]:
len(filepaths)

108

##### a. function

In [33]:
def insert(self, data):
    # Insert ColBERT embeddings and metadata for a document into the collection.
    # Embeddings List
    colbert_vecs = [vec for vec in data["colbert_vecs"]]

    # Patches length
    seq_length = len(colbert_vecs)

    doc_ids = [data["doc_id"] for i in range(seq_length)]

    #
    seq_ids = list(range(seq_length))

    docs = data["filepath"]

    # Insert the data as multiple vectors (one for each sequence) along with the corresponding metadata.
    client.insert(
        collection_name,
        [
            {
                "vector": colbert_vecs[i],
                "seq_id": seq_ids[i],
                "doc_id": doc_ids[i],
                "doc": docs[i],
            }
            for i in range(seq_length)
        ],
    )

##### b. excecution 

In [None]:
filepaths

In [35]:
for i in range(len(filepaths)):
    data = {
        "colbert_vecs": ds[i].float().numpy(),
        "doc_id": i,
        "filepath": filepaths[i], #*******************************************************************************************************
    }

    # Insert ColBERT embeddings and metadata for a document into the collection.
    # Embeddings List
    colbert_vecs = [vec for vec in data["colbert_vecs"]]

    # Patches length
    seq_length = len(colbert_vecs)

    doc_ids = [data["doc_id"] for i in range(seq_length)]

    #
    seq_ids = list(range(seq_length))

    # #
    # docs = [""] * seq_length

    #
    docs = [data["filepath"]] * seq_length

    # Insert the data as multiple vectors (one for each sequence) along with the corresponding metadata.
    client.insert(
        collection_name,
        [
            {
                "vector": colbert_vecs[i],
                "seq_id": seq_ids[i],
                "doc_id": doc_ids[i],
                "doc": docs[i],
            }
            for i in range(seq_length)
        ],
    )

In [50]:
collection.name

'OperationsBusinessGovernmentSolutions'

In [None]:
collection.load()
collection.query(expr="doc_id == 14", output_fields=['doc'])

In [48]:
collection.flush()


#### Optional: Check the Data in Milvus Collections

In [11]:
collection.num_entities

59740

In [None]:
# Define the condition to filter vectors with "output_test_images" in the doc field
expr = "doc like '%output_quick_ingestion%'"

try:
    # Perform the deletion
    collection.delete(expr)
    print(f"Successfully deleted vectors with 'doc' field containing 'output_quick_ingestion'.")

except Exception as e:
    print(f"Error deleting vectors: {e}")

In [None]:
expr

In [None]:
collection.num_entities


In [None]:
# collection.load()
# collection.query(expr='doc == "(\'BUSINESS REQUIREMENT DOCUMENT v2 110420241840\', \'.pdf\')_page_1.png"', output_fields=['vector'])

#### 2.4. Similarity Search

##### a. function

In [33]:
topk = 10
collection_name = "CosineTest"

search_params = {"metric_type": "COSINE", "params": {}}
collection.load()

def retriever(qs, collection_name, topk=topk, search_params= search_params):
  def rerank_single_doc(doc_id, data, client, collection_name):
    # Rerank a single document by retrieving its embeddings and calculating the similarity with the query.
    doc_colbert_vecs = client.query(
        collection_name=collection_name,
        filter=f"doc_id in [{doc_id}, {doc_id + 1}]",
        output_fields=["seq_id", "vector", "doc"],
        limit=100,
    )
    doc_vecs = np.vstack(
        [doc_colbert_vecs[i]["vector"] for i in range(len(doc_colbert_vecs))]
    )

    # dot product between the query and the document embeddings
    score = np.dot(data, doc_vecs.T).max(1).mean()

    # # Cosine similarity between the query and the document embeddings
    # # data = data.reshape(1, -1)
    # doc_vecs = doc_vecs.reshape(len(doc_vecs), -1)
    # cosine_sim = cosine_similarity(data, doc_vecs)
    # score = cosine_sim.max(1).mean()

    return (score, doc_id)
    #***************************************************************************

  images_paths = []
  for query in qs:
      #0. Get all the documents that contain at least 1 similar (token-patch)
      query = query.float().numpy()
      results = client.search(
          collection_name,
          query,
          limit=100,
          output_fields=["vector", "seq_id", "doc_id"],
          search_params=search_params,
      )

      #1. Retrieve all document IDs that contain at least one similarity between the query tokens and the document patches
      doc_ids = set()
      for r_id in range(len(results)): # len(number of tokens) : for each query token
          for r in range(len(results[r_id])): # for each similar patch
              doc_ids.add(results[r_id][r]["entity"]["doc_id"]) # add the document_id to the list
      # print(doc_ids)

      #2. Get the maximum similarity score for each query across all documents :
      # Example: The maximum score for the first query with image one is 20, while the maximum score with image two is 12.
      # Therefore, image one is more similar to the query than the other images.

      #2.1. Create the similarity search function
      scores = []
      #2.2. # Run the rerank(document) task in parallel for up to 300 workers
      with concurrent.futures.ThreadPoolExecutor(max_workers=300) as executor:
              futures = {
                  executor.submit(
                      rerank_single_doc, doc_id, query, client, collection_name
                  ): doc_id
                  for doc_id in doc_ids
              }
              for future in concurrent.futures.as_completed(futures):
                  score, doc_id = future.result()
                  scores.append((score, doc_id))

      scores.sort(key=lambda x: x[0], reverse=True)

      if len(scores) >= topk:
            scores = scores[:topk]
      else:
            scores = scores

      for i in scores:
        image_path = collection.query(expr=f"doc_id == {i[-1]}", output_fields=["doc"], limit=1)[0]['doc']
        images_paths.append(image_path)

      return images_paths, scores

##### b. excecution

In [102]:
len(qs[0])

1030

In [103]:
images_paths, scores = retriever(qs = qs, collection_name='CosineTest')

In [None]:
# 100
images_paths

In [None]:
scores

In [None]:
for i in images_paths:
  img = Image.open(i)
  display(img)
  print("*"*150)
  print("*"*150)

In [None]:
torch.cuda.empty_cache()

### Just for Testing: Perform a Similarity Search Between Two Pages

In [38]:
# 1. keep only the top documents with a threshold score of more then 0.4
relevance_threshold = 0.4

high_scores = []

for score in scores :
    if score[0] > relevance_threshold:
        high_scores.append(score)
high_scores

[(0.6118813, 32), (0.6037415, 42), (0.5936704, 36)]

In [None]:
# 2. Perform similarity search between the relevant pages and their adjacent pages
# 2.1. get the paths of the relevant images
relevant_images_paths = []
for i in high_scores:
    image_path = collection.query(expr=f"doc_id == {i[-1]}", output_fields=["doc"], limit=1)[0]['doc']
    relevant_images_paths.append(image_path)
relevant_images_paths

relevant_images_paths


In [None]:
# 2.3. get the paths of the adjacent images
adjacent_images_paths = []  
output_folder_path = "output_test_images"

documents_paths = set()
for path in relevant_images_paths:  
    document_path = path.split('_page_')[0].split('/')[-1]
    documents_paths.add(document_path)
documents_names = list(documents_paths)
documents_names

# for document_path in documents_paths:
#     documet_length = len([f for f in os.listdir(output_folder_path) if re.match(document_path, f)])

In [57]:
collection = Collection("CosineTest")
document_name = documents_names[0]
result = collection.query(expr=f"doc like '%{document_name}_page_3%'", output_fields=['vector'])

In [99]:
type(result[1029]['vector'])
qs = [torch.tensor([res['vector'] for res in result])]

In [100]:
qs

[tensor([[ 0.0583,  0.1514,  0.0996,  ...,  0.0737, -0.0398, -0.0140],
         [ 0.0913,  0.1245, -0.0908,  ...,  0.0586, -0.0093,  0.0481],
         [ 0.0190,  0.0659,  0.0332,  ..., -0.0439, -0.1533, -0.0160],
         ...,
         [ 0.1162, -0.0391, -0.0041,  ...,  0.0238, -0.2031, -0.0422],
         [ 0.0349,  0.0439,  0.0728,  ...,  0.0693, -0.1719, -0.0525],
         [-0.0160,  0.0605,  0.1523,  ...,  0.0859, -0.1543, -0.0723]])]

### 4. Pixtral 12b with vllm

##### a. Initiate the model

In [None]:
# import os
# os.environ["HUGGING_FACE_HUB_TOKEN"] = "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"  # Replace with your actual token

In [None]:
from IPython.display import Markdown,display
# This class is the main interface for running offline inference with the vLLM engine. 
from vllm import LLM
# It enables you to control various aspects of how the model generates text, such as randomness, token selection, and stopping criteria.
from vllm.sampling_params import SamplingParams

# images processing
import base64

In [None]:
model_name = "mistralai/Pixtral-12B-2409"
max_img_per_msg = 3
sampling_params = SamplingParams(max_tokens=256, temperature=0.2)

In [None]:
import torch
torch.cuda.empty_cache()

In [None]:
llm = LLM(
    model=model_name,
    tensor_parallel_size=4,
    gpu_memory_utilization=0.90,
    tokenizer_mode="mistral",
    load_format="mistral",
    config_format="mistral",
    max_model_len=16348,
    limit_mm_per_prompt={"image": max_img_per_msg},
)

In [None]:
del llm

# 3.

##### b. Answer generation

In [None]:
query[0]

In [None]:
# images_folder_path="/home/administrator/Desktop/tgi/"
# List of local image file paths
image_paths = images_paths

# Function to encode image to base64
def encode_image_to_base64(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

# Encode images
encoded_images = [encode_image_to_base64(path) for path in images_paths]

# Construct messages with base64-encoded images
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text":"use the context provided in the images to answer  this question : \n"\
              + query[0] },
        ] + [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}
            }
            for encoded_image in encoded_images
        ],
    },
    
]

# Generate response
res = llm.chat(messages=messages, sampling_params=sampling_params)


In [None]:
display(Markdown(res[0].outputs[0].text))

In [None]:
from pipeline import Pipeline

In [None]:
from IPython.display import display, Markdown

In [10]:
from vllm import LLM, SamplingParams


In [None]:
from vllm import LLM, SamplingParams

llm = LLM(
    model="mistralai/Pixtral-12B-2409",
    tensor_parallel_size=4,
    gpu_memory_utilization=0.88,
    tokenizer_mode="mistral",
    load_format="mistral",
    config_format="mistral",
    max_model_len=12000,
    limit_mm_per_prompt={"image": 2},
)


In [22]:
conversations = [
    [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "How can I improve my productivity?"},
    ],
    [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "wHat is thence dfference between ML and DL ?"},
    ],
        [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "wHat is thence dfference between ferrari and nissan ?"},
    ],
            [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "tell me a long story about messi.."},
    ]
    # Add more conversations as needed
]


In [23]:
sampling_params = SamplingParams(max_tokens=512, temperature=0.2, top_k=2, top_p=0.9 )

res = llm.chat(messages=conversations, sampling_params=sampling_params)
answer = res[0].outputs[0].text

Processed prompts: 100%|██████████| 4/4 [00:17<00:00,  4.32s/it, est. speed input: 4.69 toks/s, output: 115.98 toks/s]


In [132]:
import requests
# images processing
import base64

url = 'http://localhost:8000/v1/chat/completions'
headers = {
    'Content-Type': 'application/json',
}

# image_paths = ['cat.jpeg','dog.jpeg', 'falcon.jpeg']

# # Function to encode image to base64
# def encode_image_to_base64(image_path):
#     with open(image_path, "rb") as image_file:
#         return base64.b64encode(image_file.read()).decode('utf-8')
    

# # Encode images
# encoded_images = [encode_image_to_base64(path) for path in image_paths]

user_message = "is this correct 7>1 and 8<9 "
headers = {"Content-Type": "application/json"}
payload = {
"model": "OpenGVLab/InternVL2_5-8B-MPO",#"mistralai/Pixtral-12B-2409",
"messages": [
    # {
    #     "role": "system",
    #     "content": [
    #         {
    #             "type": "text",
    #             "text": "You are a helpful assistant capable of utilizing provided documents (in image format) **only if they are relevant** to the user's query.\nHere are the guidelines for your responses:\n- If the user asks a general question that does not require the provided documents(in image format), respond with an answer based on your general knowledge.\n- If the user's question is independent of the provided context, rely solely on your general knowledge to generate a response.\n- If you cannot answer the user's question accurately using either the provided documents or your general knowledge, respond with: 'I don’t have enough information to answer this question.'\n- For conversational queries like greetings or asking about your capabilities, respond politely and explain your purpose as an AI assistant."
    #         }
    #     ]
    # },
    {
        "role": "user",
        "content": [
            {"type": "text",
            "text": "is this correct 7>1 and 8<9  "},

            # {"type": "image_url",
            # "image_url": {"url": f"data:image/png;base64,{encoded_images[0]}"}}, 
            # {"type": "image_url",
            # "image_url": {"url": f"data:image/png;base64,{encoded_images[1]}"}},
            # {"type": "image_url",
            # "image_url": {"url": f"data:image/png;base64,{encoded_images[2]}"}}               # {"type": "image_url", "image_url": {"url": encoded_images[2]}}  # Corrected
        ]
    }
],
"temperature": 0.7,
# "top_p": 0.95, # top_k is not usually a parameter for LLMs. top_p is more common
"max_tokens": 1024,
"top_k": 5,
# "stream": True
}


answer = requests.post(url, headers=headers, json=payload)


In [133]:
import json

In [None]:
# for line in answer.iter_lines():
#     if line:
#         json_str = line.decode('utf-8')[6:]
#         if json_str.strip() != '[DONE]':
#             data = json.loads(json_str)
#             if 'choices' in data and len(data['choices']) > 0:
#                 # print(data['choices'][0]['delta']['content'])
#                 print(data)


In [None]:
print(answer.json()['choices'][0]['message']['content'])

In [None]:
outputs = llm.chat(messages=conversations, sampling_params=sampling_params)

# for output in outputs:
#     response = output.outputs[0].text
#     print(response)


In [None]:
import urllib.parse

user_message = "what about 4/2 is equal to what"
encoded_message = urllib.parse.quote(user_message, safe="")
print(encoded_message)  # Output: what%20about%204%2F2%20is%20equal%20to%20what
