# External lab: Create a RAG with LLM using your own data

## Concepts Covered

* Retrieval Augmented Generation

* Large Language Models using Llamafile

* Using Vector databases like Qdrant

* Creating embeddings with Sentence Transformers

* Using OpenAI's Python API to connect to the LLM and produce responses

In [29]:
import pandas as pd
from sklearn.datasets import load_wine
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [30]:
# Load dataset
df = pd.DataFrame(load_wine().data, columns=load_wine().feature_names)
df["target"] = load_wine().target

df

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0,2
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0,2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0,2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0,2


In [31]:
df['target'].unique()

array([0, 1, 2])

In [32]:
# Change the target names from numbers to strings
# 0 -> 'Barolo', 1 -> 'Grignolino', 2 -> 'Barbera'
df['target'] = df['target'].map({0: 'Barolo', 1: 'Grignolino', 2: 'Barbera'})

In [33]:
#Convert data to dictionary
data = df.to_dict(orient='records')

In [34]:
# Initialize encoder
encoder = SentenceTransformer("all-MiniLM-L6-v2")  # Model to create embeddings

In [35]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [36]:
# Collection name
collection_name = "wines_quality"

# Check if the collection exists, then create or delete as needed
if qdrant.collection_exists(collection_name):
    qdrant.delete_collection(collection_name)

# Create a new collection
qdrant.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),  # Vector size from model
        distance=models.Distance.COSINE
    )
)

# Vectorize and upload points
qdrant.upload_points(
    collection_name=collection_name,
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(str(doc["target"])),  # Convert int to string
            payload=doc
        ) for idx, doc in enumerate(data)
    ]
)

print(f"Collection '{collection_name}' created successfully and data uploaded.")

Collection 'wines_quality' created successfully and data uploaded.


In [37]:
user_prompt = "Suggest me a wine with lowest alcalinity_of_ash content."

In [38]:
# Search for the best matching wines
hits = qdrant.query_points(
    collection_name=collection_name,
    query= encoder.encode(user_prompt),
    limit=20
)

# Print results
for hit in hits.points:
    print(hit.id, hit.payload, 'score=', hit.score)


54 {'alcohol': 13.74, 'malic_acid': 1.67, 'ash': 2.25, 'alcalinity_of_ash': 16.4, 'magnesium': 118.0, 'total_phenols': 2.6, 'flavanoids': 2.9, 'nonflavanoid_phenols': 0.21, 'proanthocyanins': 1.62, 'color_intensity': 5.85, 'hue': 0.92, 'od280/od315_of_diluted_wines': 3.2, 'proline': 1060.0, 'target': 'Barolo'} score= 0.20527194600974985
55 {'alcohol': 13.56, 'malic_acid': 1.73, 'ash': 2.46, 'alcalinity_of_ash': 20.5, 'magnesium': 116.0, 'total_phenols': 2.96, 'flavanoids': 2.78, 'nonflavanoid_phenols': 0.2, 'proanthocyanins': 2.45, 'color_intensity': 6.25, 'hue': 0.98, 'od280/od315_of_diluted_wines': 3.03, 'proline': 1120.0, 'target': 'Barolo'} score= 0.20527194600974985
39 {'alcohol': 14.22, 'malic_acid': 3.99, 'ash': 2.51, 'alcalinity_of_ash': 13.2, 'magnesium': 128.0, 'total_phenols': 3.0, 'flavanoids': 3.04, 'nonflavanoid_phenols': 0.2, 'proanthocyanins': 2.08, 'color_intensity': 5.1, 'hue': 0.89, 'od280/od315_of_diluted_wines': 3.53, 'proline': 760.0, 'target': 'Barolo'} score= 0.

In [39]:
# Define a variable to hold the search results including ID and payload
search_results = [{"id": hit.id, "payload": hit.payload} for hit in hits.points]

print(type(search_results))  # Debugging check

# Ensure search_results is properly formatted as a string
if isinstance(search_results, list):
    # Convert all three selected results into a formatted string
    formatted_search_results = "\n".join([f"ID: {item['id']}, Data: {item['payload']}" for item in search_results])
else:
    formatted_search_results = str(search_results)  # Convert to string if needed

print(type(formatted_search_results))  # Debugging check
print(formatted_search_results)  # Print the final formatted output


<class 'list'>
<class 'str'>
ID: 54, Data: {'alcohol': 13.74, 'malic_acid': 1.67, 'ash': 2.25, 'alcalinity_of_ash': 16.4, 'magnesium': 118.0, 'total_phenols': 2.6, 'flavanoids': 2.9, 'nonflavanoid_phenols': 0.21, 'proanthocyanins': 1.62, 'color_intensity': 5.85, 'hue': 0.92, 'od280/od315_of_diluted_wines': 3.2, 'proline': 1060.0, 'target': 'Barolo'}
ID: 55, Data: {'alcohol': 13.56, 'malic_acid': 1.73, 'ash': 2.46, 'alcalinity_of_ash': 20.5, 'magnesium': 116.0, 'total_phenols': 2.96, 'flavanoids': 2.78, 'nonflavanoid_phenols': 0.2, 'proanthocyanins': 2.45, 'color_intensity': 6.25, 'hue': 0.98, 'od280/od315_of_diluted_wines': 3.03, 'proline': 1120.0, 'target': 'Barolo'}
ID: 39, Data: {'alcohol': 14.22, 'malic_acid': 3.99, 'ash': 2.51, 'alcalinity_of_ash': 13.2, 'magnesium': 128.0, 'total_phenols': 3.0, 'flavanoids': 3.04, 'nonflavanoid_phenols': 0.2, 'proanthocyanins': 2.08, 'color_intensity': 5.1, 'hue': 0.89, 'od280/od315_of_diluted_wines': 3.53, 'proline': 760.0, 'target': 'Barolo'}
I

In [None]:
len(formatted_search_results)
#The Llama-3.2-1B-Instruct.Q6_K model has a maximum context window size of 128,000 tokens

6468

# Use LLAMA File to run locally
https://github.com/Mozilla-Ocho/llamafile

Command: Llama-3.2-1B-Instruct.Q6_K.exe --server --v2 -ngl 9999

Note: A llama.log file will be created automatically; use it to get the details about the model

One the above is up and running, move forward.

In [40]:
# Testing if the model is up and running
from openai import OpenAI
client = OpenAI(
    base_url="http://localhost:8080/v1", # "http://<Your api-server IP>:port"
    api_key = "sk-no-key-required"
)
completion = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=[
        {"role": "system", "content": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests."},
        {"role": "user", "content": "Write a limerick about python exceptions"}
    ]
)
print(completion.choices[0].message)

ChatCompletionMessage(content='There once was a Python so bright,\nWhose exceptions were a wondrous sight.\nThe `ValueError` did show,\nWhen a number was too low,\nAnd `ZeroDivisionError` took flight.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)


In [41]:
# Now time to connect to the local large language model
from openai import OpenAI
client = OpenAI(
    base_url="http://127.0.0.1:8080/v1", # "http://<Your api-server IP>:port"
    api_key = "sk-no-key-required"
)

# Ensure model name is correct (check from the API)
model_name = "LLaMA_CPP" 

# Query the model
print("Runnning the model...")
completion = client.chat.completions.create(
    model=model_name,
    messages=[
        {"role": "system", "content": "You are a chatbot, a wine specialist. Your priority is to help users select wines based on their needs. Only select one wine (Target) from the available options given in the content. Ensure that you mention the wine name (target) as well as the wine id."},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": formatted_search_results}  # Ensure variable exists
    ]
)

print(completion.choices[0].message)

Runnning the model...
ChatCompletionMessage(content="ID: 36, Data: {'alcohol': 13.28, 'malic_acid': 1.64, 'ash': 2.84, 'alcalinity_of_ash': 15.2, 'magnesium': 110.0, 'total_phenols': 2.6, 'flavanoids': 2.68, 'nonflavanoid_phenols': 0.34, 'proanthocyanins': 1.36, 'color_intensity': 4.6, 'hue': 1.09, 'od280/od315_of_diluted_wines': 2.78, 'proline': 880.0, 'target': 'Barolo'}\n\nThis wine has the lowest alcalinity_of_ash content among the options provided, making it a suitable choice for those seeking a balanced and approachable Barolo.", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)


In [42]:
print(completion.choices[0].message.content)

ID: 36, Data: {'alcohol': 13.28, 'malic_acid': 1.64, 'ash': 2.84, 'alcalinity_of_ash': 15.2, 'magnesium': 110.0, 'total_phenols': 2.6, 'flavanoids': 2.68, 'nonflavanoid_phenols': 0.34, 'proanthocyanins': 1.36, 'color_intensity': 4.6, 'hue': 1.09, 'od280/od315_of_diluted_wines': 2.78, 'proline': 880.0, 'target': 'Barolo'}

This wine has the lowest alcalinity_of_ash content among the options provided, making it a suitable choice for those seeking a balanced and approachable Barolo.
