In [3]:
import pandas as pd

CSV_PATH = "../wine-ratings.csv"

# Check if the CSV file exists
if not os.path.exists(CSV_PATH):
    print(f"❌ ERROR: CSV file not found at {CSV_PATH}")
else:
    df = pd.read_csv(CSV_PATH)
    print(f"✅ CSV Loaded Successfully! Number of rows: {len(df)}")
    print(df.head())  # Show first 5 rows


✅ CSV Loaded Successfully! Number of rows: 32780
   Unnamed: 0                                               name  grape  \
0           0  1000 Stories Bourbon Barrel Aged Batch Blue Ca...    NaN   
1           1  1000 Stories Bourbon Barrel Aged Gold Rush Red...    NaN   
2           2  1000 Stories Bourbon Barrel Aged Gold Rush Red...    NaN   
3           3    1000 Stories Bourbon Barrel Aged Zinfandel 2013    NaN   
4           4    1000 Stories Bourbon Barrel Aged Zinfandel 2014    NaN   

                    region   variety  rating  \
0    Mendocino, California  Red Wine    91.0   
1               California  Red Wine    89.0   
2               California  Red Wine    90.0   
3  North Coast, California  Red Wine    91.0   
4               California  Red Wine    90.0   

                                               notes  
0  This is a very special, limited release of 100...  
1  The California Gold Rush was a period of coura...  
2  The California Gold Rush was a period of co

In [4]:
import os
import faiss
from langchain.schema import Document
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore

DB_PATH = "faiss_index"
CSV_PATH = "../wine-ratings.csv"
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Delete FAISS index if it exists
if os.path.exists(DB_PATH):
    print("🛑 Deleting old FAISS index...")
    os.system(f"rm -rf {DB_PATH}")

print("📌 Creating a new FAISS index...")

df = pd.read_csv(CSV_PATH)
column_name = "rating" if "rating" in df.columns else df.columns[1]
docs = [Document(page_content=str(row[column_name])) for _, row in df.iterrows()]

vector_store = FAISS(
    embeddings,
    faiss.IndexFlatL2(768),
    InMemoryDocstore({}),
    index_to_docstore_id={}
)

vector_store.add_documents(docs)
vector_store.save_local(DB_PATH)

print(f"✅ FAISS rebuilt with {len(docs)} documents!")


🛑 Deleting old FAISS index...
📌 Creating a new FAISS index...


AssertionError: 

In [5]:
# Check the embedding dimension
test_embedding = embeddings.embed_query("test")
print(f"✅ Embedding Dimension: {len(test_embedding)}")


✅ Embedding Dimension: 384


In [6]:
EMBED_DIM = len(embeddings.embed_query("test"))  # Get correct dimension dynamically

vector_store = FAISS(
    embeddings,
    faiss.IndexFlatL2(EMBED_DIM),  # Correct dimension
    InMemoryDocstore({}),
    index_to_docstore_id={}
)

In [7]:
sample_doc = [Document(page_content="Test document")]
vector_store.add_documents(sample_doc)
print("✅ FAISS can now accept documents!")


✅ FAISS can now accept documents!


In [8]:
import shutil

DB_PATH = "faiss_index"
if os.path.exists(DB_PATH):
    print("🛑 Deleting old FAISS index...")
    shutil.rmtree(DB_PATH)

print("📌 Rebuilding FAISS with correct dimension...")

df = pd.read_csv(CSV_PATH)
column_name = "rating" if "rating" in df.columns else df.columns[1]
docs = [Document(page_content=str(row[column_name])) for _, row in df.iterrows()]

EMBED_DIM = len(embeddings.embed_query("test"))  # Dynamically get correct dimension
vector_store = FAISS(
    embeddings,
    faiss.IndexFlatL2(EMBED_DIM),
    InMemoryDocstore({}),
    index_to_docstore_id={}
)

vector_store.add_documents(docs)
vector_store.save_local(DB_PATH)
print(f"✅ FAISS rebuilt with {len(docs)} documents!")


🛑 Deleting old FAISS index...
📌 Rebuilding FAISS with correct dimension...
✅ FAISS rebuilt with 32780 documents!


In [10]:
if os.path.exists(DB_PATH):
    print("📌 Loading existing FAISS index...")
    vector_store = FAISS.load_local(DB_PATH, embeddings, allow_dangerous_deserialization=True)
    print(f"📌 FAISS index contains {vector_store.index.ntotal} vectors.")

    # 🔍 DEBUG: Print sample stored documents
    print("📄 Sample stored documents from FAISS:")
    sample_docs = vector_store.similarity_search("random", k=3)
    for i, doc in enumerate(sample_docs):
        print(f"📜 Document {i+1}: {doc.page_content[:200]}")  # Print first 200 characters


📌 Loading existing FAISS index...
📌 FAISS index contains 32780 vectors.
📄 Sample stored documents from FAISS:
📜 Document 1: Name: Fairview Pinotage 2005. Grape: nan. Region: South Africa. Variety: Red Wine. Rating: 88.0. Notes: Colour: Vibrant purple red..
📜 Document 2: Name: Fairview Viognier 2009. Grape: nan. Region: South Africa. Variety: White Wine. Rating: 89.0. Notes: White pear and red apple fruit aromas, with a lovely spiciness. Fragrant marmalade and pear fr
📜 Document 3: Name: Fairview Viognier 2007. Grape: nan. Region: South Africa. Variety: White Wine. Rating: 90.0. Notes: Fruity-floral notes of pears, apricots and rose petals with whiffs of lavender. The 2007 Viogn


In [None]:
import requests

# LLaMA_CPP API URL (Check if it's running)
LLAMA_API_URL = "http://127.0.0.1:8080/v1/chat/completions"

# Define a valid OpenAI-compatible request
data = {
    "model": "LLaMA_CPP",
    "messages": [
        {"role": "system", "content": "You are a sommelier helping users find the best wines."},
        {"role": "user", "content": "What is the best wine from America?"}
    ],
    "max_tokens": 50,  # ✅ Reduce tokens for a faster response
    "temperature": 0.7
}

# Send the request
try:
    response = requests.post(LLAMA_API_URL, json=data)
    response.raise_for_status()  # Check for HTTP errors

    # Print the response
    print("✅ Success! Response from LLaMA_CPP:")
    print(response.json())

except requests.exceptions.RequestException as e:
    print("❌ Error:", e)


✅ Success! Response from LLaMA_CPP:
{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'Argentina is known for producing high-quality wines, and it can be challenging to recommend just one. However, some of the most famous and highly regarded wines from Argentina include:\n\n1. Malbec: This grape', 'role': 'assistant'}}], 'created': 1739651709, 'id': 'chatcmpl-2bQ2QXnk3CvCD9S2cAAn46vyYYEnYl34', 'model': 'LLaMA_CPP', 'object': 'chat.completion', 'usage': {'completion_tokens': 50, 'prompt_tokens': 64, 'total_tokens': 114}}
