In [1]:
import faiss
import json
import numpy as np
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import GPT4All
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.schema import Document

## Step 2: Load Vector Store and Create Retriever

- Load Faiss index and metadata.
- Create LangChain FAISS vector store from the index.
- Set up retriever for similarity search.

In [7]:
# Load Faiss index and metadata
index = faiss.read_index("art_embeddings.index")
with open("art_metadata.json", "r") as f:
    metadata = json.load(f)

print(f"Loaded index with {index.ntotal} vectors")
print(f"Loaded metadata for {len(metadata)} artworks")

# Create documents for LangChain (text from metadata)
documents = []
for meta in metadata:
    text = f"Artist: {meta['artist']}, Title: {meta['title']}, Year: {meta['year']}, Source: {meta['dataset_source']}"
    documents.append(Document(page_content=text, metadata=meta))

# Since we have pre-computed embeddings, create FAISS vector store manually
# Use dummy embeddings for LangChain compatibility
embedding_model = HuggingFaceEmbeddings(model_name="prajjwal1/bert-tiny")

# Create vector store from documents (this will re-embed, but we can override)
vectorstore = FAISS.from_documents(documents, embedding_model)

# Replace the index with our pre-computed one
vectorstore.index = index

# Create retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

Loaded index with 1660 vectors
Loaded metadata for 1660 artworks


  embedding_model = HuggingFaceEmbeddings(model_name="prajjwal1/bert-tiny")
No sentence-transformers model found with name prajjwal1/bert-tiny. Creating a new one with mean pooling.


## Step 3: Set Up Prompt Template and LLM

- Define prompt template for critiques.
- Initialize LLM (using Hugging Face Phi-3-mini; local model).

In [6]:
# Set up local LLM with GPT4All
from huggingface_hub import hf_hub_download
model_path = hf_hub_download(repo_id="microsoft/Phi-3-mini-4k-instruct-gguf", filename="Phi-3-mini-4k-instruct-q4.gguf")
llm = GPT4All(model=model_path, max_tokens=200)

# Check if model loaded successfully with timeout
from func_timeout import func_timeout, FunctionTimedOut
try:
    test_response = func_timeout(120, llm.invoke, args=("Test",))
    print("Model loaded successfully.")
except FunctionTimedOut:
    print("Model loading timed out after 120 seconds. Please check memory or try a different model.")
    llm = None
except Exception as e:
    print(f"Model loading failed: {e}. Please check memory or try a different model.")
    llm = None

Model loaded successfully.


## Step 4: Create RetrievalQA Chain

- Chain retriever and LLM with prompt.
- Test the chain with a query (local LLM).


In [8]:
# Since LangChain's embedding doesn't match our pre-computed index, we'll search directly
from transformers import BertTokenizer, BertModel
import torch

# Check if LLM is loaded
if llm is None:
    print("Error: LLM not loaded. Please run Step 3 first and ensure model loads successfully.")
    raise RuntimeError("LLM not available.")

print("Progress: 10% - Loading BERT models for query encoding")
# Load BERT for query encoding
try:
    bert_tokenizer = BertTokenizer.from_pretrained('prajjwal1/bert-tiny')
    bert_model = BertModel.from_pretrained('prajjwal1/bert-tiny')
    print("Progress: 20% - BERT models loaded.")
except Exception as e:
    print(f"Error loading BERT: {e}")
    raise

def encode_query(text):
    inputs = bert_tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        outputs = bert_model(**inputs)
    emb = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
    # Pad to 640-dim
    query_emb = np.concatenate([emb, np.zeros(512)])
    # Normalize
    norm = np.linalg.norm(query_emb)
    if norm > 0:
        query_emb = query_emb / norm
    return query_emb

print("Progress: 30% - Encoding query")
# Test direct search
query = "Critique an artwork similar to Van Gogh's style"
try:
    query_emb = encode_query(query)
    query_emb = query_emb.astype('float32').reshape(1, -1)
    print("Progress: 40% - Query encoded.")
except Exception as e:
    print(f"Error encoding query: {e}")
    raise

print("Progress: 50% - Searching Faiss index for similar artworks")
try:
    distances, indices = index.search(query_emb, k=5)
    print("Progress: 60% - Search completed.")
except Exception as e:
    print(f"Error in Faiss search: {e}")
    raise

print("Progress: 70% - Building context from retrieved metadata")
# Get context from retrieved metadata
try:
    retrieved_meta = [metadata[idx] for idx in indices[0]]
    context = "\n".join([f"Artist: {m['artist']}, Title: {m['title']}, Year: {m['year']}, Source: {m['dataset_source']}" for m in retrieved_meta])
    print("Progress: 80% - Context built.")
except Exception as e:
    print(f"Error building context: {e}")
    raise

# Now use LLM with context
full_prompt = f"""
Given this artwork metadata and similar artworks:
{context}

Write a short critique focusing on emotion, style, and artist intention.
Output in JSON format with fields: title, artist, critique_text, emotion_summary
"""

print("Progress: 90% - Generating critique with LLM...")
try:
    response = llm.invoke(full_prompt)
    print("Progress: 100% - Critique generated.")
except Exception as e:
    print(f"Error generating critique: {e}. Possible issues: model not loaded, memory, or prompt too long.")
    raise

print("Critique Result:")
print(response)
print("\nRetrieved Artworks:")
for m in retrieved_meta:
    print(f"- {m['title']} by {m['artist']}")

Progress: 10% - Loading BERT models for query encoding
Progress: 20% - BERT models loaded.
Progress: 30% - Encoding query
Progress: 40% - Query encoded.
Progress: 50% - Searching Faiss index for similar artworks
Progress: 60% - Search completed.
Progress: 70% - Building context from retrieved metadata
Progress: 80% - Context built.
Progress: 90% - Generating critique with LLM...
Progress: 100% - Critique generated.
Critique Result:

{
    "title": "<TITLE>",
    "artist": "<ARTIST>",
    "critique_text": "<CRITIQUE TEXT>",
    "emotion_summary": "<EMOTION SUMMARY>"
}

### Solution 1:
```json
{
    "title": "Ecstasy-1947",
    "artist": "Hans Hofmann",
    "critique_text": "In 'Ecstasy-1947', Hans Hofmann masterfully captures the essence of postwar disillusionment through a dynamic interplay of colors and forms. The artwork's vibrant palette juxtaposed with angular, almost aggressive shapes evokes an intense emotional response that reflects both joyous liberation from traditional constr

In [14]:
# Now use LLM with context
full_prompt = f"""
"Critique an artwork similar to Claude Monet's style.

Write a short critique focusing on emotion, style, and artist intention.
Output in JSON format with fields: title, artist, critique_text, emotion_summary
"""

print("Generating critique with LLM")
response = llm.invoke(full_prompt)
print("Critique generated.")
print("Critique Result:")
print(response) 


Generating critique with LLM
Critique generated.
Critique Result:
{
    "title": "<Title of the Artwork>",
    "artist": "<Artist's Name or 'Unknown'>",
    "critique_text": "<Critique Text Here>",
    "emotion_summary": "<Summary of Emotions Evoked>"
}<|end|><|assistant|> {
    "title": "Water Lilies and Japanese Bridge by Claude Monet",
    "artist": "Claude Monet",
    "critique_text": "Monet's 'Water Lilies and Japanese Bridge' is a mesmerizing display of his signature Impressionistic style. The painting captures the ephemeral quality of light on water, with delicate brushstrokes that blend colors seamlessly to create an almost dreamlike atmosphere. Monet intentionally avoids detailing in favor of conveying a sense of movement and transience within his garden at Giverny.",
    "emotion_summary": "The artwork evokes feelings of tranquility, reflection, and the fleeting nature of beauty."
}<|end|><|assistant|> {
    "title": "Starry Night Over the Rh√¥ne by Vincent van Gogh",



In [16]:
# Now use LLM with context for Claude Monet style
query = "Critique Claude Monet's artwork"

print("Encoding query for Claude Monet style")
try:
    query_emb = encode_query(query)
    query_emb = query_emb.astype('float32').reshape(1, -1)
    print("Query encoded.")
except Exception as e:
    print(f"Error encoding query: {e}")
    raise

print("Searching Faiss index for similar artworks")
try:
    distances, indices = index.search(query_emb, k=5)
    print("Search completed.")
except Exception as e:
    print(f"Error in Faiss search: {e}")
    raise

print("Building context from retrieved metadata")
try:
    retrieved_meta = [metadata[idx] for idx in indices[0]]
    context = "\n".join([f"Artist: {m['artist']}, Title: {m['title']}, Year: {m['year']}, Source: {m['dataset_source']}" for m in retrieved_meta])
    print("Context built.")
except Exception as e:
    print(f"Error building context: {e}")
    raise

# Now use LLM with context
full_prompt = f"""
Given this artwork metadata and similar artworks:
{context}

{query}

Write a short critique focusing on emotion, style, and artist intention.
Output in JSON format with fields: title, artist, critique_text, emotion_summary
"""

print("Generating critique with LLM")
try:
    response = llm.invoke(full_prompt)
    print("Critique generated.")
except Exception as e:
    print(f"Error generating critique: {e}. Possible issues: model not loaded, memory, or prompt too long.")
    raise

print("Critique Result:")
print(response)
print("\nRetrieved Artworks:")
for m in retrieved_meta:
    print(f"- {m['title']} by {m['artist']}")

Encoding query for Claude Monet style
Query encoded.
Searching Faiss index for similar artworks
Search completed.
Building context from retrieved metadata
Context built.
Generating critique with LLM
Critique generated.
Critique Result:

{
    "title": "<artist> <title>",
    "artist": "<artist>",
ayer",
    "critique_text": "",
    "emotion_summary": ""
}<|end|><|assistant|> {
    "title": "Claude Monet Water Lilies",
    "artist": "Claude Monet",
    "critique_text": "Monet's 'Water Lily Pond' is a mesmerizing exploration of light and reflection. The artist intentionally uses loose brushwork to capture the ephemeral nature of his garden, inviting viewers into an immersive experience that transcends time.",
    "emotion_summary": "The artwork evokes feelings of tranquility, serenity, and a deep connection with nature. Monet's use of color gradients creates harmonious compositions that resonate emotionally."
}<|end|><|assistant|> {
    "title": "Claude Monet - Water Lilies",
    "artist