In [1]:
from langchain_openai import OpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.chains import RetrievalQA
import ast
import os
from dataclasses import dataclass
from typing import List
import inspect
from dotenv import load_dotenv

import os
import json

# Load settings from a local .env file (if present) into the process
# environment before anything below looks up API credentials.
load_dotenv()
# NOTE(review): this name is not referenced again in the visible cells;
# presumably the OpenAI client picks the key up from the environment itself.
openai_api_key = os.getenv('OPENAI_API_KEY')


@dataclass
class Document:
    """A chunk of text together with the metadata describing where it came from.

    Instances are built in ``process_json_file`` and handed to
    ``FAISS.from_documents``; the query cell also re-wraps search hits in it.

    NOTE(review): this is a local stand-in rather than LangChain's own
    ``Document`` class -- confirm that the installed LangChain version only
    needs the two attributes below.
    """

    # The text of one chunk (a stripped, non-empty paragraph).
    page_content: str
    # Free-form metadata; process_json_file stores 'id', 'chunk_index'
    # and 'total_chunks' here.
    metadata: dict


def chunk_text(text):
    """Split ``text`` into paragraph chunks.

    The text is cut at every blank-line separator (``'\\n\\n'``); each piece
    is stripped of surrounding whitespace and pieces that end up empty are
    discarded.

    Args:
        text: The string to split.

    Returns:
        A list of non-empty, stripped strings in their original order.
    """
    stripped_pieces = (piece.strip() for piece in text.split('\n\n'))
    return [piece for piece in stripped_pieces if piece]

def process_json_file(file_path):
    """Build a FAISS vector store from the text blocks of a JSON file.

    The file is expected to hold a list of objects, each with an ``'id'`` and
    a ``'content'`` list whose entries look like
    ``{'type': 'text', 'text': ...}``. Every text entry is split into
    paragraph chunks with ``chunk_text`` and each chunk becomes one
    ``Document`` carrying the parent ``id``, its ``chunk_index`` and the
    ``total_chunks`` of that text entry.

    Entries that do not have this shape (non-dict items, a missing or
    non-list ``'content'``, non-text blocks, non-string ``'text'``) are
    skipped instead of raising.

    Args:
        file_path: Path to the JSON file.

    Returns:
        The FAISS vector store built from all chunks, embedded with
        OpenAI's ``text-embedding-3-large`` model.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        ValueError: If the file yields no text chunks to index.
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

    # Read as UTF-8 explicitly so non-ASCII text does not depend on the
    # platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    docs = []
    for item in data:
        if not isinstance(item, dict):
            continue
        blocks = item.get('content')
        if not isinstance(blocks, list):
            continue
        for content in blocks:
            # Only text blocks are indexed; anything else is ignored.
            if not isinstance(content, dict) or content.get('type') != 'text':
                continue
            text = content.get('text')
            if not isinstance(text, str):
                continue
            chunks = chunk_text(text)
            for i, chunk in enumerate(chunks):
                metadata = {
                    'id': item.get('id'),
                    'chunk_index': i,
                    'total_chunks': len(chunks)
                }
                docs.append(Document(page_content=chunk, metadata=metadata))

    # Fail with a clear message instead of letting an empty list reach the
    # embedding / index-building step.
    if not docs:
        raise ValueError(f"No text chunks found in {file_path!r}; nothing to index.")

    library = FAISS.from_documents(docs, embeddings)
    return library

# Example usage: build the vector store from the JSON file of responses.
# The path is relative to the directory the notebook is run from.
file_path = 'output_directory/all_responses.json'
library = process_json_file(file_path)

# `library` is the vector store; later cells query it with similarity_search.

In [5]:
# Question used for the similarity search and, later, inside the LLM prompt.
query = "How do i service the engine ?"

In [6]:
def _ordinal(n):
    """Return ``n`` with its English ordinal suffix (2nd, 3rd, 4th, 11th, 21st, ...)."""
    # 11, 12 and 13 take "th" even though they end in 1, 2 and 3.
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f'{n}{suffix}'


def print_code_response(query_result):
    """Print a result's text followed by its metadata."""
    content = query_result.page_content
    metadata = query_result.metadata
    print(content)
    print(f"Metadata: {metadata}")


# Hits come back ordered from most to least similar to the query.
query_answers = library.similarity_search(query)

for i, answer in enumerate(query_answers):
    page_content = answer.page_content
    metadata = answer.metadata

    # Copy the hit into this notebook's own Document dataclass so that
    # `query_answer` has that type for later cells.
    query_answer = Document(
        page_content=page_content,
        metadata=metadata
    )
    print('---------------------------------------------')
    if i == 0:
        print('The most relevant page is:')
    else:
        print(f'The {_ordinal(i + 1)} most relevant page is:')
    print_code_response(query_answer)

# After the loop `query_answer` is the LAST (lowest-ranked) hit.
# NOTE(review): a later cell interpolates `query_answer` into a prompt that
# asks for the "most relevant" answer -- confirm whether the top hit or all
# hits were meant to be passed instead.

---------------------------------------------
The most relevant page is:
Metadata: {'id': 'msg_01SGvyPDZRFjt2h2Fm3vrx4X', 'chunk_index': 3, 'total_chunks': 10}
---------------------------------------------
The 2th most relevant page is:
Metadata: {'id': 'msg_013d9fxbopeXaPsiynKHvZXV', 'chunk_index': 10, 'total_chunks': 11}
---------------------------------------------
The 3th most relevant page is:
Metadata: {'id': 'msg_01Syf4WFQd3KcYeZN9ubm5Sm', 'chunk_index': 4, 'total_chunks': 10}
---------------------------------------------
The 4th most relevant page is:
Metadata: {'id': 'msg_018VXSZuR6EVaAPt3gttV8DK', 'chunk_index': 3, 'total_chunks': 11}


In [8]:
import os
import base64
import anthropic
import json
import time
from dotenv import load_dotenv

# Load settings from a local .env file (if present) into the environment.
load_dotenv()

api_key = os.getenv('ANTHROPIC_API_KEY')

# Pass the key explicitly so it is clear where the credentials come from.
# When it is None the SDK falls back to the ANTHROPIC_API_KEY environment
# variable, so behaviour matches a bare Anthropic() call.
client = anthropic.Anthropic(api_key=api_key)

# NOTE(review): `query_answer` is the single Document left over from the
# search cell (its last hit), and its metadata holds 'id', 'chunk_index' and
# 'total_chunks' -- no page number. Confirm the prompt asks for something the
# model can actually read from this input.
prompt = f"Choose the most relevant answer based on the users query and provide only the page number. Here is the query: {query} and here is the answer: {query_answer}?"

message = client.messages.create(
    model="claude-3-5-sonnet-20240620",
    max_tokens=1000,
    temperature=0,  # deterministic output for a lookup-style question
    system="You are a professional manual writer.",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt
                }
            ]
        }
    ]
)
# Prints the list of content blocks returned by the API (not just the text).
print(message.content)

[TextBlock(text='17', type='text')]
