In [1]:
import os
import openai
import sys
from dotenv import load_dotenv, find_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings
from langchain.vectorstores import FAISS, Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader, SeleniumURLLoader
from SPARQLWrapper import SPARQLWrapper, JSON
from langchain.chat_models import ChatOpenAI
from PyPDF2 import PdfReader
from fastapi import APIRouter
from fastapi import FastAPI
from pydantic import BaseModel
from fastapi.middleware.cors import CORSMiddleware
import nest_asyncio
import uvicorn

In [2]:
load_dotenv()

#using openai api key and loading data
openai.api_key = os.getenv("OPENAI_API_KEY")

In [3]:
data_path = "./data/"

db_directory = "./vector/"

pdf_files= os.listdir(data_path)
print(pdf_files)

def get_pdf_text(data_path, pdf_files):
    
    text = ""

    for pdf_file in pdf_files:
        reader = PdfReader(data_path+pdf_file)
        for page in reader.pages:
            text += page.extract_text()

    return text

text = get_pdf_text(data_path, pdf_files)

['Quick Cooking - Our 100 top recipes presented in one cookbook.pdf', 'recipe-book-1.zp210082.pdf']


In [4]:
def get_chunk_text(text):
    
    text_splitter = CharacterTextSplitter(
    separator = "\n",
    chunk_size = 1000,
    chunk_overlap = 200,
    length_function = len
    )

    chunks = text_splitter.split_text(text)

    return chunks

# print(get_chunk_text(text))
data = get_chunk_text(text)
print(data)

['Quick Cooking\n \nDelicious dishes in no time? That’ s possible! This book compiles over 100\nexpress recipes prepared in next to no time and guaranteed to let you\nshine. With a maximum preparation time of 30 minutes for each recipe –\nthe rest practically takes care of itself.\n \nSnacks & salads, fish & seafood, meat & poultry , pasta creations &\nvegetarian dishes, patisserie & desserts – inside you will find the right\nrecipe for every occasion.Quick Cooking© Naumann & Göbel Verlagsgesellschaft mbH\na subsidiary of VEMAG Verlags- und Medien Aktiengesellschaft\nEmil-Hof fmann-Straße 1, 50996 Cologne (Germany)\nwww .vemag-medien.de\nRecipe photographs: TLC Fotostudio\nCover photograph: StockFood/Emap Esprit\nComplete production: Naumann & Göbel Verlagsgesellschaft mbH\nAll rights reserved\nISBN 978-3-8155-8769-0Contents\n \nThe essential ingredients\n \nSnacks & salads\n \nSalad with sole, salmon and monkfish\nRocket salad with cold duck breast\nGreen asparagus with ham\nChicken s

In [5]:
pdf_loader = DirectoryLoader("./data/", glob="**/*.pdf")
document = pdf_loader.load()
print(type(document))

<class 'list'>


In [7]:
# def joinData(data_path):
#     pdf_loader = DirectoryLoader(data_path, glob="**/*.pdf")

#     with open(data_path + "/" + "urls.txt") as webs:
#         lines =  webs.readlines()
#         webs.close()

#     webs_loaders = SeleniumURLLoader(urls=lines)

#     all_loaders = [pdf_loader, webs_loaders]



#     data = []

#     for loader in all_loaders:
#         data.extend(loader.load())
    
#     return data

In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
document1= text_splitter.split_documents(document)

chat_history = []

embeddings = OpenAIEmbeddings()

vectorstore = Chroma.from_documents(documents=document1, embedding=embeddings, persist_directory=db_directory)

qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever())


class Prompt(BaseModel):
    user_prompt : str

# query = "give me the omelette recipe with ingredients and directions"

# response = qa({"question": query, "chat_history": chat_history})
# print(type(response))

#index_creator = VectorstoreIndexCreator()

# docsearch = index_creator.from_loaders(document)

# chain = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.vectorstore.as_retriever(), input_key="question")

# query = "Recommend me a soup recipe which contains tomato and give me the ingredients and the instructions"

# print(response["answer"])




In [24]:
_ = load_dotenv(find_dotenv()) # read local .env file
openai.api_key = os.environ['OPENAI_API_KEY']

# account for deprecation of LLM model
import datetime
# Get the current date
current_date = datetime.datetime.now().date()

# Define the date after which the model should be set to "gpt-3.5-turbo"
target_date = datetime.date(2024, 6, 12)

# Set the model variable based on the current date
if current_date > target_date:
    llm_model = "gpt-3.5-turbo"
else:
    llm_model = "gpt-3.5-turbo-0301"

#generates a chat completion using OpenAI chat completion API
def get_completion(prompt, model=llm_model):
# define a message in the form of a dictionary with role and content.
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    # return the content of the message from the generated completion.
    return response.choices[0].message["content"]

get_completion("Necesito que me lo conviertas este texto delimitado por triple paréntesis en una consulta de inserción SPARQL con prefijo, hacia un grafo llamado cooking ((({chatbot_prompt}))) y respondeme solo con el fragmento de codigo")

'PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\nPREFIX cooking: <http://example.org/cooking#>\n\nINSERT DATA\n{\n  GRAPH cooking: \n  {\n    ((cooking:recipe1 cooking:hasIngredient cooking:ingredient1))\n    ((cooking:recipe1 cooking:hasIngredient cooking:ingredient2))\n    ((cooking:recipe2 cooking:hasIngredient cooking:ingredient3))\n    ((cooking:recipe2 cooking:hasIngredient cooking:ingredient4))\n  }\n}'

In [16]:
#connetion with anzograph
anzograph_url = "http://localhost:8080/sparql"
usr = "admin"
pssw = "Passw0rd1"

#searching connection
sparql_connection = SPARQLWrapper(anzograph_url)
#defining credentianls
sparql_connection.setCredentials(usr, pssw)

#verifying connection by sending query
sparql_connection.setQuery("SELECT * WHERE { ?s ?p ?o } LIMIT 1")
#returning in json format
sparql_connection.setReturnFormat(JSON)

# executing and sending query to anzogrph
results = sparql_connection.query().convert()

# If there are results, the connection is successful
if results and "results" in results:
    print("Connection successful")
else:
    print("Connection refused")

Connection successful


In [25]:
def convertingQuery(chatbot_prompt):
    prompt_togpt = f"Necesito que me lo conviertas este texto delimitado por triple paréntesis en una consulta de inserción SPARQL con prefijo, hacia un grafo llamado cooking ((({chatbot_prompt}))) y respondeme solo con el fragmento de codigo"
    res = get_completion(prompt_togpt)
    sparql_connection.method = 'POST'
    sparql_connection.setQuery(f""" {res}""".encode("utf-8"))
    sparql_connection.setReturnFormat(JSON)

    results = sparql_connection.query().convert()

    if results in results:
        print('prompt success saved in graph')
    else:
        print('Error: Failed to insert query to anzograph')
    return res

In [26]:
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)


@app.post('/question')
async def chat_answer(prompt: Prompt):
  answer = qa({"question": prompt.user_prompt, "chat_history": chat_history})
  convertingQuery(prompt.user_prompt)
  print(convertingQuery(prompt.user_prompt))
  print(type(answer))
  return answer["answer"]
nest_asyncio.apply()
uvicorn.run(app, port=8081)

INFO:     Started server process [12220]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8081 (Press CTRL+C to quit)


INFO:     127.0.0.1:57458 - "OPTIONS /question HTTP/1.1" 200 OK
prompt success saved in graph
prompt success saved in graph
PREFIX cooking: <http://example.com/cooking#>

INSERT DATA {
  GRAPH cooking: {
    cooking:(Lamb_chops_with_herbs) cooking:hasIngredients "lamb chops, herbs" .
  }
}
<class 'dict'>
INFO:     127.0.0.1:57458 - "POST /question HTTP/1.1" 200 OK
INFO:     127.0.0.1:57477 - "OPTIONS /question HTTP/1.1" 200 OK
prompt success saved in graph
prompt success saved in graph
PREFIX cooking: <http://example.org/cooking#>

INSERT DATA {
  GRAPH cooking:suggest_me_a_recipe {
    (cooking:Recipe1 cooking:hasName "Pasta Carbonara")
    (cooking:Recipe1 cooking:hasIngredient "Spaghetti")
    (cooking:Recipe1 cooking:hasIngredient "Bacon")
    (cooking:Recipe1 cooking:hasIngredient "Egg")
    (cooking:Recipe1 cooking:hasIngredient "Parmesan Cheese")
  }
}
<class 'dict'>
INFO:     127.0.0.1:57477 - "POST /question HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [12220]
