### Before you start
- Initialize the environment by running this command:
 python -m venv .venv
- Don't forget to add your API key (`OPENAI_API_KEY`) to the `.env` file

In [None]:
# Libraries to install
!pip install langchain
!pip install python-dotenv
!pip install openai
!pip install pypdf
!pip install bs4
!pip install unstructured[local-inference] -q
!pip install selenium
!pip install pydantic-settings
!pip install chromadb
!pip install tiktoken
!pip install fastapi nest-asyncio pyngrok uvicorn

### Libraries & GPT Settings

In [None]:
# Libraries
import os
import openai
import datetime
import uvicorn
import nest_asyncio

from dotenv import load_dotenv, find_dotenv
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import SeleniumURLLoader
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from pyngrok import ngrok


In [None]:
# GPT API settings
# Load the local .env file FIRST, so that a key stored only there is already
# present in os.environ when it is read below.
_ = load_dotenv(find_dotenv())

# Fail early with a readable message instead of a TypeError/KeyError when the
# key is missing from both the environment and the .env file.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment"
    )
openai.api_key = os.environ["OPENAI_API_KEY"]

# Pick the default chat model by date: after target_date use "gpt-3.5-turbo",
# on or before it use "gpt-3.5-turbo-0301".
current_date = datetime.datetime.now().date()
target_date = datetime.date(2024, 6, 12)

llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

## Functions

In [None]:
def chatWithGPT(prompt, model=llm_model):
    """
    chatWithGPT sends a single user message to the ChatGPT API and returns the reply text
        :prompt: the user prompt, sent as the only message of the conversation
        :model: (optional) name of the GPT model to query
        :return: the "content" of the first choice in the API response
    """
    # temperature is fixed at 0 for every request made through this helper
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

def getAllData(data_dirpath):
    """
    getAllData loads every PDF and every listed web page found under a directory
        :data_dirpath: Directory that holds the PDF files (matched with "**/*.pdf")
                       and a "webURLs.txt" file with one URL per line
        :return: list with everything returned by each loader's .load()
    """
    # Read PDFs
    pdf_loader = DirectoryLoader(data_dirpath, glob="**/*.pdf")

    # Read web URLs in .txt. Strip the trailing newline of every line and drop
    # blank lines, so only clean, non-empty URLs are handed to the browser.
    with open(os.path.join(data_dirpath, "webURLs.txt")) as f:
        urls = [line.strip() for line in f if line.strip()]

    webpages_loader = SeleniumURLLoader(urls=urls)

    documents = []
    for loader in (pdf_loader, webpages_loader):
        documents.extend(loader.load())

    return documents
    
def getChunkText(documents):
    """
    getChunkText splits the loaded documents into smaller chunks
        :documents: documents to split (as returned by getAllData)
        :return: the chunks produced by CharacterTextSplitter.split_documents
    """
    # Split on "." with chunk_size 1000 and chunk_overlap 200, measured with len()
    splitter = CharacterTextSplitter(
        separator=".",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_documents(documents)

def chatWithDocs(question, chat_history):
    """
    chatWithDocs asks the module-level chat_with_docs chain a question
        :question: the user question
        :chat_history: previous conversation turns, forwarded to the chain as-is;
                       this function does not append the new turn to it
        :return: the value stored under "answer" in the chain's response
    """
    chain_inputs = {"question": question, "chat_history": chat_history}
    result = chat_with_docs(chain_inputs)
    return result["answer"]

## Main

### Loading data

In [None]:

# Load every PDF and every URL listed in webURLs.txt from the "data" directory.
documents = getAllData("data")


#### documents in the output

In [None]:
# Sanity check: show one loaded document (index 5 requires at least six documents).
print(documents[5])

### Splitting

In [None]:
# Split the loaded documents into overlapping chunks for embedding.
chunks = getChunkText(documents)

#### chunks in output

In [None]:
# Sanity check: show one chunk (index 22 requires at least 23 chunks).
print(chunks[22])

### Embedding

In [None]:
# Embed every chunk with OpenAI embeddings and index the result in a Chroma vector store.
# NOTE(review): no persist directory is passed, so the index is presumably rebuilt on every run — confirm.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(chunks, embeddings)

### Initialize LangChain - Conversational Retrieval Chain

In [None]:
# Conversation history handed to the chain on every call; it starts empty.
chat_history = []
# Build the retrieval chain on top of the vector store's retriever.
# NOTE: llm_model is not passed here, so the OpenAI wrapper uses its own default model.
chat_with_docs = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever())

#### Tests

In [None]:
# Manual test: English question answered from the indexed documents.
chatWithDocs("Hi, I would like to do some exercise. I want to gain legs muscles", chat_history)

In [None]:
# Manual test: Spanish question ("what is the Turing machine").
chatWithDocs("que es la maquina de turing", chat_history)

### POST request

In [None]:
# Create the FastAPI application that serves the chat endpoint.
app = FastAPI()

# Accept cross-origin requests from any origin, with any method and any header.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=['*'],
    allow_headers=['*'],
)

# Request body schema: a single string field carrying the user's question.
class Prompt(BaseModel):
    user_prompt : str  # text of the user's question

@app.post('/chat_with_docs')
def Post_prompt(prompt : Prompt):
    """
    Post_prompt answers a user prompt through chatWithDocs
        :prompt: request body holding the user's question in user_prompt
        :return: dict with the answer stored under the "response" key
    """
    # Declared with plain `def` instead of `async def`: chatWithDocs is a
    # synchronous, blocking call, and FastAPI runs plain `def` handlers in a
    # threadpool, so the event loop stays free to serve other requests.
    return {"response" : chatWithDocs(prompt.user_prompt, chat_history)}

In [None]:
# Open an ngrok tunnel to local port 8000 and show its public URL.
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)
# NOTE(review): presumably needed so uvicorn can start inside the notebook's
# already-running event loop — confirm.
nest_asyncio.apply()
# Serve the app on the same port the tunnel forwards to.
uvicorn.run(app, port=8000)