In [19]:
import os
from PyPDF2 import PdfReader
from softtek_llm.chatbot import Chatbot
from softtek_llm.models import OpenAI
from softtek_llm.cache import Cache
from softtek_llm.vectorStores import PineconeVectorStore
from softtek_llm.embeddings import OpenAIEmbeddings
from softtek_llm.schemas import Filter
from dotenv import load_dotenv

In [20]:
def _require_env(name: str) -> str:
    """Return the value of the environment variable ``name``.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value as read from the process environment.

    Raises:
        ValueError: If the variable is missing, or is present but blank.
    """
    value = os.getenv(name)
    if value is None:
        raise ValueError(f"{name} not found in .env file")
    # A blank value would pass the None check and only fail later, deep inside
    # the API clients, so reject it here where the cause is obvious.
    if not value.strip():
        raise ValueError(f"{name} is set but empty")
    return value


# Load settings (expected to live in a .env file) before reading them.
load_dotenv()

# OpenAI credentials, endpoint and model names.
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")
OPENAI_API_BASE = _require_env("OPENAI_API_BASE")
OPENAI_EMBEDDINGS_MODEL_NAME = _require_env("OPENAI_EMBEDDINGS_MODEL_NAME")
OPENAI_CHAT_MODEL_NAME = _require_env("OPENAI_CHAT_MODEL_NAME")

# Pinecone credentials and index location.
PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
PINECONE_ENVIRONMENT = _require_env("PINECONE_ENVIRONMENT")
PINECONE_INDEX_NAME = _require_env("PINECONE_INDEX_NAME")


In [22]:
# Pinecone index handed to the cache below as its vector store.
vector_store = PineconeVectorStore(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENVIRONMENT,
    index_name=PINECONE_INDEX_NAME,
)

# Embeddings client, configured with api_type="azure" and the OpenAI base URL.
embeddings_model = OpenAIEmbeddings(
    api_key=OPENAI_API_KEY,
    model_name=OPENAI_EMBEDDINGS_MODEL_NAME,
    api_type="azure",
    api_base=OPENAI_API_BASE,
)

# Cache built from the vector store and the embeddings model above.
cache = Cache(
    vector_store=vector_store,
    embeddings_model=embeddings_model,
)

# Chat model, using the same key and endpoint as the embeddings client.
model = OpenAI(
    api_key=OPENAI_API_KEY,
    model_name=OPENAI_CHAT_MODEL_NAME,
    api_type="azure",
    api_base=OPENAI_API_BASE,
    verbose=False,
)

# No filters are configured. The name must still be defined because it is
# passed to Chatbot below; add `Filter(...)` instances to this list to use them.
filters = []

chatbot = Chatbot(
    model=model,
    description="You are a very helpful and polite chatbot",
    filters=filters,
    cache=cache,
    verbose=False,
)


In [23]:
# Name of the PDF file to summarize, given directly.
nombre_archivo_pdf = "pdfgg5.pdf"  # Replace with the name of your PDF file

# Open the PDF. A missing file raises instead of calling exit(), which in a
# notebook would shut down the kernel and discard all state.
try:
    pdf_reader = PdfReader(nombre_archivo_pdf)
except FileNotFoundError as exc:
    raise FileNotFoundError(
        f"El archivo '{nombre_archivo_pdf}' no fue encontrado."
    ) from exc

# Concatenate the text of every page in order. `or ""` is a defensive guard in
# case a page yields no text.
texto_a_resumir = "".join(
    page.extract_text() or "" for page in pdf_reader.pages
)

In [24]:
# Ask the chatbot to summarize the extracted PDF text and print the cache score.
# The prompt now ends with "\n\nSummary:"; the original "\n\Summary:" used the
# invalid escape "\S", which put a literal backslash into the prompt.
# NOTE(review): the recorded output for this cell shows a cache score of ~0.999
# and a reply that is not a summary of a PDF — this looks like a stale cache
# hit; confirm the cache contents/threshold before trusting the result.
response = chatbot.chat(
    prompt=f"Summary of the following text:\n{texto_a_resumir}\n\nSummary:",
    print_cache_score=True,
)

Cache score: 0.999154866


In [25]:
# Display the object returned by chatbot.chat() as this cell's output.
response

Response(message=Message(role='assistant', content="Until my last update in September 2021, OpenAI did not provide a specific API for directly summarizing text content from files (e.g., .txt). The OpenAI API at that time was more focused on text generation and answering text-based questions.\n\nTo create a chatbot that summarizes the content of a .txt file, you would generally need to use natural language processing (NLP) libraries and techniques. A common approach would be to use a Python library like NLTK (Natural Language Toolkit) or spaCy to process the text and generate a summary.\n\nHere's a basic example of how you could use the NLTK library to summarize a .txt file:\n\n1. Read the contents of the .txt file.\n2. Preprocess the text by tokenizing it into sentences or words.\n3. Perform any necessary cleaning or filtering of the text.\n4. Use NLTK's summarization methods (e.g., extracting important sentences, using algorithms like TextRank) to generate a summary.\n5. Output the su