# Importing libraries

In [1]:
# LangChain
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

# Suppress all warnings from this point on
# NOTE(review): this blanket filter also hides deprecation warnings — confirm that is intended
import warnings
warnings.filterwarnings('ignore')

# System libraries
import sys
import os

# Append ../src (made absolute against the current working directory) to sys.path;
# presumably this is where the local `modules` package imported below lives
sys.path.append(os.path.abspath('../src'))

# Importing modules
from modules.ingesting import loading_PDF, chunking, data_base
from modules.chat_app import chat_interface

# Loading PDF

In [2]:
# Load the source paper; replace the path below to chat with a different PDF.
# The literal is split across adjacent strings only to keep the lines readable.
pdf = loading_PDF(
    '../data/PDFs/'
    'Sustainable_development_of_distance_learning_in_continuing_adult_education__'
    'The impact_of_artificial_intelligence.pdf'
)

Loading the document...
PDF loaded successfully


# Embedding process
- ```Chunking```
- Adding to the database ```(Chroma)```
- ```RAG (retrieval - Summarisation)```

## Chunking

In [3]:
# Split the loaded PDF into chunks for embedding
chunks = chunking(pdf)

The chunking process is complete with a final number of chunks of 32


## Adding to the database (Chroma)

In [4]:
# Build the vector store (Chroma, per the section heading) from the chunks.
# This cell rebinds the name `data_base` from the imported factory function to the
# store it returns, so a plain `data_base(chunks)` would call the store object on a
# second run of the cell. Fetching the factory under an alias keeps the cell
# re-runnable while `data_base` still refers to the store for the cells below.
from modules.ingesting import data_base as build_vector_db

data_base = build_vector_db(chunks)

Vector database created successfully


## RAG
In this case, the Gemma2 model is used because it is faster; the pipeline was also tested with llama3.2, which is more robust.

In [5]:
# Chat model served by Ollama; used here to rewrite queries and later to answer them
llm = ChatOllama(model='Gemma2')

# Prompt that asks the LLM to rephrase the user's question into alternative queries.
# It is named `query_prompt` (not `prompt`) so it is never confused with the RAG
# answer prompt, which is a different kind of object defined under the name `prompt`.
query_prompt = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

# Multi-query retriever: wraps the vector store's retriever and uses the LLM with
# `query_prompt` to generate the alternative questions before searching.
retriever = MultiQueryRetriever.from_llm(
    retriever=data_base.as_retriever(),
    llm=llm,
    prompt=query_prompt,
)

# Create chain

In [6]:
# Answer prompt: the model is told to rely solely on the retrieved context
template = (
    "Answer the question based ONLY on the following context: {context}\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# Inputs of the prompt: the incoming question is sent to the retriever to fill
# `context`, and is forwarded unchanged as `question`.
rag_inputs = {"context": retriever, "question": RunnablePassthrough()}

# Full pipeline: inputs -> prompt -> LLM -> plain-string output
chain = rag_inputs | prompt | llm | StrOutputParser()

# Chatting with the PDF
## Question 1:

In [7]:
# `chain` must already exist in the kernel: run the "Create chain" cell before this one
chat_interface(chain, "What is the central theme of the paper?")

NameError: name 'chain' is not defined

## Question 2:

In [None]:
# Pass the RAG chain explicitly, as the Question 1 call does
chat_interface(chain, 'How is AI changing adult education?')

## Question 3:

In [None]:
# Pass the RAG chain explicitly, as the Question 1 call does
chat_interface(chain, "What makes adult learners' needs unique?")

## Question 4:

In [None]:
# Pass the RAG chain explicitly, as the Question 1 call does
chat_interface(chain, 'What are the key benefits of AI in distance learning?')

## Question 5:

In [None]:
# Pass the RAG chain explicitly, as the Question 1 call does
chat_interface(chain, 'Which AI tools are mentioned for educational use?')