In [None]:
!pip install -q torch transformers langchain langchain-community langchain-huggingface faiss-cpu

In [48]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain

In [49]:
# Load every PDF found under docs/ (recursively), split the pages into overlapping
# chunks and index the chunks in an in-memory FAISS vector store.
documents = []
for root, dirs, files in os.walk("docs"):
    # os.walk yields entries in filesystem order; sorting both the sub-directories
    # (in place, which steers the traversal) and the file names makes the load order,
    # and therefore the index contents' order, the same on every machine and run.
    dirs.sort()
    for file in sorted(files):
        # Compare case-insensitively so files such as "Report.PDF" are not skipped.
        if file.lower().endswith(".pdf"):
            print(f"Loading file: {file}")
            loader = PyPDFLoader(os.path.join(root, file))
            documents.extend(loader.load())

# Fail early with an actionable message: building a FAISS index from zero documents
# otherwise dies further down with an unrelated-looking IndexError.
if not documents:
    raise FileNotFoundError(
        "No PDF content could be loaded from the 'docs' directory; "
        "add at least one .pdf file before building the index."
    )

# chunk_size / chunk_overlap are measured with the splitter's default length function.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# Embed every chunk with the all-MiniLM-L6-v2 sentence-embedding model and index it.
vectorstore = FAISS.from_documents(
    texts,
    HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
)

Loading file: A Beginners Guide to the Stock Market.pdf
Loading file: fastfacts-what-is-climate-change.pdf
Loading file: report.pdf


In [None]:
# Build the answer-generating LLM and wire it to the FAISS retriever.

# Seq2seq checkpoint used to generate answers.
# NOTE(review): a bare name without a namespace presumably refers to a local
# directory next to the notebook — confirm it exists before running.
checkpoint = "LaMini-T5-738M"

# Sampling is enabled in the pipeline below, so seed torch's RNG here; without it
# the same question can produce different answers on every run.
torch.manual_seed(42)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoint,
    device_map="auto",  # device placement is delegated to accelerate
    torch_dtype=torch.float32,
    # trust_remote_code is intentionally NOT enabled: T5 is implemented inside
    # transformers itself, so there is no reason to allow code shipped with the
    # checkpoint to be executed.
)

# Text-to-text generation with low-temperature nucleus sampling; max_length caps
# the length of each generated answer.
pipe = pipeline(
    "text2text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=256,
    do_sample=True,
    temperature=0.2,
    top_p=0.95,
)

# Adapter that exposes the transformers pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=pipe)

# NOTE(review): this instance is not used by the retriever below — the vector store
# was built with its own HuggingFaceEmbeddings object. It is kept so that any other
# cell relying on the name `embeddings` keeps working; drop it if nothing does.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever,
    return_source_documents=True,  # keep the retrieved chunks in the chain output
)

# TODO implement working chat history
# The question cells pass this list to the chain but never append to it, so every
# question is currently answered independently of the previous ones.
chat_history = []

In [51]:
# Empty the shared history so this first question is answered with no prior turns.
chat_history.clear()

question = "What is Understand Myself?"
generated_text = qa.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    }
)
answer = generated_text["answer"]
# The (question, answer) turn is not appended to chat_history, so later questions
# do not see it.
print(answer)

The Understand Myself personality assessment and report is based on the Big Five Aspects Scale, the scientific model that describes your personality through the Big Five factors and each of their two aspects.


In [58]:
# Ask who the report was generated for, passing the current chat_history to the chain.
question = "Who is the Understand Myself report for?"
generated_text = qa.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    }
)
answer = generated_text["answer"]
# The (question, answer) turn is not appended to chat_history, so later questions
# do not see it.
print(answer)

The Understand Myself report is for Mihai Farcas.


In [57]:
# Ask for a definition of the Openness trait; chat_history is passed through unchanged.
question = "What is the Openness trait?"
generated_text = qa.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    }
)
answer = generated_text["answer"]
print(answer)

The Openness trait is a measure of creativity, artistic interest, and intelligence (particularly verbal intelligence) in the Big Five personality trait scientic model.


In [54]:
# Ask for the Openness score found in the indexed report; chat_history is passed
# through unchanged.
question = "What is my score in Openness trait?"
generated_text = qa.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    }
)
answer = generated_text["answer"]
print(answer)

Your score in Openness trait is 75.


In [55]:
# Ask how to interpret the percentile value; chat_history is passed through unchanged.
question = "What does 75 percentile mean in Openness?"
generated_text = qa.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    }
)
answer = generated_text["answer"]
print(answer)

The 75 percentile in Openness indicates where you stand on a particular trait with respect to the population.
