In [1]:
pip install -U langchain-community faiss-cpu langchain-huggingface pymupdf tiktoken langchain-ollama python-dotenv





[notice] A new release of pip is available: 24.1.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
import warnings

from dotenv import load_dotenv

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# NOTE(review): presumably works around the duplicate OpenMP runtime error that
# can appear when faiss is loaded next to other native libraries - confirm.
os.environ.update({'KMP_DUPLICATE_LIB_OK': 'True'})

# Load variables from a .env file; the returned flag is shown as the cell output.
load_dotenv()

True

In [3]:
# Show the LANGCHAIN_PROJECT value from the process environment.
# Indexing os.environ raises KeyError when the variable is not set.
os.environ['LANGCHAIN_PROJECT']

'chat_myPDF'

### Document Loader

In [4]:
from langchain_community.document_loaders import PyMuPDFLoader

# Path of one sample paper, used to try the loader on a single file first.
sample_pdf = "./rag-dataset/gym supplements/1. Analysis of Actual Fitness Supplement.pdf"

# Keep the loader and its result under the notebook-level names used by later cells.
loader = PyMuPDFLoader(sample_pdf)
docs = loader.load()

In [5]:
# Take the first loaded document and print its raw text to inspect the extraction quality.
doc = docs[0]
first_page_text = doc.page_content
print(first_page_text)

Citation: Espeño, P.R.; Ong, A.K.S.;
German, J.D.; Gumasing, M.J.J.; Casas,
E.S. Analysis of Actual Fitness
Supplement Consumption among
Health and Fitness Enthusiasts. Foods
2024, 13, 1424. https://doi.org/
10.3390/foods13091424
Academic Editors: Ilija Djekic
and Nada Smigic
Received: 30 March 2024
Revised: 15 April 2024
Accepted: 18 April 2024
Published: 6 May 2024
Copyright: © 2024 by the authors.
Licensee MDPI, Basel, Switzerland.
This article is an open access article
distributed
under
the
terms
and
conditions of the Creative Commons
Attribution (CC BY) license (https://
creativecommons.org/licenses/by/
4.0/).
foods
Article
Analysis of Actual Fitness Supplement Consumption among
Health and Fitness Enthusiasts
Paolo Renzo Espeño 1, Ardvin Kester S. Ong 1,2,*
, Josephine D. German 1
, Ma. Janice J. Gumasing 3
and Ethan S. Casas 1
1
School of Industrial Engineering and Engineering Management, Mapúa University, 658 Muralla St.,
Intramuros, Manila 1002, Philippines
2
E.T. Yuchengo Scho

In [6]:
import os


def find_pdfs(root_dir):
    """Return the paths of all PDF files found under ``root_dir``.

    The directory tree is walked recursively. The extension check is
    case-insensitive, so ``.PDF`` files are included, and the result is sorted
    so that the order does not depend on the order in which the filesystem
    lists entries.

    Args:
        root_dir: Directory to search.

    Returns:
        A sorted list of path strings (``root_dir`` joined with the relative
        location of each file). Empty when nothing matches or when
        ``root_dir`` does not exist.
    """
    found = []
    for root, _dirs, files in os.walk(root_dir):
        for file in files:
            # Lower-case only for the comparison; the stored path keeps its original case.
            if file.lower().endswith('.pdf'):
                found.append(os.path.join(root, file))
    return sorted(found)


pdfs = find_pdfs('rag-dataset')
pdfs

['rag-dataset\\gym supplements\\1. Analysis of Actual Fitness Supplement.pdf',
 'rag-dataset\\gym supplements\\2. High Prevalence of Supplement Intake.pdf',
 'rag-dataset\\health supplements\\1. dietary supplements - for whom.pdf',
 'rag-dataset\\health supplements\\2. Nutraceuticals research.pdf',
 'rag-dataset\\health supplements\\3.health_supplements_side_effects.pdf']

In [7]:
# Load every discovered PDF and collect all resulting documents in one flat list.
docs = []
for pdf_path in pdfs:
    docs += PyMuPDFLoader(pdf_path).load()

In [13]:
# Number of Document objects loaded from all PDFs.
len(docs)

64

### Document Chunking

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings, passed straight through as chunk_size / chunk_overlap.
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Split the loaded documents into smaller chunks for embedding.
chunks = text_splitter.split_documents(docs)

In [9]:
# Compare the number of loaded documents with the number of chunks produced by the splitter.
len(docs), len(chunks)

(64, 311)

In [10]:
# Character length of the first loaded document versus the first chunk.
len(docs[0].page_content), len(chunks[0].page_content)

(4340, 981)

In [12]:
import tiktoken

# Tokenizer that tiktoken associates with the "gpt-4o-mini" model name.
# NOTE(review): the embedding model used later is nomic-embed-text via Ollama, so these
# counts are presumably only a rough size estimate for that model - confirm.
encoding = tiktoken.encoding_for_model("gpt-4o-mini")

# Tokenize the first document and the first chunk, then show both token counts.
page_tokens = encoding.encode(docs[0].page_content)
chunk_tokens = encoding.encode(chunks[0].page_content)
len(page_tokens), len(chunk_tokens)

(969, 294)

### Document Vector Embedding

In [16]:
from langchain_ollama import OllamaEmbeddings

import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [17]:
# Embedding client that talks to the Ollama server at the given base URL.
embeddings = OllamaEmbeddings(
    model='nomic-embed-text',
    base_url="http://localhost:11434",
)

# Embed one sample sentence; its length is used afterwards to size the FAISS index.
sample_text = "this is some text data"
single_vector = embeddings.embed_query(sample_text)


In [18]:
# Length of the sample embedding vector, i.e. the dimensionality the index must be built with.
len(single_vector)

768

In [19]:
# Size the flat L2 index to match the length of the sample embedding.
embedding_dim = len(single_vector)
index = faiss.IndexFlatL2(embedding_dim)

# Show the number of vectors currently stored and the index dimension.
index.ntotal, index.d

(0, 768)

In [20]:
# Assemble the vector store from its parts: the embedding client, the FAISS index
# created earlier, a fresh in-memory docstore, and an empty index-to-docstore-id mapping.
docstore = InMemoryDocstore()
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=docstore,
    index_to_docstore_id={},
)

In [21]:
# Number of chunks that are about to be added to the vector store.
len(chunks)

311

In [22]:
# Add every chunk to the vector store and keep the returned ids.
# NOTE(review): re-running this cell presumably appends the same chunks a second time - confirm.
ids = vector_store.add_documents(documents=chunks)

In [23]:
# Sanity check: the store's index-to-docstore mapping should hold exactly one entry per
# returned id. A mismatch suggests documents were added more than once (for example
# because the add_documents cell was re-run against the same store).
assert len(vector_store.index_to_docstore_id) == len(ids), (
    f"vector store holds {len(vector_store.index_to_docstore_id)} entries "
    f"but add_documents returned {len(ids)} ids"
)
len(ids)

311

In [24]:
# Persist the populated vector store under a local folder name.
db_name = "health_supplements"
vector_store.save_local(db_name)

# Read the store back from disk and show how many entries its mapping contains.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading serialized
# (pickle-based, per the LangChain docs) data. That is acceptable here only because
# the files were written by the save_local call above; never use it on untrusted folders.
new_vector_store = FAISS.load_local(
    db_name,
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)
len(new_vector_store.index_to_docstore_id)

311

### Retrieval

In [26]:
# Interactive retrieval loop: keep asking for a question, run a similarity search
# against vector_store, and print the text of each hit until the user types 'exit'.
# NOTE: this rebinds the notebook-level `docs` (the loaded documents used by the
# chunking cell) to the search results, matching the later retrieval cells.
while True:
    # Strip surrounding whitespace so inputs such as " exit " are still recognised.
    question = input("Enter your question(or type 'exit' to quit): ").strip()

    if question.lower() == 'exit':
        print("Exiting the program.")
        break

    if not question:
        # Blank input: prompt again instead of embedding and searching an empty query.
        continue

    docs = vector_store.search(query=question, search_type='similarity')

    print(f"\nSearch results for: '{question}'\n")

    for doc in docs:
        print(doc.page_content)
        print("\n\n")



Search results for: 'what are the benefits of BCAA supplements?'

Foods 2024, 13, 1424
2 of 21
and sports industry, evidence suggests that creatine can benefit not only athletes but also
the elderly and the general population [6]. Branched-chain amino acids (BCAA) also offer
a plethora of benefits for consumers. As explained by Sanz et al. [7], BCAAs are stored
directly in muscles and serve as the raw materials needed to build new muscle. This
contributes to the overall process of strengthening muscles and alleviating post-workout
soreness. Consumers often integrate these supplements into their routines with the aim of
optimizing the outcomes they wish to achieve and support overall well-being [1].
The sports supplement industry makes for an impressive market share of nearly
$12 billion in the health/wellness portfolio and is projected to be worth $24.4 billion
by 2025. In a study by Burke [4], which investigated 20 published studies about the
prevalence, methods, and reasons for supp

In [23]:
# question = "what is used to reduce weight?"
# question = "what are side effects of supplements?"
# question = "what are the benefits of supplements?"
# question = "what are the benefits of BCAA supplements?"

In [27]:
# Run one fixed question through a plain similarity search.
question = "what is used to gain muscle mass?"
docs = vector_store.search(query=question, search_type='similarity')

# Print each hit's text followed by the same blank-line separator as before.
for doc in docs:
    print(doc.page_content, "\n\n", sep="\n")

acids than traditional protein sources. Its numerous benefits have made it a popular choice
for snacks and drinks among consumers [3]. Another widely embraced supplement is
caffeine, which is found in many sports and food supplements. Caffeine reduces perceived
effort, minimizes fatigue and pain, and proves to be effective for endurance and high-
intensity activities, which is the choice of consumers [4].
Creatine monohydrate is another well-known supplement used to gain muscle mass
and support performance and recovery. It is known not to increase fat mass and remains
effective even when taken in recommended doses [5]. Despite its popularity in the fitness
Foods 2024, 13, 1424. https://doi.org/10.3390/foods13091424
https://www.mdpi.com/journal/foods



and strength gain among men. We detected more prevalent protein and creatine supplementation
among younger compared to older ﬁtness center users, whereas the opposite was found for vitamin
supplementation. Other authors made similar obse

In [28]:
# Search options forwarded to the store for the "mmr" search type.
# NOTE(review): per the LangChain docs, lambda_mult=1 means minimum diversity, so this
# presumably behaves like a plain relevance ranking over the fetch_k candidates - confirm.
mmr_search_kwargs = {'k': 3, 'fetch_k': 100, 'lambda_mult': 1}

retriever = vector_store.as_retriever(search_type="mmr", search_kwargs=mmr_search_kwargs)

In [29]:
# Run the MMR retriever on the current `question`.
# This rebinds the notebook-level `docs` to the retrieved documents.
docs = retriever.invoke(question)

# for doc in docs:
#     print(doc.page_content)
#     print("\n\n")

In [30]:
# Try the retriever with one of several sample questions; the commented-out lines are
# alternatives that can be swapped in. The retrieved documents are shown as the cell output.
#question = "what is used to reduce weight?"
question = "what are side effects of supplements?"
# question = "what are the benefits of supplements?"
# question = "what are the benefits of BCAA supplements?"
docs = retriever.invoke(question)
docs

[Document(metadata={'source': 'rag-dataset\\health supplements\\3.health_supplements_side_effects.pdf', 'file_path': 'rag-dataset\\health supplements\\3.health_supplements_side_effects.pdf', 'page': 1, 'total_pages': 11, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': '', 'producer': 'iLovePDF', 'creationDate': '', 'modDate': 'D:20241021143742Z', 'trapped': ''}, page_content='The intake of dietary supplements is generally safe, but not totally without risk. The current \nreview is not intended to be comprehensive report of all known adverse effect for all dietary \nsupplements. Instead, we have selected to discuss adverse events for the most commonly \nused supplements such as vitamins, minerals, omega-3/fish oil, soy protein, and plant-\nderived antioxidant and anti-inflammatory nutraceuticals. We also discuss weight-loss and \nbody building supplements, and various botanical supplements which have been associated \nwith more severe adverse ef