In [1]:
!pip install -q langchain
!pip install -q torch
!pip install -q transformers
!pip install -q sentence-transformers
!pip install -q datasets
!pip install -q faiss-cpu
!pip install -q langchain_community
!pip install -q pypdf


[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip instal

In [2]:
import numpy as np
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFacePipeline
# from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import HuggingFaceHub
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# To grant access to certain models and repositories
from huggingface_hub import login

  from .autonotebook import tqdm as notebook_tqdm


# Loading and preparing the data

In [3]:
loader = PyPDFDirectoryLoader("./documents")

docs_before_split = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 700,
    chunk_overlap  = 50,
)
docs_after_split = text_splitter.split_documents(docs_before_split)

In [4]:
avg_doc_length = lambda docs: sum([len(doc.page_content) for doc in docs])//len(docs)
avg_char_before_split = avg_doc_length(docs_before_split)
avg_char_after_split = avg_doc_length(docs_after_split)

print(f'Before split, there were {len(docs_before_split)} documents loaded, with average characters equal to {avg_char_before_split}.')
print(f'After split, there were {len(docs_after_split)} documents (chunks), with average characters equal to {avg_char_after_split} (average chunk length).')

Before split, there were 71 documents loaded, with average characters equal to 1089.
After split, there were 156 documents (chunks), with average characters equal to 493 (average chunk length).


# Preparing the embedding model

In [5]:
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",  # alternatively use "sentence-transformers/all-MiniLM-l6-v2" for a light and faster experience.
    model_kwargs={'device':'cpu'}, 
    encode_kwargs={'normalize_embeddings': True}
)

  huggingface_embeddings = HuggingFaceBgeEmbeddings(


In [6]:
sample_embedding = np.array(huggingface_embeddings.embed_query(docs_after_split[0].page_content))
print("Sample embedding of a document chunk (only the beginning): ", sample_embedding[:5])
print("Size of the embedding: ", sample_embedding.shape)

Sample embedding of a document chunk (only the beginning):  [-0.01617864  0.05044233  0.01222122 -0.06638864  0.05134476]
Size of the embedding:  (384,)


# Preparing the retrieval system for vector embeddings

In [7]:
vectorstore = FAISS.from_documents(docs_after_split, huggingface_embeddings)

In [8]:
query = """Before using bamboo, with what must it be treated?"""  
         # Sample question, change to other questions you are interested in.
relevant_documents = vectorstore.similarity_search(query)
print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query. Display the first one:\n')
print(relevant_documents[0].page_content)

There are 4 documents retrieved which are relevant to the query. Display the first one:

14 
 
 
 
 
a)                                                                                       b) 
Figure II:5 Seasoning of bamboo a) external view, b) internal view  
II.3.2.2 Preservative treatment options   
Preservatives treatments involve adding toxins to the bamboo to deter fungal and insect 
attack. Many have limited effectiveness or pose major health and safety risks. For 
example, older copper‐based preservatives including copper‐chrome‐arsenic (CCA) 
and a mmoniac‐copper‐arsenate (ACA) should be avoided. The two basic types of 
preservative that are widely used and recognized as efficient, safe, and most


# Create a retriever interface using vector store, we’ll use it later to construct Q & A chain using LangChain.

In [9]:
# Use similarity searching algorithm and return 3 most relevant documents.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

Embedding, vector store and retrieval system ready

# LLM Part

In [10]:
hf = HuggingFacePipeline.from_model_id(
    # model_id="prithivMLmods/Llama-Deepsync-1B",
    # model_id="prithivMLmods/Triangulum-1B",
    # model_id="unsloth/Llama-3.2-1B-Instruct",
    model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct",
    # model_id="PowerInfer/SmallThinker-3B-Preview",
    # model_id="mistralai/Mistral-7B-v0.1",
    task="text-generation",
    pipeline_kwargs={"temperature": 0.1, "max_new_tokens": 300,},
)

llm = hf 
llm.invoke(query)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


'Before using bamboo, with what must it be treated?'

# Putting up the Q & A

In [11]:
prompt_template = """Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

Context: {context}

Question: {question}

Helpful Answer:
"""

PROMPT = PromptTemplate(
 template=prompt_template, input_variables=["context", "question"]
)

In [12]:
retrievalQA = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)

In [13]:
query = """Before using bamboo, with what must it be treated?"""

In [14]:
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])



Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

Context: 14 
 
 
 
 
a)                                                                                       b) 
Figure II:5 Seasoning of bamboo a) external view, b) internal view  
II.3.2.2 Preservative treatment options   
Preservatives treatments involve adding toxins to the bamboo to deter fungal and insect 
attack. Many have limited effectiveness or pose major health and safety risks. For 
example, older copper‐based preservatives including copper‐chrome‐arsenic (CCA) 
and a mmoniac‐copper‐arsenate (ACA) should be avoided. The two basic types of 
preservative that are widely used and recognized as efficient, safe, and most

Moreover, wi

In [15]:
relevant_docs = result['source_documents']
print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')
print("*" * 100)
for i, doc in enumerate(relevant_docs):
    print(f"Relevant Document #{i+1}:\nSource file: {doc.metadata['source']}, Page: {doc.metadata['page']}\nContent: {doc.page_content}")
    print("-"*100)
    print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')

There are 3 documents retrieved which are relevant to the query.
****************************************************************************************************
Relevant Document #1:
Source file: documents\BIBOUM CHARLENE FINAL CORRECTED COPY.pdf, Page: 24
Content: 14 
 
 
 
 
a)                                                                                       b) 
Figure II:5 Seasoning of bamboo a) external view, b) internal view  
II.3.2.2 Preservative treatment options   
Preservatives treatments involve adding toxins to the bamboo to deter fungal and insect 
attack. Many have limited effectiveness or pose major health and safety risks. For 
example, older copper‐based preservatives including copper‐chrome‐arsenic (CCA) 
and a mmoniac‐copper‐arsenate (ACA) should be avoided. The two basic types of 
preservative that are widely used and recognized as efficient, safe, and most
----------------------------------------------------------------------------------------------------


In [16]:
query = """What is the best bamboo species for construction?"""
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])



Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

Context: building results in energy savings, conservation of the world's limited resources, and pollution 
reduction. In addition, bamboo has good mechanical properties. It can be harvested and be used 
in construction and other engineering pro jects efficiently from four to five -year-old. Some 
bamboo species have a higher compressive strength than wood, brick or concrete; and a tensile 
strength that rivals steel.  
However, the utility of bamboo in contemporary buildings has yet to get its recognition as a 
standard structural material . Even though b amboo construction has seen worldwide growth, 
this building technology is not yet popul

In [17]:
relevant_docs = result['source_documents']
print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')
print("*" * 100)
for i, doc in enumerate(relevant_docs):
    print(f"Relevant Document #{i+1}:\nSource file: {doc.metadata['source']}, Page: {doc.metadata['page']}\nContent: {doc.page_content}")
    print("-"*100)
    print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')

There are 3 documents retrieved which are relevant to the query.
****************************************************************************************************
Relevant Document #1:
Source file: documents\BIBOUM CHARLENE FINAL CORRECTED COPY.pdf, Page: 11
Content: building results in energy savings, conservation of the world's limited resources, and pollution 
reduction. In addition, bamboo has good mechanical properties. It can be harvested and be used 
in construction and other engineering pro jects efficiently from four to five -year-old. Some 
bamboo species have a higher compressive strength than wood, brick or concrete; and a tensile 
strength that rivals steel.  
However, the utility of bamboo in contemporary buildings has yet to get its recognition as a 
standard structural material . Even though b amboo construction has seen worldwide growth, 
this building technology is not yet popular in Cameroon.
------------------------------------------------------------------------

In [18]:
query = """What is the average compressive strength parallel to the fibers of bamboo vulgaris?"""
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])



Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

Context: An architectural design of a bamboo hut using the software ArchiCAD 26 was done and the 
structural analysis of the bamboo frame structure was done to demonstr ate the behavior of 
bamboo under load using the software Autodesk Robot Structural Analysis Professional 2022. 
Result obtained show that Bambusa vulgaris has an average shear strength of B. vulgaris for 
specimens without a node is 4.44 Mpa, the average ten sile strength parallel to the fibers of B. 
vulgaris without a node was approximately 110.14 Mpa, and the average compressive strength 
parallel to the fibers of B. vulgaris was approximately 44.8 Mpa suitable properties 

In [19]:
relevant_docs = result['source_documents']
print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')
print("*" * 100)
for i, doc in enumerate(relevant_docs):
    print(f"Relevant Document #{i+1}:\nSource file: {doc.metadata['source']}, Page: {doc.metadata['page']}\nContent: {doc.page_content}")
    print("-"*100)
    print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')

There are 3 documents retrieved which are relevant to the query.
****************************************************************************************************
Relevant Document #1:
Source file: documents\BIBOUM CHARLENE FINAL CORRECTED COPY.pdf, Page: 4
Content: An architectural design of a bamboo hut using the software ArchiCAD 26 was done and the 
structural analysis of the bamboo frame structure was done to demonstr ate the behavior of 
bamboo under load using the software Autodesk Robot Structural Analysis Professional 2022. 
Result obtained show that Bambusa vulgaris has an average shear strength of B. vulgaris for 
specimens without a node is 4.44 Mpa, the average ten sile strength parallel to the fibers of B. 
vulgaris without a node was approximately 110.14 Mpa, and the average compressive strength 
parallel to the fibers of B. vulgaris was approximately 44.8 Mpa suitable properties for  a
----------------------------------------------------------------------------------

In [20]:
query = """In which town and region can native bamboo be found?"""
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])



Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

Context: 31 
CHAPTER III METHODOLOGY 
III.1 Materials 
• Bamboo (Bambusa vulgaris): 
Bambusa vulgaris also known as Common Bamboo is a giant tropical and subtropical 
clumping bamboo native to southern China and Madagascar. This species is cultivated 
extensively in many parts of the world. In Cameroon, it is found in the North West, South 
West, West, Littoral, East, and Center regions. Bambusa vulgaris culms are bright green, 
glossy, erect below and arching above (figureIII:1) and have an average height between 10-
20 m. Internodes are 25-35 cm long, and have an average diameter of 4-10 cm. Wall 
thickness ranges between 7-15 mm. Bambusa v

In [21]:
relevant_docs = result['source_documents']
print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')
print("*" * 100)
for i, doc in enumerate(relevant_docs):
    print(f"Relevant Document #{i+1}:\nSource file: {doc.metadata['source']}, Page: {doc.metadata['page']}\nContent: {doc.page_content}")
    print("-"*100)
    print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')

There are 3 documents retrieved which are relevant to the query.
****************************************************************************************************
Relevant Document #1:
Source file: documents\BIBOUM CHARLENE FINAL CORRECTED COPY.pdf, Page: 41
Content: 31 
CHAPTER III METHODOLOGY 
III.1 Materials 
• Bamboo (Bambusa vulgaris): 
Bambusa vulgaris also known as Common Bamboo is a giant tropical and subtropical 
clumping bamboo native to southern China and Madagascar. This species is cultivated 
extensively in many parts of the world. In Cameroon, it is found in the North West, South 
West, West, Littoral, East, and Center regions. Bambusa vulgaris culms are bright green, 
glossy, erect below and arching above (figureIII:1) and have an average height between 10-
20 m. Internodes are 25-35 cm long, and have an average diameter of 4-10 cm. Wall 
thickness ranges between 7-15 mm. Bambusa vulgaris often occurs spontaneously or
--------------------------------------------------

In [22]:
query = """Which dynasty in the 1300s started using bamboo as a construction material?"""
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])



Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

Context: 11 
II.3 Bamboo as a construction material 
II.3.1 History of bamboo as construction material 
Documented on bamboo dates back 7000 years, but the plant has existed even before humans. 
Over 5000 years ago, Chinese people used bamboo to construct treehouses. Bamboo was used 
as an interior building material with the Ming Dynasty starting in the 1300s. It was used for 
bedding and flooring in homes. This use further established bamboo as a building material for 
indoor and outdoor. In the late 1990s, inventors in C hina developed a treatment that  made 
bamboo look and feel like hardwood . By cutting and planning the bamboo stalks int

In [23]:
relevant_docs = result['source_documents']
print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')
print("*" * 100)
for i, doc in enumerate(relevant_docs):
    print(f"Relevant Document #{i+1}:\nSource file: {doc.metadata['source']}, Page: {doc.metadata['page']}\nContent: {doc.page_content}")
    print("-"*100)
    print(f'There are {len(relevant_docs)} documents retrieved which are relevant to the query.')

There are 3 documents retrieved which are relevant to the query.
****************************************************************************************************
Relevant Document #1:
Source file: documents\BIBOUM CHARLENE FINAL CORRECTED COPY.pdf, Page: 21
Content: 11 
II.3 Bamboo as a construction material 
II.3.1 History of bamboo as construction material 
Documented on bamboo dates back 7000 years, but the plant has existed even before humans. 
Over 5000 years ago, Chinese people used bamboo to construct treehouses. Bamboo was used 
as an interior building material with the Ming Dynasty starting in the 1300s. It was used for 
bedding and flooring in homes. This use further established bamboo as a building material for 
indoor and outdoor. In the late 1990s, inventors in C hina developed a treatment that  made 
bamboo look and feel like hardwood . By cutting and planning the bamboo stalks into planks,
------------------------------------------------------------------------------