In [20]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader, WebBaseLoader, ArxivLoader
import os
from dotenv import load_dotenv
import bs4


In [21]:
# Load environment variables for API key
load_dotenv()  # pulls variables from a .env file (if one is found) into the environment

# Fail fast with a readable message: assigning None into os.environ raises an
# opaque "TypeError: str expected, not NoneType" when the key is missing.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key


In [22]:

# Set up the embedding model and one loader per document source
embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002")

# PDFs are picked up from the relative "data" directory
pdf_loader = PyPDFDirectoryLoader("data")

# Restrict HTML parsing to elements carrying these CSS classes.
# NOTE(review): confirm the syllabus page really uses these class names;
# if it does not, the web loader will yield little or no text.
syllabus_strainer = bs4.SoupStrainer(class_=("post-title", "post-content", "post-header"))
web_loader = WebBaseLoader(
    web_paths=("https://learnflix.in/ncert-syllabus-class-7-science.html",),
    bs_kwargs={"parse_only": syllabus_strainer},
)

# arXiv source, queried by paper identifier and capped at two documents
arxiv_loader = ArxivLoader(query="2304.11149", load_max_docs=2)

In [23]:
# Run every loader and gather the results into one flat list
# (order preserved: PDFs first, then the web page, then arXiv)
all_docs = [
    *pdf_loader.load(),
    *web_loader.load(),
    *arxiv_loader.load(),
]



In [24]:

# Break the combined documents into overlapping chunks for embedding
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
split_docs = text_splitter.split_documents(all_docs)



In [25]:
# Create and save FAISS index
if not split_docs:
    # Building an index from zero chunks fails deep inside the vector store;
    # surface the real cause (no documents were loaded) instead.
    raise ValueError("No document chunks to index; check that the loaders returned content.")

texts = [doc.page_content for doc in split_docs]
# Carry each chunk's metadata into the index so that search results can be
# traced back to their origin; without it the stored documents have empty metadata.
metadatas = [doc.metadata for doc in split_docs]
vector_store = FAISS.from_texts(texts, embedding=embedding_model, metadatas=metadatas)
vector_store.save_local("faiss_index")

# Reload from disk so later cells work against the persisted index.
# SECURITY: load_local unpickles the stored docstore, which can execute
# arbitrary code. allow_dangerous_deserialization=True is acceptable here only
# because "faiss_index" was written by this notebook a few lines above; never
# enable it for an index obtained from an untrusted source.
vector_store = FAISS.load_local(
    "faiss_index",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True
)


In [None]:

# Question to ask of the index, and how many chunks to retrieve
query = "What is heat ?"
TOP_K = 5

# Embed the question with the same model that was used to build the index
query_embedding = embedding_model.embed_query(query)

# Nearest-neighbour lookup using the query vector
results = vector_store.similarity_search_by_vector(query_embedding, k=TOP_K)

# Print each hit's text, numbered from 1
for idx, doc in enumerate(results, start=1):
    print(f"Result {idx}:\n{doc.page_content}\n")


Result 1:
(Vogt, 2010). Another example are comic strip 
problems, which are, again similarly to NSPs, based on 
some comic strip sequence featuring science-related 
issues and assking pupils to work on various questions 
and related tasks (Kuhn et al. 2010b). 
 
On a cool morning, you would like to heat your bathroom (A = 20 m2) 
from 12 °C up to 20 °C with a heater (see left). 
1. What time does it need to warm up your bathroom?

Result 2:
7th/8th grade, and “electrical energy” in 9th/10th grade; 
see figures 1, 3 and 5. For the advertisement tasks (see 
sect. 5), the topics were “thermal capacity” and “caloric 
value” (Vogt, 2010). 
Studies were carried out within regular classroom 
teaching as a comparison of intervention (“treatment”) 
and “control” classes (EC and CC, respectively).6 In the 
experimental condition, classes worked with newspaper 
story problems (see Fig. 1a and 5a), while classes in the

Result 3:
2. What is the costs for heating the bathroom, given a price of 30 