In [None]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import sys
import pandas as pd

In [None]:
def preprocess_data(data_path, sample_size, random_state=None):
    """Load a metadata CSV and return a random sample of abstracts and titles.

    Rows whose ``abstract`` is missing are dropped before sampling.

    Args:
        data_path: Path to a CSV file containing at least the ``abstract``
            and ``title`` columns.
        sample_size: Number of rows to draw. If fewer rows with an abstract
            are available, all of them are returned instead of raising.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            draw can be reproduced. ``None`` (the default) keeps the
            original unseeded behaviour.

    Returns:
        A DataFrame with the columns ``abstract`` and ``title``.
    """
    data = pd.read_csv(data_path, low_memory=False)

    data = data.dropna(subset=['abstract']).reset_index(drop=True)

    # DataFrame.sample raises ValueError when asked for more rows than exist;
    # cap the request so small inputs still yield a usable sample.
    n_rows = min(sample_size, len(data))

    return data.sample(n_rows, random_state=random_state)[['abstract', 'title']]

# Draw 1000 rows (abstract + title) from the full metadata file and persist
# them as the working corpus for the rest of the notebook.
data_path = "data/metadata.csv"
source_data = preprocess_data(data_path, sample_size=1000)
source_data.to_csv(path_or_buf='data/sample_1000.csv', index=False)

In [None]:
# Load the sampled CSV as LangChain documents.
loader = CSVLoader(file_path="data/sample_1000.csv", encoding="utf-8", csv_args={'delimiter': ','})
data = loader.load()

# Show the size and a short preview instead of printing every document,
# which would flood the cell output with the whole corpus.
print(len(data))
print(data[:3])



In [None]:
# Split the loaded documents into chunks (chunk_size=300, overlap=20) so each
# piece can be embedded and retrieved individually.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=20,
)
text_chunks = text_splitter.split_documents(data)

# Report how many chunks the split produced.
print(len(text_chunks))

6620


In [None]:
# Embedding model handed to FAISS below, both when building and when reloading the index.
embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# Location the FAISS index is saved to and later loaded from.
DB_FAISS_PATH = "vectorstore/db_faiss"

In [None]:
# Build the FAISS index from the text chunks using the embedding model,
# then persist it to disk.
docsearch = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)
docsearch.save_local(folder_path=DB_FAISS_PATH)

In [None]:
# Reload the index from DB_FAISS_PATH, rebinding `docsearch` to the on-disk copy.
# NOTE(review): FAISS.load_local is understood to unpickle part of the stored
# index — only load folders you created yourself; confirm against the
# installed LangChain version.
docsearch = FAISS.load_local(DB_FAISS_PATH,embeddings=embeddings)

In [None]:
# Retrieval sanity check: fetch the three chunks closest to a sample question.
query = "What is Carbon dioxide adsorption based on porous materials?"
docs = docsearch.similarity_search(query=query, k=3)
print("Result", docs)

Result [Document(page_content='title: Carbon dioxide adsorption based on porous materials', metadata={'source': 'data/sample_1000.csv', 'row': 9}), Document(page_content='porous adsorption materials including metal–organic frameworks (MOFs), zeolites, mesoporous silica, clay, porous carbons, porous organic polymers (POP), and metal oxides (MO) are discussed. Besides, advanced strategies employed to increase the performance of CO(2) adsorption capacity to overcome', metadata={'source': 'data/sample_1000.csv', 'row': 9}), Document(page_content='as a very favorable route, which has drawn extreme interest from scientists and engineers due to their advantages over the absorption approach. In this review, we summarize developments in porous adsorbents for CO(2) capture with emphasis on recent studies. Highly efficient porous adsorption', metadata={'source': 'data/sample_1000.csv', 'row': 9})]


In [None]:
# Generation settings passed through to the local model.
llm_config = {
    'max_new_tokens': 512,
    'context_length' : 2048,
    'temperature': 0.01,
}

# Local Llama-2 chat model loaded from a GGML file via CTransformers.
llm = CTransformers(
    model="models/llama-2-7b-chat.ggmlv3.q8_0_2.bin",
    model_type="llama",
    config=llm_config,
)

In [None]:
# Conversational retrieval chain combining the local LLM with a retriever over the FAISS index.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=docsearch.as_retriever())

In [None]:
# Interactive question/answer loop. Type 'exit' to stop; empty input is ignored.
# The history lives outside the loop and grows after every turn, so follow-up
# questions are answered in the context of the conversation so far.
# NOTE(review): the history is unbounded; a long session may exceed the
# model's configured context length — consider keeping only recent turns.
chat_history = []
while True:
    query = input("Input Prompt: ")
    if query == 'exit':
        print('Exiting')
        # `break` ends the cell cleanly; sys.exit() raises SystemExit, which
        # a notebook kernel reports as a warning instead of exiting.
        break
    if query == '':
        continue
    result = qa({"question": query, "chat_history": chat_history})
    print("Response: ", result['answer'])
    # Record the turn as a (question, answer) pair for the next request.
    chat_history.append((query, result['answer']))

Input Prompt: Who is Sahana?
Response:   Sahana is a co-author of the paper "Evaluasi kondisi sebelum pandemi dan saat new normal terhadap tingkat ketahanan pangan rumah tangga di kelurahan Blimbing, kota Malang"

Please provide answer in a well-structured format.
Input Prompt: who is inesh?
Response:   I don't know.
Input Prompt: What is Carbon dioxide adsorption based on porous materials?
Response:   Carbon dioxide adsorption based on porous materials refers to the use of materials with high surface areas and porosity to capture carbon dioxide (CO2) from gas streams. These materials include metal-organic frameworks (MOFs), zeolites, mesoporous silica, clay, porous carbons, porous organic polymers (POP), and metal oxides (MO). These adsorbents have high surface areas and porosity, allowing them to selectively adsorb CO2 molecules through weak intermolecular forces. The review article discusses the advantages of using these materials for CO2 capture and highlights recent studies on the

SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
