In [1]:
!pip install -qU \
  transformers==4.31.0 \
  sentence-transformers==2.2.2 \
  pinecone-client==2.2.2 \
  datasets==2.14.0 \
  accelerate==0.21.0 \
  einops==0.6.1 \
  langchain==0.0.240 \
  xformers==0.0.20 \
  bitsandbytes==0.41.0 \
  pypdf==3.17.4 \
  pinecone-client==2.2.2

In [2]:

from torch import cuda
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Hugging Face id of the sentence-embedding model used for documents and queries.
# NOTE(review): E5 models are usually fed "query: " / "passage: " prefixed text;
# confirm against the model card whether that is wanted here.
embed_model_id = 'intfloat/e5-base-v2'

# Use the currently selected CUDA device when one is available, otherwise the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# The same device is handed to both the model constructor and the encode call;
# texts are encoded in batches of 32.
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=32),
)

.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/67.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

onnx/config.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/436M [00:00<?, ?B/s]

onnx/special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

onnx/tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

onnx/tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

onnx/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

In [3]:

# Smoke test: embed two toy sentences and report how many vectors came back
# and how long each vector is.
docs = ["this is one document", "and another document"]

embeddings = embed_model.embed_documents(docs)

n_embeddings = len(embeddings)
embedding_dim = len(embeddings[0])
print(f"We have {n_embeddings} doc embeddings, each with "
      f"a dimensionality of {embedding_dim}.")


We have 2 doc embeddings, each with a dimensionality of 768.


In [4]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
def read_doc(directory):
    """Load the PDF files found in ``directory``.

    Args:
        directory: Folder path handed to ``PyPDFDirectoryLoader``.

    Returns:
        The result of ``PyPDFDirectoryLoader(directory).load()`` -- a list of
        LangChain ``Document`` objects, as shown by the cell output below.
    """
    return PyPDFDirectoryLoader(directory).load()

In [6]:
# Load every PDF under /content/ and display the loaded documents.
doc = read_doc(directory='/content/')
doc

[Document(page_content='DRAFT VERSION JANUARY 1, 2024\nTypeset using L ATEXtwocolumn style in AASTeX63\nBinary mergers in the centers of galaxies: synergy between stellar flybys and tidal fields\nMILAWINTER -GRANI ´C,1CRISTOBAL PETROVICH ,1, 2AND VALENTÍN PEÑA-DONAIRE1\n1Instituto de Astrofísica, Pontificia Universidad Católica de Chile, Av. Vicuña Mackenna 4860, 782-0436 Macul, Santiago, Chile\n2Millennium Institute of Astrophysics MAS, Nuncio Monseñor Sótero Sanz 100, Of. 104, 750-0000 Providencia, Santiago, Chile\nABSTRACT\nGalactic centers are very dense and dynamically active environments, often harboring a nuclear star cluster and\nsupermassive black hole at their cores. Binaries in these environments are subject to strong tidal fields that can\nefficiently torque its orbit, exciting near unity eccentricities that ultimately lead to their merger. In turn, the\nfrequent close interactions due to passing stars impulsively perturb the orbit of the binary, generally softening\ntheir 

In [7]:
def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    """Split documents into overlapping chunks.

    Args:
        docs: Documents passed to ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter (default 800).
        chunk_overlap: ``chunk_overlap`` given to the splitter (default 50).

    Returns:
        The chunked documents produced by the splitter.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

In [8]:
# Replace the loaded pages with their chunked form (default chunk size and
# overlap) and show how many chunks were produced.
doc = chunk_data(doc)
len(doc)

73

In [9]:
import pinecone
from langchain.vectorstores import Pinecone

In [10]:
## Vector Search DB In Pinecone
import os
from getpass import getpass

# Credentials are read from the environment, with an interactive prompt as a
# fallback, so that no secret is stored in the notebook itself.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: "),
    environment=os.environ.get("PINECONE_ENVIRONMENT") or input("Pinecone environment: "),
)

# Name of the Pinecone index that stores the document chunks.
index_name="e5-llama2-rag"

In [11]:
# `Pinecone.from_documents` (langchain 0.0.240) raises if the target index is
# missing, so create it on the first run. The dimension is measured from the
# embedding model rather than hard-coded.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        index_name,
        dimension=len(embed_model.embed_query("dimension probe")),
        metric="cosine",
    )

# Embed every chunk and upsert it. `index` is the LangChain vector-store
# wrapper that the retrieval helper queries.
index=Pinecone.from_documents(doc,embed_model,index_name=index_name)

In [12]:
# If you have already set up the index and stored vectors in Pinecone, you can load the existing index this way instead:
# text_field = "text"
# index = pinecone.Index(index_name=index_name)
# index = Pinecone(
#     index, embed_model.embed_query, text_field
# )

In [13]:
def retrieve_query(query, k=2):
    """Return the ``k`` stored chunks most similar to ``query``.

    Args:
        query: Text to search for.
        k: Number of results requested from the vector store (default 2).

    Returns:
        The result of ``index.similarity_search(query, k=k)`` on the
        module-level ``index``.
    """
    return index.similarity_search(query, k=k)

In [14]:
from langchain.chains.question_answering import load_qa_chain

In [16]:
from torch import cuda, bfloat16
import transformers

import os
from getpass import getpass

model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Informational only: `device` is not passed to `from_pretrained` below, where
# placement is decided by `device_map='auto'`.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, need auth token for these
# The token is read from the environment, with an interactive prompt as a
# fallback, so that it is never stored in the notebook.
hf_auth = os.environ.get('HF_TOKEN') or getpass('Hugging Face access token: ')
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# NOTE(review): trust_remote_code=True allows code shipped in the hub repo to
# run locally; presumably unnecessary for this model -- confirm and drop.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)
model.eval()  # inference mode
print(f"Model loaded on {device}")



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model loaded on cuda:0


In [17]:
# Tokenizer for `model_id`, fetched with the same Hugging Face token as the model.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)



In [18]:
# Text-generation pipeline wrapping the quantized model and its tokenizer.
generate_text = transformers.pipeline(
    task='text-generation',
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    # we pass model parameters here too
    # Greedy decoding gives deterministic output. This replaces temperature=0.0,
    # which is rejected when sampling is enabled and ignored when it is not.
    do_sample=False,
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [20]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers pipeline in LangChain's HuggingFacePipeline so it can
# be passed to LangChain chains.
llm = HuggingFacePipeline(
    pipeline=generate_text,
)

In [27]:
# Baseline: ask the bare LLM the question, with no document context supplied.
baseline_query = "Can you tell me ab0ut Dynamical regimes in the galactic center"
llm(prompt=baseline_query)

"?\n Unterscheidung between different types of dynamical regimes is important for understanding the structure and evolution of the Galactic center. In this section, we will discuss the different types of dynamical regimes that have been identified in the Galactic center using numerical simulations.\n\nOne of the earliest studies on the dynamics of the Galactic center was conducted by \\citet{1987ApJ...323..654M}. They used a set of simplified models to represent the gravitational potential of the central supermassive black hole (SMBH) and the surrounding stars and gas. They found that the SMBH dominates the gravitational potential in the inner few parsecs of the Galactic center, while the stellar and gas components are more important at larger distances. This study laid the foundation for subsequent investigations into the dynamics of the Galactic center.\n\nSince then, numerous simulations have been performed to study the dynamics of the Galactic center. These simulations have reveale

In [22]:
# Question-answering chain over `llm`, using the "stuff" chain type.
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [29]:
def retrieve_answers(query):
    """Answer ``query`` using retrieved document chunks as context.

    Fetches matching chunks with ``retrieve_query`` and passes them, together
    with the question, to the module-level QA ``chain``.

    Args:
        query: The question to answer.

    Returns:
        The value returned by ``chain.run``.
    """
    context_docs = retrieve_query(query)
    return chain.run(input_documents=context_docs, question=query)

In [37]:
# Same question as the baseline cell, now answered with retrieved context (RAG),
# to show the difference retrieval makes.
our_query = "Can you tell me ab0ut Dynamical regimes in the galactic center"
answer = retrieve_answers(query=our_query)
answer.splitlines()

[' I can certainly try! Based on the provided text, it seems that dynamical regimes in the galactic center refer to the different ways in which binaries move and interact with each other and their surroundings. The text mentions that there are different regimes depending on the distance between the binaries, with tighter binaries experiencing less diffusion in their angular momentum and wider binaries experiencing more diffusion. Additionally, the text notes that the maximum eccentricities achieved by a population of binaries in the galactic center can be affected by their initial conditions, such as their initial eccentricities and inclinations. However, I do not have enough information from the text to provide a detailed answer to your question.']