### Importing packages

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import accelerate
import torch
import time
from pprint import pprint

### Declaring text generation model, tokenizer, computational device and optional streamer

In [5]:
# Select the compute device: the GPU with index `gpu` when CUDA is available,
# otherwise the CPU.
gpu = 0
cuda_available = torch.cuda.is_available()
device = torch.device(f"cuda:{gpu}" if cuda_available else "cpu")
if cuda_available:
    # Make the selected GPU the current CUDA device.
    torch.cuda.set_device(device)
# Display the name of the device in use. The CUDA query is guarded so the cell
# also runs on CPU-only machines, and it reports the selected GPU rather than
# always GPU 0.
torch.cuda.get_device_name(device) if cuda_available else "cpu"

'NVIDIA GeForce RTX 2070 SUPER'

In [6]:
# Hugging Face Hub identifier of the model to load. The commented line below is
# an alternative checkpoint; uncomment it (and comment the active one) to switch.
name = "TheBloke/Llama-2-7b-Chat-GPTQ"
# name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"

# Optional Hugging Face auth token, read from a local credentials file.
# Currently disabled; the `use_auth_token` arguments further down are commented out too.
# file_path = "../../huggingface_credentials.txt"
# with open(file_path, "r") as file:
#     auth_token = file.read().strip()

In [7]:
# Create the tokenizer that matches the model checkpoint `name`.
# `device_map` is a model-loading option and has no meaning for a tokenizer, so
# it is not passed here; encoded tensors are moved to the device at call time.
tokenizer = AutoTokenizer.from_pretrained(
    name,
    # cache_dir='./model/',
    # use_auth_token=auth_token,
)

In [8]:
# from huggingface_hub import notebook_login
# notebook_login()

In [9]:
# Load the causal language model for checkpoint `name`.
# - No hard-coded, user-specific cache directory: the library's default cache
#   location is used, which keeps the notebook portable across machines.
# - `device_map=device` places the weights on the device selected earlier, so a
#   separate `.to(device)` call is not needed.
model = AutoModelForCausalLM.from_pretrained(
    name,
    device_map=device,
    # cache_dir='./model/',
    # use_auth_token=auth_token,
    # torch_dtype=torch.float16,
    # low_cpu_mem_usage=True,
    # rope_scaling={"type": "dynamic", "factor": 2},
    # load_in_8bit=True,
)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
binary_path: D:\NLP 1\venv\Lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll
CUDA SETUP: Loading binary D:\NLP 1\venv\Lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll...


In [10]:
# Streamer bound to the tokenizer; configured to skip the prompt and special tokens.
streamer = TextStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

### Declare inference function

In [11]:
def llm_inference(plain_text, model, tokenizer, device, streamer=None, max_length=4000, max_new_tokens=None):
    """Generate a completion for ``plain_text`` and return it as decoded text.

    Args:
        plain_text: Prompt string handed to ``tokenizer``.
        model: Object exposing ``generate(input_ids, **kwargs)``. If it has a
            ``config.max_position_embeddings`` attribute, that value is used as
            the context window when deriving the generation budget.
        tokenizer: Callable tokenizer that also provides ``batch_decode``.
        device: Device the encoded prompt is moved to before generation.
        streamer: Optional streamer forwarded to ``model.generate``.
        max_length: Maximum number of prompt tokens; longer prompts are truncated.
        max_new_tokens: Upper bound on the number of generated tokens. When
            ``None`` (default), the remaining context window is used:
            context window minus prompt length, but at least 1.

    Returns:
        The first sequence returned by ``tokenizer.batch_decode`` for the
        generated ids, decoded with special tokens skipped.
    """
    # Used only when the model does not advertise its own context window.
    fallback_context_window = 4096

    encoded = tokenizer(
        plain_text,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )
    input_ids = encoded["input_ids"].to(device)

    generate_kwargs = {"streamer": streamer, "use_cache": True}
    # Forward the attention mask when the tokenizer produced one, so generate()
    # does not have to infer it from the input ids.
    if "attention_mask" in encoded:
        generate_kwargs["attention_mask"] = encoded["attention_mask"].to(device)

    if max_new_tokens is None:
        # generate() expects an integer budget; float('inf') is not a valid
        # token count. Bound generation by what still fits in the context window.
        context_window = getattr(getattr(model, "config", None), "max_position_embeddings", None)
        if not isinstance(context_window, int) or context_window <= 0:
            context_window = fallback_context_window
        max_new_tokens = max(1, context_window - input_ids.shape[-1])
    generate_kwargs["max_new_tokens"] = int(max_new_tokens)

    output_ids = model.generate(input_ids, **generate_kwargs)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]

### Generating texts using a trained model

In [61]:
# Run the prompt through llm_inference, passing `streamer`, and display the
# returned text as the cell output.
text = "what are the steps to train a machine learning model? explain in less than 100 words"
res = llm_inference(
    plain_text=text,
    model=model,
    tokenizer=tokenizer,
    device=device,
    streamer=streamer,
)
res



"what are the steps to train a machine learning model? explain in less than 100 words.\nTo train a machine learning model, you typically follow these steps:\n1. Collect and preprocess data.\n2. Choose a machine learning algorithm.\n3. Split the data into training and validation sets.\n4. Train the model on the training set.\n5. Evaluate the model's performance on the validation set.\n6. Fine-tune the model as needed.\n7. Test the final model on new data."

### Set up the vector database

In [12]:
import chromadb
from llama_index import VectorStoreIndex, SimpleDirectoryReader, get_response_synthesizer
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.prompts.prompts import SimpleInputPrompt
from llama_index.llms import HuggingFaceLLM
from llama_index.embeddings import LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import set_global_service_context
from llama_index import ServiceContext
from llama_index import VectorStoreIndex, download_loader
from llama_index import SimpleDirectoryReader
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.postprocessor import SimilarityPostprocessor
from llama_index.vector_stores import MilvusVectorStore
from pathlib import Path

### Chroma db

In [13]:
# Chroma client persisted at the given relative path.
db = chromadb.PersistentClient(
    path=r"../../vector_dbs/admin/vdb_satellite2",
)

# Fetch the "default" collection, creating it if it does not exist yet.
chroma_collection = db.get_or_create_collection(name="default")

# Wrap the collection so it can serve as the vector store of the storage context.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

In [14]:
# Storage context that uses the Chroma-backed vector store.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [15]:
# System prompt in the Llama-2 chat format ([INST] / <<SYS>> markers).
# NOTE(review): the prompt starts with a literal "<s>"; if the tokenizer also
# prepends a BOS token, the sequence may start with two of them. Confirm.
system_prompt = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as
helpfully as possible, while being safe.
If a question does not make any sense, or is not factually coherent, explain
why instead of answering something not correct. If you don't know the answer
to a question, please express that you do not have information or knowledge in
that context and please don't share false information.
Try to be exact in information and numbers you tell.
Your goal is to provide answers based on the information provided and your other
knowledge.<</SYS>>
"""

# Template applied to each query: the query text followed by the closing [/INST] tag.
query_wrapper_prompt = SimpleInputPrompt("{query_str} [/INST]")

In [16]:
# Wrap the already-loaded model and tokenizer as a LlamaIndex LLM, using the
# system prompt and query wrapper defined earlier.
llm = HuggingFaceLLM(
    model=model,
    tokenizer=tokenizer,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    context_window=4096,
    max_new_tokens=512,
)

# Embedding model: the "all-MiniLM-L6-v2" Hugging Face embeddings via the LangChain adapter.
embeddings = LangchainEmbedding(HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"))

In [17]:
# Build a service context from the LLM, the embedding model and the chunking
# settings, then register it as the global service context.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embeddings,
    chunk_size=1024,
    chunk_overlap=20,
)
set_global_service_context(service_context)

### Load Vector DB

In [18]:
# Build the index on top of the existing vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
)

### Insert a single document into the vector db

In [37]:
# Fetch the PyMuPDFReader loader class by name and instantiate it.
PyMuPDFReader = download_loader("PyMuPDFReader")
loader = PyMuPDFReader()

# Load one PDF file. Despite its name, `doc_dir` holds the path of a single
# file; the commented line is an alternative document.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
# doc_dir = r"D:\NLP 1\RAG-webapp\documents_db\Sattelite imagery article scripts.pdf"
doc_dir = r"D:\NLP 1\RAG-webapp\documents_db\CLIMATE_CHANGE_2023.pdf"
document = loader.load(file_path=Path(doc_dir), metadata=False)

# Insert every loaded document object into the existing index, one at a time.
for doc in document:
    index.insert(doc, )

### Insert directory of documents into the vector db

In [16]:
# Read every document found in the directory.
documents = SimpleDirectoryReader(input_dir=r"C:\Users\user2\Desktop\RAG_Docs").load_data()

# Build an index from those documents; this rebinds `index`.
index = VectorStoreIndex.from_documents(documents=documents, storage_context=storage_context)

In [35]:
# Default query engine created directly from the index.
query_engine = index.as_query_engine()
# Alternative: build a customized query engine (see "Customizing query engine" below).

### Customizing query engine

In [19]:
# Response synthesizer with streaming enabled.
response_synthesizer = get_response_synthesizer(streaming=True)

# Retriever over the index, configured with similarity_top_k=3.
retriever = VectorIndexRetriever(index=index, similarity_top_k=3)

# Query engine assembled from the retriever, the synthesizer and a similarity
# postprocessor whose cutoff is 0.0.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.0),
    ],
)

### Inferencing

In [20]:
# Run one query against the query engine and print the answer as it streams.
# The commented lines are alternative example questions; enable exactly one.
# response = query_engine.query("who studied Master of Science in Management with a background in Civil Engineering?")
# response = query_engine.query("how many gold medals Iranian youth won in 2023 chess competitions?")
# response = query_engine.query("describe key points of 2023 climate change?")
# response = query_engine.query("how much headline inflation increased after storm shock?")
response = query_engine.query("how many ship detection methods are there? just name and use no more than 70 words")
# response = query_engine.query("say something")
# response = query_engine.query("how many gold medals Iranian youth won in 2023 chess competitions?")
response.print_response_stream()
# Alternative: collect the streamed chunks manually instead of printing them directly.
# ans = []
# for txt in response.response_gen:
#     ans.append(txt)
#     print(txt, sep="")
# response



 There are several ship detection methods, including:
1. Ship detection using satellite imagery with machine learning models
2. Ship detection using SAR imagery with deep learning
3. Ship detection using multi-spectral images with transfer learning
4. Ship detection using VHR images with self-supervised learning
5. Ship detection using Sentinel-2 images with few-shot learning
6. Ship detection using land use and land cover classification
7. Ship detection using change detection techniques.</s>

In [27]:
from pprint import pprint

In [28]:
# Pretty-print the text of the first source node attached to the response.
pprint(response.source_nodes[0].node.text)

('as digital photographs), or by pretraining a neural network on the satellite '
 'image domain. The latter can be \n'
 'done through an unsupervised pipeline using self-supervised learning (SSL) '
 '[6], a contrastive learning \n'
 'paradigm that extracts useful patterns, learns invariances and disentangles '
 'causal factors in the training data. \n'
 'Features learned this way are better adapted for transfer learning of '
 'few-shot object detectors. We propose \n'
 'to use this paradigm to create a ship detector with few data. \n'
 ' \n'
 'For VHR images, a large amount of literature exists, with the number of '
 'works follow- ing the increasing \n'
 'number of sensors and the quantity of publicly available data [7,8]. Many of '
 'these approaches focused on \n'
 'detecting ships with classical image processing pipelines: image processing '
 'using spectral indices or histograms \n'
 '(e.g., sea-land segmentation, cloud removal), ship candidate extraction '
 '(e.g., threshold, ano

In [39]:
# Print the streamed chunks back to back as they arrive, so the answer reads as
# continuous text instead of one chunk per line.
# NOTE(review): `response_gen` appears to be single-use. If the stream was
# already consumed (e.g. by `response.print_response_stream()`), this loop
# prints nothing. Confirm against the library's streaming response docs.
for txt in response.response_gen:
    print(txt, end="", flush=True)
print()

In [1]:
# Print the score of each source node of the response, then display the first
# node as the cell output. Requires `response` from the query cell: on a fresh
# kernel this cell raises NameError until that cell has been run.
for node in response.source_nodes:
    print(node.score)
response.source_nodes[0]

NameError: name 'response' is not defined