## 1. Package Installation

In [None]:
# !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 --upgrade

In [None]:
# !pip install langchain einops accelerate transformers bitsandbytes scipy

In [None]:
# !pip install xformers sentencepiece

In [None]:
# !pip install llama-index==0.7.21 llama_hub==0.0.19

In [None]:
# !pip install llama-index

In [None]:
# !pip install sentence-transformers

In [None]:
# !pip install accelerate

In [None]:
# !pip install bitsandbytes

In [None]:
# !pip install pypdf

## 2. Loading Model & Tokenizer and Declaring inference function

In [28]:
import locale

# Force UTF-8 to be reported as the preferred encoding.
# The replacement keeps the optional `do_setlocale` parameter of the real
# function so that callers which pass an argument, e.g.
# `locale.getpreferredencoding(False)`, do not fail with a TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [1]:
# Import transformer classes for generation
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import accelerate
import torch
import time
from pprint import pprint

In [2]:
# Choose whether you want to use CPU or GPU
gpu = 0  # index of the CUDA device to use when one is available
device = torch.device(f"cuda:{gpu}" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    torch.cuda.set_device(device)
# Display the name of the selected device. The CUDA query is only made when
# CUDA is available, so this cell also runs on a CPU-only machine.
torch.cuda.get_device_name(device) if torch.cuda.is_available() else "cpu"

'NVIDIA GeForce RTX 2070 SUPER'

In [3]:
# Define variable to hold llama2 weights naming
import os

name = "TheBloke/Llama-2-7b-Chat-GPTQ"
# name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

# Read the Hugging Face access token from the environment instead of
# hard-coding a secret in the notebook. `auth_token` is None when neither
# variable is set.
# NOTE(review): the token that was previously committed here should be revoked.
auth_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

In [4]:
# Load the tokenizer that belongs to the checkpoint selected in `name`.
# NOTE(review): `device_map` is kept as in the original call; it is presumably
# irrelevant for a tokenizer — confirm before removing.
tokenizer = AutoTokenizer.from_pretrained(
    name,
    use_auth_token=auth_token,
    device_map='cuda',
    # cache_dir='./model/',
)



In [5]:
# Create model
# `device_map='cuda'` places the weights on the GPU while loading, so no
# separate `model.to(...)` call is made afterwards. No machine-specific
# `cache_dir` is passed: like the tokenizer cell, this relies on the library's
# default cache location.
model = AutoModelForCausalLM.from_pretrained(
    name,
    use_auth_token=auth_token,
    device_map='cuda',
    # cache_dir='./model/',
    # torch_dtype=torch.float16,
    # low_cpu_mem_usage=True,
    # rope_scaling={"type": "dynamic", "factor": 2},
    # load_in_8bit=True,
)
# Streamer handed to `model.generate(..., streamer=streamer)` in the prompting
# cell; configured to skip the prompt and special tokens.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)




Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
binary_path: D:\NLP 1\venv\Lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll
CUDA SETUP: Loading binary D:\NLP 1\venv\Lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll...




In [6]:
# Declaring inference function
def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=100):
    """Generate a continuation of ``text`` and return only the generated part.

    Args:
        text: Prompt string.
        model: Object exposing ``device`` and
            ``generate(input_ids=..., max_new_tokens=...)``.
        tokenizer: Object exposing ``encode`` and ``batch_decode``.
        max_input_tokens: The prompt is truncated to at most this many tokens.
        max_output_tokens: Maximum number of newly generated tokens. The
            prompt length does not count against this budget.

    Returns:
        The decoded text of the tokens that follow the prompt.
    """
    # Tokenize (truncating over-long prompts)
    input_ids = tokenizer.encode(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_tokens,
    )
    prompt_length = input_ids.shape[-1]

    # Generate. `max_new_tokens` bounds only the continuation, whereas
    # `max_length` would also count the prompt tokens and could leave no room
    # for any output.
    generated_tokens_with_prompt = model.generate(
        input_ids=input_ids.to(model.device),
        max_new_tokens=max_output_tokens,
    )

    # Strip the prompt at token level, so the result stays correct when the
    # prompt was truncated or does not decode back to exactly `text`.
    # Assumes the generated sequences start with the prompt tokens.
    generated_tokens = generated_tokens_with_prompt[:, prompt_length:]

    # Decode
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

## *. Prompt the model

In [7]:
# Enter the query for prompting into your LLM
query = " what tech advances are most likely to become dangerous in future?"

In [36]:
# Use the raw query as the prompt (no chat template) and tokenize it,
# placing the encoded tensors on the same device as the model.
# Alternative template, kept for reference:
# prompt = f"""### User: {query}
#           ### Assistant:"""
prompt = "{}".format(query)
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to(model.device)

In [37]:
# Actually run the thing
# `max_new_tokens` is a finite integer budget: an infinite float is not a valid
# token count and would let generation run until an end-of-sequence token
# appears, with no upper bound.
output = model.generate(
    **inputs,
    streamer=streamer,
    use_cache=True,
    max_new_tokens=1024,
)





There are several technological advances that have the potential to become dangerous in the future, depending on how they are developed and used. Here are some examples:

1. Artificial Intelligence (AI): While AI has the potential to revolutionize many industries, it also raises concerns about job displacement, bias, and the potential for autonomous weapons.

2. Biotechnology: Advances in biotechnology, such as gene editing and synthetic biology, have the potential to revolutionize healthcare and agriculture, but they also raise concerns about unintended consequences, such as the creation of new diseases or the unintended modification of the human genome.

3. Robotics and Autonomous Systems: As robots and autonomous systems become more advanced, there is a risk of job displacement, as well as the potential for accidents or malfunctions that could cause harm to people or property.

4. Nanotechnology: The manipulation of matter at the nanoscale has the potential to revolutionize many i

In [38]:
# Convert the output tokens back to text and pretty-print it
output_text = tokenizer.decode(
    output[0],
    skip_special_tokens=True,
)
pprint(output_text, width=120)

(' what tech advances are most likely to become dangerous in future? \n'
 '\n'
 'There are several technological advances that have the potential to become dangerous in the future, depending on how '
 'they are developed and used. Here are some examples:\n'
 '\n'
 '1. Artificial Intelligence (AI): While AI has the potential to revolutionize many industries, it also raises '
 'concerns about job displacement, bias, and the potential for autonomous weapons.\n'
 '\n'
 '2. Biotechnology: Advances in biotechnology, such as gene editing and synthetic biology, have the potential to '
 'revolutionize healthcare and agriculture, but they also raise concerns about unintended consequences, such as the '
 'creation of new diseases or the unintended modification of the human genome.\n'
 '\n'
 '3. Robotics and Autonomous Systems: As robots and autonomous systems become more advanced, there is a risk of job '
 'displacement, as well as the potential for accidents or malfunctions that could cause harm

## 3. Setup the LLamaIndex (RAG) - A VectorDatabase for knowledge-augmented inferences

In [7]:
from llama_index.prompts.prompts import SimpleInputPrompt
from llama_index.llms import HuggingFaceLLM
from llama_index.embeddings import LangchainEmbedding
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import set_global_service_context
from llama_index import ServiceContext
from llama_index import VectorStoreIndex, download_loader
from llama_index import SimpleDirectoryReader
from pathlib import Path

In [8]:
# Create a system prompt
# The text opens with the `<s>[INST] <<SYS>>` markers and closes the system
# section with `<</SYS>>`; it is passed to HuggingFaceLLM as `system_prompt`.
# NOTE(review): the literal `<s>` may duplicate a begin-of-sequence token that
# the tokenizer adds on its own — confirm against the tokenizer's behaviour.
system_prompt = """<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as
helpfully as possible, while being safe. Your answers should not include
any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain
why instead of answering something not correct. If you don't know the answer
to a question, please don't share false information.

Your goal is to provide answers based on the information provided and your other knowledges.<</SYS>>
"""
# Query wrapper: the query text is substituted for `{query_str}` and followed
# by the closing ` [/INST]` marker.
query_wrapper_prompt = SimpleInputPrompt("{query_str} [/INST]")
# query_wrapper_prompt.format(query_str='hello')

In [9]:
# Wrap the already-loaded model and tokenizer in the llama-index LLM adapter,
# attaching the system prompt and the query wrapper defined earlier.
llm = HuggingFaceLLM(
    model=model,
    tokenizer=tokenizer,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    context_window=4096,
    max_new_tokens=512,
)

In [10]:
# Create the "all-MiniLM-L6-v2" Hugging Face embedding model and wrap it in
# the llama-index adapter for LangChain embeddings.
hf_embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings = LangchainEmbedding(hf_embedder)

In [11]:
# Bundle the LLM, the embedding model and the chunk size into one service
# context, then register it as the global default.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embeddings,
    chunk_size=1024,
)
set_global_service_context(service_context)

## 4. Loading the documents for inferencing

### 4-1. On a single document

In [None]:
# Fetch the "PyMuPDFReader" loader class via llama-index's `download_loader`
# and create an instance of it for reading a single PDF.
PyMuPDFReader = download_loader("PyMuPDFReader")
loader = PyMuPDFReader()

In [None]:
# Load documents
# NOTE(review): despite its name, `doc_dir` holds the path of a single PDF
# file, and it is an absolute path on one specific machine — adjust it before
# running elsewhere.
doc_dir = r"C:\Users\user2\Desktop\RAG_Docs\RAG-IranEconomy.pdf"
documents = loader.load(file_path=Path(doc_dir), metadata=True)

# Create indexes
index = VectorStoreIndex.from_documents(documents)

### 4-2. On a documents directory (a folder)

In [36]:
# Load documents
# Reads the files found under `batch_docs_dir`. This rebinds `documents` and
# `index`, replacing the values assigned in the single-document cell.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
batch_docs_dir = r"D:\RAG_Docs"
documents = SimpleDirectoryReader(batch_docs_dir).load_data()

# Create indexes
index = VectorStoreIndex.from_documents(documents)

In [42]:
documents

[Document(id_='866be293-9eff-4527-8538-c7d6344abf3b', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='c008d3c4c80f09edab6bee280ff72c75a7c3a6f4cf0c6f15c5d8fe4f2329b2ac', text='Ad sales boost Time Warner profit\n\nQuarterly profits at US media giant TimeWarner jumped 76% to $1.13bn (£600m) for the three months to December, from $639m year-earlier.\n\nThe firm, which is now one of the biggest investors in Google, benefited from sales of high-speed internet connections and higher advert sales. TimeWarner said fourth quarter sales rose 2% to $11.1bn from $10.9bn. Its profits were buoyed by one-off gains which offset a profit dip at Warner Bros, and less users for AOL.\n\nTime Warner said on Friday that it now owns 8% of search-engine Google. But its own internet business, AOL, had has mixed fortunes. It lost 464,000 subscribers in the fourth quarter profits were lower than in the preceding three quarters. However, the comp

## 5. Prompting the model (RAG enabled)

### 5-1. streaming = True

In [13]:
# response stream mode
query_engine = index.as_query_engine(streaming=True)

In [15]:
# Sample questions (English and Persian) used to exercise the query engine.
# Every entry is followed by a comma: without one, two adjacent string
# literals are concatenated into a single query.
queries = [
    "what is the current status of Iran's oil export, explain with numbers",
    "How much oil Iran exported last year?",
    "how much bilateral trade with UAE iranian officals said to target for next two years?",
    "give me some statistical information about Irans's mineral reserves?",
    "give me some information about the Copernicus Sentinel-2 mission of the European Space Agency?",
    "what was the FY2022 return on equity?",
    "What is Educational background of Farshad Amiri and What he wants to become in future?",
    "Is identifying ships uniquely easy with Sentinel-2 sensors?",
    "آیا بازی سپاهان و الاتحاد برگزار خواهد شد؟",
    " طرح تسهیل و واگذاری اقامت به اتباع خارجی چه شد؟ فقط به زبان فارسی جواب بده",
    "کلیات طرح تاسیس سازمان ملی اقامت چه زمانی و توسط چه کسانی تصویب شد؟",
    "When was the General Assembly of the National Residence Organization approved and by whom?",
    """translate following text from persian to english: 
    ۹مرداد امسال با تصویب مجلس شورای اسلامی، دولت مجاز شد نسبت به تسهیل و واگذاری اقامت به اتباع خارجی
متقاضی اقامت اقدام کند و ۲۱آبان  ۱۴۰۲نمایندگان مجلس شورای اسلامی کلیات طرح تاسیس سازمان ملی اقامت را تصویب
کردند و جزییات طرح را برای بررسی بیشتر به کمیسیون امور داخلی ارجاع دادند. 
    """,
    ]

In [16]:
idx = 1
queries[idx]

'How much oil Iran exported last year?'

In [17]:
# One-off question written in Persian; the query engine's response is printed
# through `print_response_stream()`.
query = "تولید محصولات کشاورزی در کشور اتیوپی در سال 2004 چگونه بود؟ با آمار و اعداد توضیح بده"
query_engine.query(query).print_response_stream()

 Based on the information provided in the context, the production of agricultural products in the country of Iran in the year 2004 can be explained as follows:
According to the information provided in the context, the production of agricultural products in Iran in the year 2004 was 12 million tons. This figure includes various types of agricultural products such as rice, wheat, corn, sugar beets, and fruits.
The context also mentions that the government has been taking measures to increase the production of agricultural products, including the distribution of agricultural lands, the provision of credit facilities to farmers, and the establishment of agricultural research centers.
However, it is important to note that the exact amount of agricultural production in Iran in 2004 is not provided in the context, and the figure of 12 million tons is an estimate based on available data.
In conclusion, the production of agricultural products in Iran in the year 2004 was likely around 12 millio

In [17]:
query_engine.query(queries[idx]).print_response_stream()



 Based on the information provided in the article, Iran exported 2 million barrels of oil per day (bpd) in August, placing it third among OPEC producers behind Saudi Arabia and Iraq. However, I cannot provide an exact figure for the amount of oil Iran exported last year as the article does not provide that information.</s>

In [None]:
idx = 1
# Show which query is being answered. A bare expression in the middle of a
# cell is evaluated but not displayed, so it is printed explicitly.
print(queries[idx])

query_engine.query(queries[idx]).print_response_stream()

In [None]:
idx = 2
# Show which query is being answered. A bare expression in the middle of a
# cell is evaluated but not displayed, so it is printed explicitly.
print(queries[idx])

query_engine.query(queries[idx]).print_response_stream()

### 5-2. streaming = False

In [23]:
# response static mode
query_engine = index.as_query_engine()

In [None]:
# Sample questions (English and Persian) for the non-streaming query engine.
# This rebinds `queries`, so the indices used below refer to this list and not
# to the list defined in the streaming section.
queries = [
    "what is the current status of Iran's oil export, explain with numbers",
    "give me some information about the Copernicus Sentinel-2 mission of the European Space Agency?",
    "what was the FY2022 return on equity?",
    "What is Educational background of Farshad Amiri and What he wants to become in future?",
    "Is identifying ships uniquely easy with Sentinel-2 sensors?",
    "آیا بازی سپاهان و الاتحاد برگزار خواهد شد؟",
    " طرح تسهیل و واگذاری اقامت به اتباع خارجی چه شد؟ فقط فارسی جواب بده",
    "کلیات طرح تاسیس سازمان ملی اقامت چه زمانی و توسط چه کسانی تصویب شد؟",
    "When was the General Assembly of the National Residence Organization approved and by whom?",
    """translate following text from persian to english: 
    ۹مرداد امسال با تصویب مجلس شورای اسلامی، دولت مجاز شد نسبت به تسهیل و واگذاری اقامت به اتباع خارجی
متقاضی اقامت اقدام کند و ۲۱آبان  ۱۴۰۲نمایندگان مجلس شورای اسلامی کلیات طرح تاسیس سازمان ملی اقامت را تصویب
کردند و جزییات طرح را برای بررسی بیشتر به کمیسیون امور داخلی ارجاع دادند. 
    """,
    ]

In [None]:
idx = 6
queries[idx]

In [None]:
# Run one query and report how long it took (wall-clock seconds).
t1 = time.time()
response = query_engine.query(queries[idx])
print(f"Response generated in {(time.time()-t1):0.1f} seconds")

In [None]:
print(response.response)

In [34]:
# English version of the one-off question; the Persian original is kept
# commented out for reference.
# query = "تولید محصولات کشاورزی در کشور اتیوپی در سال 2004 چگونه بود؟ با آمار و اعداد توضیح بده"
query = "explain crops production in ethiopia in 2004?"
response = query_engine.query(query)
# Last expression of the cell: displays the response text.
response.response

" I'm happy to help you with your question! However, I must inform you that the question does not make sense as Ethiopia did not produce any crops in 2004 as it is a landlocked country and does not have the capacity to produce crops. Additionally, the year 2004 is not a valid year for crop production as it is not a valid date.\nI understand that you might have come across this question while reading a document or article, but I kindly request you to verify the information before asking such questions. It is important to ensure the accuracy and reliability of the information we provide.\nIf you have any other questions or concerns, please feel free to ask!"

In [35]:
response.metadata

{'9507312e-ad9b-43f0-afc7-e9d4b486a47a': {'page_label': '5',
  'file_name': 'Sattelite imagery article scripts.pdf'},
 '566ea7c7-23bd-4ced-8a82-bf606ce5a4e4': {'page_label': '10',
  'file_name': 'RAG-IranEconomy.pdf'}}

### 5-3. streaming = False (with translation)

In [None]:
import argostranslate.translate

In [None]:
# response static mode
query_engine = index.as_query_engine()

In [None]:
# Sample questions (English and Persian) for the translation section.
# This rebinds `queries`, so the indices used below refer to this list.
queries = [
    "what is the current status of Iran's oil export, explain with numbers",
    "give me some information about the Copernicus Sentinel-2 mission of the European Space Agency?",
    "what was the FY2022 return on equity?",
    "What is Educational background of Farshad Amiri and What he wants to become in future?",
    "Is identifying ships uniquely easy with Sentinel-2 sensors?",
    "آیا بازی سپاهان و الاتحاد برگزار خواهد شد؟",
    " طرح تسهیل و واگذاری اقامت به اتباع خارجی چه شد؟ فقط فارسی جواب بده",
    "کلیات طرح تاسیس سازمان ملی اقامت چه زمانی و توسط چه کسانی تصویب شد؟",
    "When was the General Assembly of the National Residence Organization approved and by whom?",
    """translate following text from persian to english: 
    ۹مرداد امسال با تصویب مجلس شورای اسلامی، دولت مجاز شد نسبت به تسهیل و واگذاری اقامت به اتباع خارجی
متقاضی اقامت اقدام کند و ۲۱آبان  ۱۴۰۲نمایندگان مجلس شورای اسلامی کلیات طرح تاسیس سازمان ملی اقامت را تصویب
کردند و جزییات طرح را برای بررسی بیشتر به کمیسیون امور داخلی ارجاع دادند. 
    """,
    ]

In [None]:
idx = 6
queries[idx]

In [None]:
# Run one query and report how long it took (wall-clock seconds).
t1 = time.time()
response = query_engine.query(queries[idx])
print(f"Response generated in {(time.time()-t1):0.1f} seconds")

In [None]:
# Translate the response text with argostranslate, passing 'en' and 'fa' as
# the language codes, then print the translated text.
# NOTE(review): presumably requires the en->fa language package to be
# installed for argostranslate — confirm.
translated_response = argostranslate.translate.translate(response.response, 'en', 'fa')
print(translated_response)