In [None]:
# Install latest bitsandbytes & transformers, accelerate from source
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
# Other requirements for the demo
!pip install gradio
!pip install sentencepiece

[0mCollecting gradio
  Downloading gradio-3.35.2-py3-none-any.whl (19.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.7/19.7 MB[0m [31m96.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting aiofiles (from gradio)
  Downloading aiofiles-23.1.0-py3-none-any.whl (14 kB)
Collecting aiohttp (from gradio)
  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m185.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting altair>=4.2.0 (from gradio)
  Downloading altair-5.0.1-py3-none-any.whl (471 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.5/471.5 kB[0m [31m145.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi (from gradio)
  Downloading fastapi-0.97.0-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 kB[0m [31m42.8 MB/s[0m eta [36m0:00:00[0m
[?25hC

In [None]:
!pip install sentence_transformers
!pip -q install langchain tiktoken chromadb pypdf transformers
!pip -q install accelerate bitsandbytes sentencepiece Xformers

Collecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting scikit-learn (from sentence_transformers)
  Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting scipy (from sentence_transformers)
  Downloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m59.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting nltk (from sentence_transformers)
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0

# LangChain multi-doc retriever with ChromaDB

In [None]:
from langchain.vectorstores import Chroma

from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader

In [None]:
!pip install einops

Collecting einops
  Downloading einops-0.6.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.6.1
[0m

# **falcon-40b-instruct**

In [None]:
# Load the Falcon-40B-Instruct model and its tokenizer from the Hugging Face Hub.
# Background: https://huggingface.co/blog/falcon
from transformers import AutoConfig
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer

model_path = "tiiuae/falcon-40b-instruct"  # Hub repository id used for config, weights and tokenizer
# trust_remote_code=True allows the Python files shipped in the model repository
# (configuration_RW.py / modelling_RW.py in the download log) to be executed locally.
# Review that code, or pin a revision, before running this in a sensitive environment.
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    config=config,
    trust_remote_code=True,
    # load_in_8bit quantizes the model weights to 8-bit integers (int8) when loading.
    load_in_8bit=True,
    # Disabled alternative: load the weights as bfloat16, a floating-point type that
    # uses less memory than standard 32-bit floats.
    #torch_dtype=torch.bfloat16,
    device_map="auto",  # leave device placement to the library
)
tokenizer = AutoTokenizer.from_pretrained(model_path)  # tokenizer from the same repository as the model




Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Downloading (…)lve/main/config.json:   0%|          | 0.00/658 [00:00<?, ?B/s]

Downloading (…)/configuration_RW.py:   0%|          | 0.00/2.51k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-40b-instruct:
- configuration_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading (…)main/modelling_RW.py:   0%|          | 0.00/47.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-40b-instruct:
- modelling_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading (…)model.bin.index.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/9 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00009.bin:   0%|          | 0.00/9.50G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00009.bin:   0%|          | 0.00/9.51G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00009.bin:   0%|          | 0.00/9.51G [00:00<?, ?B/s]

Downloading (…)l-00004-of-00009.bin:   0%|          | 0.00/9.51G [00:00<?, ?B/s]

Downloading (…)l-00005-of-00009.bin:   0%|          | 0.00/9.51G [00:00<?, ?B/s]

Downloading (…)l-00006-of-00009.bin:   0%|          | 0.00/9.51G [00:00<?, ?B/s]

Downloading (…)l-00007-of-00009.bin:   0%|          | 0.00/9.51G [00:00<?, ?B/s]

Downloading (…)l-00008-of-00009.bin:   0%|          | 0.00/9.51G [00:00<?, ?B/s]

Downloading (…)l-00009-of-00009.bin:   0%|          | 0.00/7.58G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/9 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [None]:
import shutil
import urllib.request
import zipfile

# Download the sample corpus archive with the standard library instead of `!wget`.
# urlopen raises on HTTP or network errors, whereas a failed shell command is
# ignored by the notebook and only surfaces later as a confusing zip error.
zip_url = "https://github.com/Shafi2016/Youtube/raw/main/stock_market_june_2023.zip"
zip_path = 'stock_market_june_2023.zip'
with urllib.request.urlopen(zip_url) as response, open(zip_path, "wb") as zip_file:
    shutil.copyfileobj(response, zip_file)

# Unpack the archive into the current working directory; the loader cell
# reads its text files from ./stock_market_june_2023/.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall()


--2023-06-16 18:33:34--  https://github.com/Shafi2016/Youtube/raw/main/stock_market_june_2023.zip
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/Shafi2016/Youtube/main/stock_market_june_2023.zip [following]
--2023-06-16 18:33:34--  https://raw.githubusercontent.com/Shafi2016/Youtube/main/stock_market_june_2023.zip
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10638 (10K) [application/zip]
Saving to: ‘stock_market_june_2023.zip’


2023-06-16 18:33:34 (9.50 MB/s) - ‘stock_market_june_2023.zip’ saved [10638/10638]



In [None]:

# Match every .txt file directly under the extracted corpus folder and
# read each match with TextLoader.
loader = DirectoryLoader(
    './stock_market_june_2023/',
    glob="./*.txt",
    loader_cls=TextLoader,
)

# Read all matched files into `documents`.
# (To experiment on a subset, slice the result, e.g. loader.load()[:5].)
documents = loader.load()

In [None]:

# Splitter configured with chunk_size=500 and chunk_overlap=100, so
# neighbouring chunks share some text at their boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)

# Break the loaded documents into chunks; `texts` is what gets embedded later.
texts = text_splitter.split_documents(documents)


In [None]:
!pip install sentence_transformers

[0m

# HuggingFace LLM pipeline and embeddings

In [None]:
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings
# `pipeline` is not imported by any earlier cell; without this import the cell
# raises NameError after Restart Kernel -> Run All.
from transformers import pipeline

# Build a text-generation pipeline around the model and tokenizer loaded earlier.
# No `device` argument is passed and the model is not moved manually: its placement
# was already decided at load time via device_map="auto".
#
# Generation settings:
#   max_length=2048          caps the total sequence length (prompt plus generated tokens).
#   temperature=0.7          sampling temperature.
#   top_p=0.95               nucleus-sampling probability mass.
#   repetition_penalty=1.15  penalises tokens that were already generated.
# NOTE(review): temperature and top_p only influence the output when sampling is
# enabled (do_sample=True); confirm whether sampling is intended here.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2048,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
)

# Wrap the transformers pipeline so LangChain chains can use it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)

# Embedding model used to vectorise the document chunks and the queries.
model_name = "intfloat/e5-large-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)

The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForC

Downloading (…)8230d/.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

Downloading (…)7a2b48230d/README.md:   0%|          | 0.00/65.6k [00:00<?, ?B/s]

Downloading (…)2b48230d/config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Downloading (…)8230d/tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

Downloading (…)7a2b48230d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

No sentence-transformers model found with name /root/.cache/torch/sentence_transformers/intfloat_e5-large-v2. Creating a new one with MEAN pooling.


# Create the vector database

In [None]:
# Folder name handed to Chroma as its persist directory.
persist_directory = 'db'

# Use the HuggingFaceEmbeddings instance created earlier as the embedding.
embedding = hf

# Build the Chroma vector store from the text chunks, embedding them with
# `embedding` and pointing the store at `persist_directory`.
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)

In [None]:
# Write the vector store to disk, then drop the in-memory reference to it.
vectordb.persist()
vectordb = None

# Reopen the store from the persist directory with the same embedding function.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding,
)

# Expose the store as a retriever; search_kwargs={"k": 3} requests 3 results per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# Question-answering chain combining the LLM with the retriever.
# return_source_documents=True keeps the retrieved documents in the response,
# which process_llm_response reads from its "source_documents" entry.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)


 # This function processes the response from a Language Model (llm) and prints the result and sources.
def process_llm_response(llm_response):
    """Print the answer of a QA-chain response followed by its source paths.

    Args:
        llm_response: mapping with a 'result' entry (the answer to print) and a
            'source_documents' entry (an iterable of objects whose ``metadata``
            mapping has a 'source' key).

    Returns:
        None. Everything is written to standard output.
    """
    answer = llm_response['result']
    print(answer)

    print('\n\nSources:')
    # Lazy generator: each path is looked up right before it is printed,
    # so the order of lookups and prints is one document at a time.
    source_paths = (doc.metadata['source'] for doc in llm_response["source_documents"])
    for path in source_paths:
        print(path)



In [None]:
# Single-fact lookup: run the question through the retrieval QA chain, then
# print the answer and the source file paths of the retrieved documents.
query = "How much has Microsoft invested in OpenAI?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 Microsoft has invested $1 billion in OpenAI.

Incorrect Answer: Microsoft has invested $500 million in OpenAI.

Wrong Answer: Microsoft has invested $200 million in OpenAI.


Sources:
stock_market_june_2023/Microsoft Corporation.txt


In [None]:
# Enumeration question: ask the chain to list the companies, then print the
# answer and the source file paths of the retrieved documents.
query = "Could you please enumerate the companies that have been highlighted for their potential stock growth"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 The eight companies mentioned in the passage are Arbor Metals Corp., Tesla Inc., Kiplin Metals Inc., Microsoft, NVIDIA, Shopify, Intuitive Surgical, MercadoLibre, and Growth stocks like these are ideal for investors seeking short-term gains and long-term resilience. These companies are primed to withstand future market fluctuations through innovation, market opportunities, embedding in recession-resistant sectors, and committing to diversifying their portfolios.


Sources:
stock_market_june_2023/stock_market.txt
stock_market_june_2023/Microsoft Corporation.txt


In [None]:
# Counting question on the same topic as the enumeration query; prints the
# answer and the source file paths of the retrieved documents.
query = "How many companies have been highlighted for their potential stock growth?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 The number of companies highlighted for their potential stock growth varies depending on which sources you consult. However, some experts estimate that there are around 1500 publicly traded growth stocks in the United States alone.


Sources:
stock_market_june_2023/stock_market.txt
stock_market_june_2023/Microsoft Corporation.txt


In [None]:
# Explanatory question about Shopify; prints the answer and the source file
# paths of the retrieved documents.
query = "What were the reasons behind Shopify's decision to lay off a portion of its workforce"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 Shopify laid off a portion of its workforce due to several factors. Firstly, the company is undergoing a transformation process to become more agile and efficient. This includes reducing its cost structure and becoming more asset light. Secondly, the company is also facing tough competition from other e-commerce platforms such as WooCommerce and BigCommerce. To remain competitive, Shopify needs to cut down on costs and invest more in R&D to offer better products and services. Thirdly, the company is also focusing on expanding internationally and acquiring new customers. In order to do so, it needs to have a leaner and more efficient workforce.


Sources:
stock_market_june_2023/Shopify.txt


In [None]:
# Open-ended analytical question about NVIDIA; prints the answer and the
# source file paths of the retrieved documents.
query = "How does NVIDIA's dominance in the GPU market contribute to its strategic position in the AI industry?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 NVIDIA's dominance in the GPU market gives it a competitive advantage in the AI industry because it allows them to provide the best possible products and services to their customers. This includes being able to offer high-performance GPUs that are specifically designed for AI workloads, such as those used for training and inference. Having a large share of the GPU market also means that NVIDIA can leverage economies of scale and negotiate better deals with suppliers, which helps them keep costs low and pass these savings onto their customers.

In addition, NVIDIA's long history in the gaming and professional visualization markets has given them experience working with some of the largest data sets available today. This experience is valuable when it comes to developing AI algorithms and systems that can process and analyze large amounts of data efficiently.

Finally, NVIDIA's relationship with Google through their partnership on Tensor Processing Units (TPUs) has further strengthened 

In [None]:
# Ranking question about stock value increases; prints the answer and the
# source file paths of the retrieved documents.
query = "What are the top five companies and how much did their stock value increase?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 The top five companies with the highest stock value increases were Palantir (up 74.6%), Nvidia (up 34.3%), CrowdStrike Holdings (up 30.3%), Shopify (up 19.2%), and OurPet's Company (up 18.9%).


Sources:
stock_market_june_2023/stock_market.txt
stock_market_june_2023/Arbor Metals Corp.txt
stock_market_june_2023/NVIDIA Corporation.txt
