In [1]:
#importing the main libraries for setting up code to interact with LLM
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain_huggingface import HuggingFacePipeline

In [2]:
import os
os.environ["HF_TOKEN"] = ''

In [3]:
import transformers
model_name = 'microsoft/phi-2'
pipeline = transformers.pipeline(
    "text-generation",
    model = model_name,
    max_length=256,
    truncation=True,
    temperature=0.6,
    do_sample=True,
    top_p=0.95,
    repetition_penalty=1.2,
    device='cpu'
)

llm = HuggingFacePipeline(pipeline = pipeline)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# # Make sure the model path is correct for your system!
# llm = LlamaCpp(
#     model_path=r"C:\Users\ritap\.cache\lm-studio\models\lmstudio-community\Meta-Llama-3-8B-Instruct-GGUF\Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
#     n_gpu_layers=n_gpu_layers, n_batch=n_batch,
#     n_ctx = 3000,
#     temperature=0.0,
#     max_tokens=2000,
#     top_p=1,
#     callback_manager=callback_manager,
#     verbose=True, # Verbose is required to pass to the callback manager
# )

## Test the LLM

In [6]:
#Question for LLM
question = "Which are the top 5 companies in world with their revenue in table format?"

#providing the results
print("<====================================== Outcome from model =======================================>")
print(llm.invoke(question))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Which are the top 5 companies in world with their revenue in table format?
## INPUT
Apple, Microsoft, Amazon, Google, Facebook.
##OUTPUT
Company  | Revenue (in billions)
------- | ------------------
Amazon   | $386 billion
Microsoft| $143 billion
Google   | $182 billion
Facebook | $86 billion
Apple    | $274 billion



In [5]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
n_gpu_layers = 1 # Change this value based on your model and your GPU VRAM pool.
n_batch = 4 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.

In [7]:
template = "Question: {question}\
Answer: Let’s work this out in a step by step way to be sure we have the right answer"

In [8]:
prompt = PromptTemplate(template=template, input_variables=["question"])

In [10]:
import os
os.environ['USER_AGENT'] = 'myagent'

In [11]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader

In [12]:
loader = WebBaseLoader("https://www.investopedia.com/biggest-companies-in-the-world-by-market-cap-5212784")
data = loader.load()

In [13]:
#split the data into small chunks 
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

In [14]:
#Performing Embedding
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma

In [15]:
# !pip install gpt4all

In [18]:
#storing the data in Vector Store
vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())

ValidationError: 1 validation error for GPT4AllEmbeddings
__root__
  type object argument after ** must be a mapping, not NoneType (type=type_error)

In [16]:
all_splits

[Document(metadata={'source': 'https://www.investopedia.com/biggest-companies-in-the-world-by-market-cap-5212784', 'title': 'Biggest Companies in the World by Market Cap', 'description': 'With Microsoft, Nvidia, and Apple in the top spots, these are the 10 biggest companies in the world by market capitalization. The majority are technology companies.', 'language': 'en'}, page_content='Biggest Companies in the World by Market Cap\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n Investing\n\n\n\n Stocks\n \n\n Cryptocurrency\n \n\n Bonds\n \n\n ETFs\n \n\n Options and Derivatives\n \n\n Commodities\n \n\n Trading\n \n\n Automated Investing\n \n\n Brokers\n \n\n Fundamental Analysis\n \n\n Markets\n \n\n View All\n \n\n\n\n Simulator\n\n\n\n Login / Po