## **Website Bot**

In [1]:
import os
import sys
import torch
from langchain.document_loaders import TextLoader,UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain import HuggingFacePipeline
from huggingface_hub import notebook_login

In [2]:
import nltk

# Download the required NLTK packages
try:
    nltk.download("punkt")
    nltk.download("average_perceptron_tagger")
    print("All required NLTK packages downloaded successfully!")
except Exception as e:
    print(f"An error occurred while downloading NLTK packages: {e}")


All required NLTK packages downloaded successfully!


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Error loading average_perceptron_tagger: Package
[nltk_data]     'average_perceptron_tagger' not found in index


In [3]:
URLs = [
    'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb',
    'https://www.mosaicml.com/blog/mpt-7b',
    'https://stability.ai/blog/stability-ai-launches-the-first-of-its-stablem-suite-of-language-models',
    'https://lmsys.org/blog/2023-03-30-vicuna/',
    'https://www.datacamp.com/blog/top-open-source-llms'
]

In [4]:
loader = UnstructuredURLLoader(urls=URLs)
data = loader.load()


In [5]:
print(data)

[Document(metadata={'source': 'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb'}, page_content='Open in app\n\nSign up\n\nSign in\n\nWrite\n\nSign up\n\nSign in\n\nPaper Review\n\nPaper Review: Llama 2: Open Foundation and Fine-Tuned Chat Models\n\nLlama 2: one of the best open source models\n\nAndrew Lukyanenko\n\nFollow\n\nPublished in\n\nGoPenAI\n\n15 min read\n\nJul 20, 2023\n\n--\n\nProject link\n\nModel link\n\nPaper link\n\nThe authors of the work present Llama 2, an assortment of pretrained and fine-tuned large language models (LLMs) with sizes varying from 7 billion to 70 billion parameters. The fine-tuned versions, named Llama 2-Chat, are specifically designed for dialogue applications. These models surpass the performance of existing open-source chat models on most benchmarks, and according to human evaluations for usefulness and safety, they could potentially replace closed-source models. The authors also detail their ap

In [6]:
text_splitter = CharacterTextSplitter(separator='\n',chunk_size = 1000, chunk_overlap = 200)
text_chunks = text_splitter.split_documents(data)

In [7]:
len(text_chunks)

81

In [8]:
embedding = HuggingFaceEmbeddings()

  embedding = HuggingFaceEmbeddings()
  embedding = HuggingFaceEmbeddings()


In [9]:
embedding

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [10]:
query_result = embedding.embed_query("How are you?")
print(query_result)
print(len(query_result))

[0.02710617147386074, 0.011331816203892231, -0.0019524242961779237, -0.036951322108507156, 0.017764940857887268, 0.0009032218949869275, -0.0338648222386837, 0.013378400355577469, 0.017730558291077614, -0.01324674766510725, -0.040281325578689575, -0.015285267494618893, -0.012560340575873852, 0.015230934135615826, 0.015512358397245407, -0.05751274526119232, -0.017129529267549515, -0.061840757727622986, -0.01876130886375904, -0.007223892956972122, -0.049612533301115036, 0.011142097413539886, 2.9735640055150725e-05, -0.009051459841430187, 0.0534287765622139, 0.010582653805613518, 0.03314787521958351, -0.004505351185798645, -0.006172338966280222, 0.0620807409286499, -0.02728528156876564, 0.029826559126377106, 0.024574721232056618, -0.02151179127395153, 1.6617834717180813e-06, 0.04796354100108147, -0.02351686917245388, -0.054577313363552094, 0.07369424402713776, -0.03634387254714966, 0.023755066096782684, -0.06763104349374771, 0.007463259622454643, 0.07609966397285461, -0.012689250521361828,

In [11]:
#from transformers import AutoModelForCausalLM, AutoTokenizer

#model_name = "meta-llama/Llama-2-7b-chat-hf"
#token = "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"

# Load the tokenizer and model
#tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
#model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=token)

#print("Model and tokenizer loaded successfully.")


In [12]:
#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

In [13]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:
model = "meta-llama/Llama-2-7b-chat-hf"

In [15]:
tokenizer = AutoTokenizer.from_pretrained(model,use_auth_token=True)

model = AutoModelForCausalLM.from_pretrained(model,device_map='auto',
                                             torch_dtype = torch.float16,
                                             use_auth_token = True,
                                             load_in_8bit = True)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend


RuntimeError: CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend

In [None]:
pipe = pipeline(
    "text_generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype = torch.float16,
    use_auth_token = True,
    max_new_tokens = 512,
    do_sample = True,
    top_k = 30,
    num_return_sequences = 1,
    eos_token_id = tokenizer.eos_token_id
)

In [None]:
llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})

In [None]:
llm.predict("What is vicuna?")

In [None]:
print("The End")