In [1]:
!pip install accelerate transformers tokenizers
!pip install bitsandbytes einops
!pip install xformers
!pip install langchain-community
!pip install faiss-gpu
!pip install sentence_transformers
!pip install torch==2.4.0 torchvision>=0.11

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.me

#Initializing the Hugging Face Pipeline

In [None]:
from torch import cuda, bfloat16
import transformers

In [None]:
model_id = 'meta-llama/Llama-2-7b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# setting quantization configuration to load large model with less GPU memory
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

In [None]:
from google.colab import userdata
hf_auth = userdata.get('h_api')

In [None]:
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

In [3]:
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Model loaded on cuda:0


In [4]:
# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")

Model loaded on cuda:0


In [5]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)



tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [6]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [7]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [8]:
from transformers import StoppingCriteria, StoppingCriteriaList

In [None]:
# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [9]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,    # langchain expects the full text
    task='text-generation',

    # we pass model parameters here too
    stopping_criteria=stopping_criteria,    # without this model rambles during chat
    temperature=0.1,    # 'randomness' of outputs, 0.0 is the min and 1.0 the max

    max_new_tokens=512,   # max number of tokens to generate in the output
    repetition_penalty=1.1    # without this output begins repeating
    )

In [10]:
res = generate_text("Explain me the difference between Data Lakehouse and Data Warehouse.")
print(res[0]["generated_text"])

Explain me the difference between Data Lakehouse and Data Warehouse. Unterscheidung between data lakehouse and data warehouse is a common topic of discussion in the data engineering community, as both are designed to store large amounts of data but have different architectures, use cases, and benefits. A data lakehouse is a centralized repository that stores all the data from various sources in its raw form, without transforming or processing it. On the other hand, a data warehouse is a structured repository that stores data in a specific format, typically after cleaning, transforming, and aggregating it.

Here are some key differences between a data lakehouse and a data warehouse:

1. Data Structure: A data lakehouse stores data in its raw form, including semi-structured and unstructured data, while a data warehouse stores data in a structured format, typically in a relational database management system (RDBMS).
2. Data Processing: A data lakehouse does not process or transform data, 

#Implementing HF Pipeline in LangChain

In [14]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

  warn_deprecated(


In [15]:
# checking again that everything is working fine
llm(prompt="Explain me the difference between Data Lakehouse and Data Warehouse.")

  warn_deprecated(


'Explain me the difference between Data Lakehouse and Data Warehouse. Unterscheidung between data lakehouse and data warehouse is a common topic of discussion in the data engineering community, as both are designed to store large amounts of data but have different architectures, use cases, and benefits. A data lakehouse is a centralized repository that stores all the raw data from various sources in its original form, without transforming or processing it. On the other hand, a data warehouse is a structured repository that stores data in a specific format, typically after cleaning, transforming, and aggregating it.\n\nHere are some key differences between a data lakehouse and a data warehouse:\n\n1. Data Structure: A data lakehouse stores data in its raw, unprocessed form, while a data warehouse stores data in a structured format, typically after cleaning, transforming, and aggregating it.\n2. Data Sources: A data lakehouse can ingest data from various sources, including databases, API

#Ingesting Data using Document Loader

In [19]:
from langchain.document_loaders import WebBaseLoader

In [20]:
web_links = [
    "https://www.databricks.com/",
    "https://help.databricks.com",
    "https://databricks.com/try-databricks",
    "https://www.datascienceportfol.io/yadavharsh250",
    "https://databricks.com/privacy-policy",
    "https://databricks.com/terms-of-use"
]
loader = WebBaseLoader(web_links)
documents = loader.load()

In [30]:
print(len(documents),  type(documents))

6 <class 'list'>


In [28]:
documents[0]

Document(metadata={'source': 'https://www.databricks.com/', 'title': 'The Data and AI Company — Databricks', 'description': 'The Databricks Platform is the world’s first data intelligence platform powered by generative AI. Infuse AI into every facet of your business.', 'language': 'en-US'}, page_content='The Data and AI Company — DatabricksSkip to main contentLoginWhy Databricks DiscoverFor ExecutivesFor Startups Lakehouse Architecture DatabricksIQMosaic ResearchCustomersFeatured StoriesSee All CustomersPartnersCloud ProvidersDatabricks on AWS, Azure, and GCPConsulting & System IntegratorsExperts to build, deploy and migrate to DatabricksTechnology PartnersConnect your existing tools to your LakehouseC&SI Partner ProgramBuild, deploy or migrate to the LakehouseData PartnersAccess the ecosystem of data consumersPartner SolutionsFind custom industry and migration solutionsBuilt on DatabricksBuild, market and grow your businessProduct Databricks PlatformPlatform OverviewA unified platform

#Splitting in Chunks using Text Splitters

In [31]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [32]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=40)
all_splits = text_splitter.split_documents(documents)

In [33]:
len(all_splits)

103

#Creating Embeddings and Storing in Vector Store

In [34]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [35]:
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(all_splits, embeddings)

  warn_deprecated(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

#Initializing Chain

In [36]:
from langchain.chains import ConversationalRetrievalChain

In [38]:
chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)

In [39]:
chat_history = []

query = "What is Data lakehouse architecture in Databricks?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

  warn_deprecated(


Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Terms of Use | DatabricksSkip to main contentLoginWhy Databricks DiscoverFor ExecutivesFor Startups Lakehouse Architecture DatabricksIQMosaic ResearchCustomersFeatured StoriesSee All CustomersPartnersCloud ProvidersDatabricks on AWS, Azure, and GCPConsulting & System IntegratorsExperts to build, deploy and migrate to DatabricksTechnology PartnersConnect your existing tools to your LakehouseC&SI Partner ProgramBuild, deploy or migrate to the LakehouseData PartnersAccess the ecosystem of data consumersPartner SolutionsFind custom industry and migration solutionsBuilt on DatabricksBuild, market and grow your businessProduct Databricks PlatformPlatform OverviewA unified platform for data, analytics and AIData ManagementData reliability, security and performanceSharingAn open, secure, zero-copy sharing for all dataData Warehousin

In [40]:
chat_history = [(query, result["answer"])]

query = "What are Data Governance and Interoperability in it?"
result = chain({"question": query, "chat_history": chat_history})

result['answer']

"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nPrivacy Notice | DatabricksSkip to main contentLoginWhy Databricks DiscoverFor ExecutivesFor Startups Lakehouse Architecture DatabricksIQMosaic ResearchCustomersFeatured StoriesSee All CustomersPartnersCloud ProvidersDatabricks on AWS, Azure, and GCPConsulting & System IntegratorsExperts to build, deploy and migrate to DatabricksTechnology PartnersConnect your existing tools to your LakehouseC&SI Partner ProgramBuild, deploy or migrate to the LakehouseData PartnersAccess the ecosystem of data consumersPartner SolutionsFind custom industry and migration solutionsBuilt on DatabricksBuild, market and grow your businessProduct Databricks PlatformPlatform OverviewA unified platform for data, analytics and AIData ManagementData reliability, security and performanceSharingAn open, secure, zero-copy sharing for all dataData Wareh

In [41]:
print(result['source_documents'])

[Document(metadata={'source': 'https://databricks.com/privacy-policy', 'title': 'Privacy Notice | Databricks', 'description': 'This Privacy Notice explains how Databricks, Inc.', 'language': 'en-US'}, page_content='Privacy Notice | DatabricksSkip to main contentLoginWhy Databricks DiscoverFor ExecutivesFor Startups Lakehouse Architecture DatabricksIQMosaic ResearchCustomersFeatured StoriesSee All CustomersPartnersCloud ProvidersDatabricks on AWS, Azure, and GCPConsulting & System IntegratorsExperts to build, deploy and migrate to DatabricksTechnology PartnersConnect your existing tools to your LakehouseC&SI Partner ProgramBuild, deploy or migrate to the LakehouseData PartnersAccess the ecosystem of data consumersPartner SolutionsFind custom industry and migration solutionsBuilt on DatabricksBuild, market and grow your businessProduct Databricks PlatformPlatform OverviewA unified platform for data, analytics and AIData ManagementData reliability, security and performanceSharingAn open, 