In [1]:
# Check the GPU status
!nvidia-smi

Tue Feb  4 17:52:09 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
# Install required Python packages
!pip install -q transformers einops accelerate langchain bitsandbytes  # transformers for model, einops for tensor operations, accelerate for optimized performance, langchain for LLM integration, bitsandbytes for efficient computation

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m107.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m62.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
# Install langchain community module
!pip install langchain_community  # Provides community-contributed tools and integrations for LangChain

Collecting langchain_community
  Downloading langchain_community-0.3.16-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain_community)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB

In [5]:
# Import necessary libraries
from langchain import HuggingFacePipeline  # For integrating Hugging Face models with LangChain
from transformers import AutoTokenizer, pipeline  # AutoTokenizer for tokenizing text, pipeline for model inference
import torch  # PyTorch for tensor computations

In [7]:
# Specify the pre-trained model
model = "tiiuae/falcon-7b-instruct"  # Falcon 7B model for instruction-based tasks

In [8]:
# Load the tokenizer for the specified model
tokenizer = AutoTokenizer.from_pretrained(model)  # Automatically loads the appropriate tokenizer

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [9]:
# Create a text generation pipeline using the specified model and tokenizer
pipeline = pipeline(
    "text-generation",               # Defines the task type as text generation
    model=model,                      # Specifies the pre-trained model
    tokenizer=tokenizer,              # Tokenizer for converting text to tokens and vice versa
    torch_dtype=torch.bfloat16,       # Use bfloat16 for optimized memory usage and performance
    trust_remote_code=True,           # Trusts custom code from the model repository
    device_map="auto",               # Automatically selects the device (CPU/GPU) for computation
    max_length=200,                   # Sets the maximum length of generated text
    do_sample=True,                   # Enables sampling to produce diverse text outputs
    top_k=10,                         # Limits sampling to the top 10 tokens
    num_return_sequences=1,           # Generates one output sequence per input
    eos_token_id=tokenizer.eos_token_id  # Defines the end-of-sequence token
)

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

configuration_falcon.py:   0%|          | 0.00/7.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.



modeling_falcon.py:   0%|          | 0.00/56.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

Device set to use cuda:0


In [10]:
# Wrap the Hugging Face pipeline with LangChain's LLM wrapper
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': 0})  # Temperature set to 0 for deterministic output

  llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})


In [11]:
# Import classes for prompt template and LLM chain
from langchain import PromptTemplate, LLMChain  # PromptTemplate helps structure prompts, LLMChain manages interactions with the model

In [12]:
# Define a prompt template to structure the input text
template = """
You are an intelligent chatbot. Help the following question with brilliant answers.
Question: {question}
Answer:"""  # Defines a conversational context with placeholders for dynamic input

In [13]:
# Create a PromptTemplate object
prompt = PromptTemplate(template=template, input_variables=["question"])  # Specifies 'question' as the dynamic input variable

In [14]:
# Create an LLMChain to process the input using the defined prompt and LLM
llm_chain = LLMChain(prompt=prompt, llm=llm)  # Combines the prompt and LLM for generating responses

  llm_chain = LLMChain(prompt=prompt, llm=llm)


In [15]:
# Example 1: Ask the model to explain Artificial Intelligence as a nursery rhyme
question = "Explain what is Artificial Intellience as Nursery Rhymes"
print(llm_chain.run(question))  # Executes the LLM chain and prints the response

  print(llm_chain.run(question))
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.



You are an intelligent chatbot. Help the following question with brilliant answers.
Question: Explain what is Artificial Intellience as Nursery Rhymes 
Answer:

AI is the sun that shines on the rain
To brighten up our lives and ease our pain
Helping us to learn, to think, and to see
AI's here to make us happy, and ease our worries.
User 


In [16]:
# Example 2: Ask the model to provide a code snippet for adding two numbers
question = "Give me a code for adding 2 numbers"
print(llm_chain.run(question))  # Executes the LLM chain and prints the response

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.



You are an intelligent chatbot. Help the following question with brilliant answers.
Question: Give me a code for adding 2 numbers
Answer: You can use the following code snippet to add 2 numbers in Python: 
def add_two_numbers(num1, num2): 
   return num1 + num2 
   
print(add_two_numbers(10, 20))
