## Loading hf model via lanchain and running prompts

In [1]:
# from langchain_huggingface import ChatHuggingFace #to use huggingface llm class directly
# from langchain_huggingface import HuggingFaceEndpoint #to run models serverless manner
# from langchain_huggingface import HuggingFacePipeline #to run models locally
# from langchain_huggingface import HuggingFaceEmbeddings #to run embedding model through dedicated endpoints
# from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings #to run embedding models serverless manner
# from langchain_huggingface import HuggingFaceEmbeddings #to run embedding models locally
# from langchain_community.embeddings import HuggingFaceInstructEmbeddings #to run embedding models locally
# from langchain_community.embeddings import HuggingFaceBgeEmbeddings #to run bge embedding model

In [1]:
from huggingface_hub import login
login() # You will be prompted for your HF key, which will then be saved locally

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### Checking cuda memory details

In [4]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    total_memory = torch.cuda.get_device_properties(device).total_memory
    print(f"Total CUDA memory: {total_memory / (1024**3):.2f} GB")

Total CUDA memory: 4.00 GB


In [3]:
# import torch
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# if device.type == "cuda":
#     allocated_memory = torch.cuda.memory_allocated(device)
#     print(f"Allocated CUDA memory: {allocated_memory / (1024**3):.2f} GB")

In [5]:
# def print_gpu_memory():
#     print(f"Allocated memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
#     print(f"Cached memory: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")
# print_gpu_memory()

In [6]:
torch.cuda.empty_cache()

In [7]:
# del llm
# del data

## Load a small hf llm

In [9]:
# bigger model with more params
from langchain_huggingface import HuggingFacePipeline
import torch # Still needed for device settings if not using default CPU

torch.cuda.empty_cache()

llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-3.2-1B", #"mistralai/Mistral-7B-Instruct-v0.2",
    task="text-generation",
    # device_map="auto", # ['auto', 'cuda', 'cpu']
    # torch_dtype=torch.bfloat16, # For memory efficiency
    pipeline_kwargs={
        "max_new_tokens": 500,
        "do_sample": True,
        "temperature": 0.1,
        # "top_p": 0.9,
        # "top_k": 20,
        # "repetition_penalty": 1.1,
        # For Phi-3: "trust_remote_code": True, # This might go into model_kwargs if not directly supported by pipeline_kwargs
    },
    # model_kwargs={"trust_remote_code": True},
)

Device set to use cuda:0


In [12]:
llm.invoke("Recommend some good movies.")

"Recommend some good movies. I'm a big fan of the Star Wars series, but I'm not sure if I want to watch them again. I'm also a big fan of the Lord of the Rings series, but I'm not sure if I want to watch them again. I'm also a big fan of the Harry Potter series, but I'm not sure if I want to watch them again. I'm also a big fan of the Twilight series, but I'm not sure if I want to watch them again. I'm also a big fan of the Hunger Games series, but I'm not sure if I want to watch them again. I'm also a big fan of the Divergent series, but I'm not sure if I want to watch them again. I'm also a big fan of the Maze Runner series, but I'm not sure if I want to watch them again. I'm also a big fan of the Maze Runner series, but I'm not sure if I want to watch them again. I'm also a big fan of the Maze Runner series, but I'm not sure if I want to watch them again. I'm also a big fan of the Maze Runner series, but I'm not sure if I want to watch them again. I'm also a big fan of the Maze Runn

### Making of prompt

In [26]:
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate, PromptTemplate

# Defining the system prompt (how the AI should act)
system_prompt = SystemMessagePromptTemplate.from_template(
    "You are an AI assistant to summarize articles."
)

# the user prompt is provided by the user, in this case however the only dynamic
# input is the article
user_prompt = HumanMessagePromptTemplate.from_template(
    """Summarize the below article within:- \n {title} \n {article} """,
    input_variables=["title", "article"]
)
ai_prompt = AIMessagePromptTemplate.from_template("")

In [16]:
import pandas as pd
data = pd.read_csv("..\data\Articles.csv", encoding='latin-1')
title = data.loc[0, 'Heading']
article = data.loc[0, 'Article']

In [27]:
print(user_prompt.format(title=title.strip(), article=article.strip()))

content='Summarize the below article within:- \n sindh govt decides to cut public transport fares by 7pc kti rej \n KARACHI: The Sindh government has decided to bring down public transport fares by 7 per cent due to massive reduction in petroleum product prices by the federal government, Geo News reported.Sources said reduction in fares will be applicable on public transport, rickshaw, taxi and other means of traveling.Meanwhile, Karachi Transport Ittehad (KTI) has refused to abide by the government decision.KTI President Irshad Bukhari said the commuters are charged the lowest fares in Karachi as compare to other parts of the country, adding that 80pc vehicles run on Compressed Natural Gas (CNG). Bukhari said Karachi transporters will cut fares when decrease in CNG prices will be made. ' additional_kwargs={} response_metadata={}


In [45]:
from langchain.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([system_prompt, user_prompt, ai_prompt])

prompt.format(title=title.strip(), article=article.strip())

'System: You are an AI assistant to summarize articles.\nHuman: Summarize the below article within:- \n sindh govt decides to cut public transport fares by 7pc kti rej \n KARACHI: The Sindh government has decided to bring down public transport fares by 7 per cent due to massive reduction in petroleum product prices by the federal government, Geo News reported.Sources said reduction in fares will be applicable on public transport, rickshaw, taxi and other means of traveling.Meanwhile, Karachi Transport Ittehad (KTI) has refused to abide by the government decision.KTI President Irshad Bukhari said the commuters are charged the lowest fares in Karachi as compare to other parts of the country, adding that 80pc vehicles run on Compressed Natural Gas (CNG). Bukhari said Karachi transporters will cut fares when decrease in CNG prices will be made. \nAI: '

In [49]:
prompt_text = prompt.format(title=title.strip(), article=article.strip())
llm.invoke(prompt_text)

'System: You are an AI assistant to summarize articles.\nHuman: Summarize the below article within:- \n sindh govt decides to cut public transport fares by 7pc kti rej \n KARACHI: The Sindh government has decided to bring down public transport fares by 7 per cent due to massive reduction in petroleum product prices by the federal government, Geo News reported.Sources said reduction in fares will be applicable on public transport, rickshaw, taxi and other means of traveling.Meanwhile, Karachi Transport Ittehad (KTI) has refused to abide by the government decision.KTI President Irshad Bukhari said the commuters are charged the lowest fares in Karachi as compare to other parts of the country, adding that 80pc vehicles run on Compressed Natural Gas (CNG). Bukhari said Karachi transporters will cut fares when decrease in CNG prices will be made. \nAI: 7pc fare cut for public transport in Sindh\nAI: 7pc fare cut for public transport in Sindh\nAI: 7pc fare cut for public transport in Sindh\nA

In [51]:
# chain
summarizer_chain = prompt | llm
summarizer_chain.invoke({'title': title.strip(), 'article': article.strip()})

'System: You are an AI assistant to summarize articles.\nHuman: Summarize the below article within:- \n sindh govt decides to cut public transport fares by 7pc kti rej \n KARACHI: The Sindh government has decided to bring down public transport fares by 7 per cent due to massive reduction in petroleum product prices by the federal government, Geo News reported.Sources said reduction in fares will be applicable on public transport, rickshaw, taxi and other means of traveling.Meanwhile, Karachi Transport Ittehad (KTI) has refused to abide by the government decision.KTI President Irshad Bukhari said the commuters are charged the lowest fares in Karachi as compare to other parts of the country, adding that 80pc vehicles run on Compressed Natural Gas (CNG). Bukhari said Karachi transporters will cut fares when decrease in CNG prices will be made. \nAI: 7pc reduction in fares\nAI: 7pc reduction in fares\nAI: 7pc reduction in fares\nAI: 7pc reduction in fares\nAI: 7pc reduction in fares\nAI: 7