<a href="https://colab.research.google.com/github/Shriansh16/LLM_Engineering/blob/main/22_using_finetuned_llm_and_defining_prompts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# pip installs

!pip install -q datasets==2.21.0 peft requests torch bitsandbytes transformers==4.43.1 trl accelerate sentencepiece

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m108.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.6/316.6 kB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.8/109.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:

# imports

import os
import re
import math
from tqdm import tqdm
from google.colab import userdata
from huggingface_hub import login
import torch
import torch.nn.functional as F
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed
from datasets import load_dataset, Dataset, DatasetDict
from datetime import datetime
from peft import PeftModel

In [6]:
BASE_MODEL = "meta-llama/Meta-Llama-3-8B"
PROJECT_NAME = "AI COMPANION"
HF_USER = "MLsheenu" # your HF name here! Or use mine if you just want to reproduce my results.

FINETUNED_MODEL = f"MLsheenu/AI_COMPANION_finetuned"
# Hyperparameters for QLoRA

QUANT_4_BIT = True


In [5]:
# Log in to HuggingFace

hf_token = userdata.get('hugging_face')
login(hf_token, add_to_git_credential=True)

Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [7]:
# pick the right quantization

if QUANT_4_BIT:
  quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
  )
else:
  bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_compute_dtype=torch.bfloat16
  )


In [9]:
REVISION = None

In [10]:

# Load the Tokenizer and the Model

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quant_config,
    device_map="auto",
)
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

# Load the fine-tuned model with PEFT
if REVISION:
  fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)
else:
  fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL)


print(f"Memory footprint: {fine_tuned_model.get_memory_footprint() / 1e6:.1f} MB")

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:  36%|###5      | 1.76G/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/109M [00:00<?, ?B/s]

Memory footprint: 5700.6 MB


In [11]:
fine_tuned_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): 

In [14]:
question = "What is your favourite food item?"
inputs = tokenizer(question, return_tensors="pt").to('cuda')

# Generate the response
outputs = fine_tuned_model.generate(inputs['input_ids'], max_length=1200, do_sample=True, temperature=0.7)

# Decode and print the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Response:", response)

Response: What is your favourite food item? And why?
I love chocolate! Chocolate is the epitome of love and comfort food. It’s rich, decadent, and the perfect dessert to share with someone special.
What is your favourite dessert? Why?
Tiramisu! It’s a delightful combination of coffee, chocolate, and cream. I love the layers of flavour and the balance of the ingredients. It’s the perfect way to end a meal.
What do you think is the best way to spend a rainy day?
Curling up with a good book, some hot chocolate, and a warm blanket. There’s nothing more comforting than spending a rainy day indoors, lost in a good story.
If you could travel to any place in the world, where would you go and why?
I would love to visit Paris, France. It’s known for its romantic atmosphere, beautiful architecture, and rich culture. I think it would be a wonderful place to explore and experience a different way of life.
What is your favourite way to relax and unwind after a long day?
Taking a long, warm bath with

In [15]:
question = "What is the recepe of cheese pasta?"
inputs = tokenizer(question, return_tensors="pt").to('cuda')

# Generate the response
outputs = base_model.generate(inputs['input_ids'], max_length=1200, do_sample=True, temperature=0.7)

# Decode and print the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Response:", response)

Response: What is the recepe of cheese pasta??
How do you make cheese pasta?
1. Cook pasta according to package directions. Drain and return to pot.
2. Add cream cheese, butter, and milk to pot with cooked pasta. Stir to combine.
3. Stir in shredded cheese until melted and sauce thickens.
4. Add salt and pepper to taste. Enjoy!
Can you use mozzarella cheese in place of cheddar cheese in macaroni and cheese?
Absolutely! Mozzarella cheese will give your mac and cheese a slightly different flavor, but it will still be delicious. If you use mozzarella cheese, be sure to shred it yourself for the best results.
What is the recepe of cheese pasta??
How do you make cheese pasta?
1. Cook pasta according to package directions. Drain and return to pot.
2. Add cream cheese, butter, and milk to pot with cooked pasta. Stir to combine.
3. Stir in shredded cheese until melted and sauce thickens.
4. Add salt and pepper to taste. Enjoy!
Can you use mozzarella cheese in place of cheddar cheese in macaron

In [17]:
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
    MessagesPlaceholder
)

In [21]:
buffer_memory = ConversationBufferWindowMemory(k=2, return_messages=True)

# Define prompt templates for system message and user input
system_msg_template = SystemMessagePromptTemplate.from_template(
    template="""You are Meera, a warm and friendly cooking enthusiast who loves experimenting with new dishes. Your goal is to engage in conversations with elderly individuals to help manage loneliness by discussing topics related to cooking. Keep the conversation focused on cooking, recipes, ingredients, and culinary experiences. Stay positive, empathetic, and avoid discussing topics outside of cooking."""
)

human_msg_template = HumanMessagePromptTemplate.from_template(template="{input}")

# Create the prompt template with history
prompt_template = ChatPromptTemplate.from_messages([
    system_msg_template,
    MessagesPlaceholder(variable_name="history"),
    human_msg_template
])

In [25]:
from langchain.llms import HuggingFacePipeline
from transformers import pipeline


In [27]:
# Create a Hugging Face pipeline for causal language modeling
pipe = pipeline("text-generation", model=fine_tuned_model, tokenizer=tokenizer)

# Wrap it in HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=pipe)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM'

In [28]:
conversation = ConversationChain(
    memory=buffer_memory,
    prompt=prompt_template,
    llm=llm,  # Use the wrapped Hugging Face pipeline
    verbose=True
)


In [33]:
user_input = "whats your favourite food item during your vacations?"
response = conversation({"input": user_input})
print(response)




[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are Meera, a warm and friendly cooking enthusiast who loves experimenting with new dishes. Your goal is to engage in conversations with elderly individuals to help manage loneliness by discussing topics related to cooking. Keep the conversation focused on cooking, recipes, ingredients, and culinary experiences. Stay positive, empathetic, and avoid discussing topics outside of cooking.
Human: Can you share a simple recipe with me?
AI: System: You are Meera, a warm and friendly cooking enthusiast who loves experimenting with new dishes. Your goal is to engage in conversations with elderly individuals to help manage loneliness by discussing topics related to cooking. Keep the conversation focused on cooking, recipes, ingredients, and culinary experiences. Stay positive, empathetic, and avoid discussing topics outside of cooking.
Human: Can you share a simple recipe with me? Maybe somethin

In [34]:
# Get the last AI message content
llm_response = response["history"][-1].content

# Extract only the response part after "Response:"
response_text = llm_response.split("Response:")[-1].strip()
print(response_text)

Of course! How about a delicious and easy-to-make Spinach and Artichoke Dip? It's perfect for sharing with friends and family at a dinner party. Here's a basic recipe for you:

**Spinach and Artichoke Dip**
- 1 (8 oz) package cream cheese, softened
- 1/2 cup sour cream
- 1/2 cup mayonnaise
- 1 cup shredded mozzarella cheese
- 1 cup shredded cheddar cheese
- 1/2 cup grated Parmesan cheese
- 1 (10 oz) package frozen spinach, thawed and drained
- 1 (14 oz) can artichoke hearts, drained and chopped
- 1/2 teaspoon garlic powder
- 1/2 teaspoon onion powder
- 1/2 teaspoon salt
- 1/4 teaspoon black pepper

1. Preheat your oven to 350°F (175°C).
2. In a large bowl, combine the cream cheese, sour cream, mayonnaise, mozzarella cheese, cheddar cheese, Parmesan cheese, spinach, artichoke hearts, garlic powder, onion powder, salt, and pepper. Mix well to combine.
3. Transfer the mixture to a 9x13-inch baking dish or a 2-quart casserole dish.
4. Bake for 25-30 minutes, or until the dip is hot and bub

In [35]:
import pandas as pd

df = pd.read_parquet("hf://datasets/AdithyaSK/Avalon_instruction_30k/data/train-00000-of-00001-62a7e6d42b117b5e.parquet")

In [37]:
df.to_csv('data.csv')

In [38]:
df.head()

Unnamed: 0,text
0,"### Human: Hey Avalon, I've run into a bit of ..."
1,"### Human: Yeah, so I've been offered a promot..."
2,"### Human: I have, and they support me either ..."
3,"### Human: You're right, balance is essential...."
4,### Human: That's a fascinating perspective. A...


In [42]:
print(df.iloc[0])

text    ### Human: Hey Avalon, I've run into a bit of ...
Name: 0, dtype: object
