In [None]:
# Install the required libraries (pinned versions); uncomment on a fresh runtime
# !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

# **Loading dataset**

In [None]:

from datasets import load_dataset

# Training CSV; the "csv" builder exposes the whole file as the "train" split.
train_csv_path = "/content/final_train_40kdata_cat2.csv"

dataset = load_dataset(
    "csv",
    split="train",
    data_files=train_csv_path,
)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Show the dataset summary (column names and number of rows).
dataset

Dataset({
    features: ['productId', 'Title', 'userId', 'Helpfulness', 'Score', 'Time', 'input', 'Cat1', 'output', 'Cat3', 'instruction', 'text'],
    num_rows: 38983
})

In [None]:
from huggingface_hub import notebook_login

# Render the Hugging Face login widget in the notebook.
# NOTE(review): presumably required for downloading the Llama-2 weights and for
# the later push_to_hub call; confirm against the account's access settings.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# **Loading Model from Hugging Face**

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hub id of the base checkpoint that gets fine-tuned below.
model_name = "meta-llama/Llama-2-7b-chat-hf"

# Quantisation settings: 4-bit weights, NF4 quant type, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally; confirm it is actually needed for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# The cache flag is switched off on the model config before training.
# NOTE(review): it is never switched back on before the inference cells, which
# may slow generation; confirm whether that is intended.
model.config.use_cache = False

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

# **Loading Tokenizer**

In [None]:
# Load the tokenizer that belongs to the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

# **Importing necessary libraries from peft and setting up the LoRA parameters for fine-tuning**

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters, kept as module-level names so they are easy to tune.
lora_r = 64          # rank passed as `r`
lora_alpha = 16      # value passed as `lora_alpha`
lora_dropout = 0.1   # dropout applied inside the LoRA layers

# Adapter configuration for a causal language model; bias terms are not adapted.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
)

**Importing TrainingArguments from transformers and making necessary set up for fine tuning**

In [None]:
from transformers import TrainingArguments

# --- checkpointing and logging ---
output_dir = "./results"
save_steps = 100
logging_steps = 10

# --- batch geometry ---
per_device_train_batch_size = 4
gradient_accumulation_steps = 4

# --- optimisation ---
optim = "paged_adamw_32bit"
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 100
warmup_ratio = 0.03
lr_scheduler_type = "constant"
# NOTE(review): with the plain "constant" scheduler the warmup_ratio above is
# likely ignored; "constant_with_warmup" would apply it. Confirm the intent.

training_arguments = TrainingArguments(
    # where and how often to write checkpoints / logs
    output_dir=output_dir,
    save_steps=save_steps,
    logging_steps=logging_steps,
    # batch geometry
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    group_by_length=True,
    # optimiser and schedule
    optim=optim,
    learning_rate=learning_rate,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    # mixed precision
    fp16=True,
)


**Importing SFTTrainer from trl and making the necessary setup for fine-tuning the model**

In [None]:
from trl import SFTTrainer

# Upper bound on sequence length handed to the trainer.
max_seq_length = 512

# Supervised fine-tuning trainer: the quantised base model plus the LoRA
# configuration, trained on the "text" column of the dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=training_arguments,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)



Map:   0%|          | 0/38983 [00:00<?, ? examples/s]

In [None]:

# Cast every sub-module whose qualified name contains "norm" to float32.
# `Module.to` converts the module in place, so the result need not be rebound.
norm_layers = (
    layer
    for layer_name, layer in trainer.model.named_modules()
    if "norm" in layer_name
)
for layer in norm_layers:
    layer.to(torch.float32)

# **Training starting**

In [None]:
# Run the fine-tuning loop with the TrainingArguments defined above.
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,2.7736
20,2.2035
30,1.6949
40,1.3801
50,0.9655
60,1.9387
70,1.6521
80,1.4709
90,1.2125
100,0.9124


TrainOutput(global_step=100, training_loss=1.620420160293579, metrics={'train_runtime': 1048.2829, 'train_samples_per_second': 1.526, 'train_steps_per_second': 0.095, 'total_flos': 5863460936908800.0, 'train_loss': 1.620420160293579, 'epoch': 0.04})

# **Saving the model locally in outputs directory**

In [None]:
# Under distributed/parallel training the real model sits behind `.module`;
# fall back to the model itself when no such wrapper is present.
model_to_save = getattr(trainer.model, "module", trainer.model)

# Write the model files into the local "outputs" directory.
model_to_save.save_pretrained("outputs")

**Loading Fine tuned Lora model**

In [None]:
from peft import PeftModel

# Adapter configuration saved next to the weights (kept for inspection).
lora_config = LoraConfig.from_pretrained('outputs')

# `get_peft_model(model, lora_config)` only attaches a *freshly initialised*
# adapter built from the config; it never reads the trained weights stored in
# "outputs". `PeftModel.from_pretrained` loads both config and weights.
# Strip any existing PEFT wrapper first so wrappers are not nested.
while isinstance(model, PeftModel):
    model = model.get_base_model()
model = PeftModel.from_pretrained(model, 'outputs')

**Pushing the model to the Hugging Face Hub**

In [None]:
# Repository name on the Hugging Face Hub that receives the model files.
hub_repo_name = "llama2-fine-tuned-classfier-cat2"
model.push_to_hub(hub_repo_name)

adapter_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Sakil/llama2-fine-tuned-classfier-cat2/commit/82b1615da38b0fbbef75fe3001ddff7032fe0c1d', commit_message='Upload model', commit_description='', oid='82b1615da38b0fbbef75fe3001ddff7032fe0c1d', pr_url=None, pr_revision=None, pr_num=None)

# **Inferencing for 2.1 - Input to your prompt will be a text from column name Text, and output should be class name from Column Name Cat 2**

In [None]:
# !pip install langchain

# **Loading fine-tuned model from hugging face**

In [None]:

from peft import PeftModel

adapter_repo_id = 'Sakil/llama2-fine-tuned-classfier-cat2'

# Adapter configuration published on the Hub (kept for inspection).
lora_config = LoraConfig.from_pretrained(adapter_repo_id)

# `get_peft_model` would only build a new, untrained adapter from the config
# and wrap the current model a second time. To actually use the fine-tuned
# weights, strip any existing PEFT wrapper and load the adapter from the Hub.
while isinstance(model, PeftModel):
    model = model.get_base_model()
model = PeftModel.from_pretrained(model, adapter_repo_id)

adapter_config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

# **Importing necessary libraries**

In [None]:
from huggingface_hub.hf_api import HfFolder
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer
import transformers
import torch

**Calling text-generation from transformer pipeline**

In [None]:
# Budget for the generated answer only. The previous `max_length=60` also
# counted the prompt tokens; the few-shot prompts are far longer than that, so
# generation was cut off almost immediately and labels came back truncated.
MAX_NEW_TOKENS = 16

# Text-generation pipeline around the already-loaded model and tokenizer.
# NOTE(review): `torch_dtype` and `device_map` are probably ignored because an
# instantiated model (not a model id) is passed; confirm and drop if so.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    max_new_tokens=MAX_NEW_TOKENS,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)

# LangChain wrapper used by the prompt chain further down. A completion may
# run past the label itself, so consumers should keep only its first line.
# NOTE(review): `model_kwargs` may not be forwarded to generation when a
# pre-built pipeline is supplied, so the temperature may have no effect; verify.
llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0.2})

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'O

In [None]:
import pandas as pd

# **Finally performing inferencing on our inference data by fine tuned model and few shot prompting**

In [None]:

from langchain import PromptTemplate, LLMChain

# Read the inference data and keep the first 100 rows. `.copy()` makes the
# subset an independent frame, so adding a prediction column later does not
# write into a slice of the original frame.
df = pd.read_csv('/content/inference_data.csv')
df['Text'] = df['Text'].astype(str)
df = df.iloc[:100].copy()

# Distinct Cat2 labels in order of first appearance. Listing every row's label
# (duplicates included) only inflated the prompt, and a missing value would
# have made the join below fail.
categories = df['Cat2'].dropna().astype(str).drop_duplicates().tolist()

# Comma-separated label list that is embedded in the instruction.
categories_str = ', '.join(categories)

# Few-shot prompt. This is an f-string: the label list is filled in now, while
# the doubled braces leave a literal {text} placeholder for PromptTemplate.
# NOTE(review): the two examples look identical to the first rows of the
# inference file, which would leak their labels; confirm and replace if so.
template = f"""Classify the text into {categories_str}. Reply with only the words given in: {categories_str}.

Examples:
Text: The description and photo on this product needs to be changed to indicate this product is the BuffalOs version of this beef jerky.
Cat2: meat poultry.

Text: This was a great book!!!! It is well thought through, and you can easily imagine the events happening. The Westing Game itself is a great way to tie two things together. The events are well sequenced and exciting. Ellen Raskin wrote a wonderful book
Cat2: games.

Text: {{text}}
Cat2:"""

# Prompt template with the single runtime variable "text".
prompt = PromptTemplate(template=template, input_variables=["text"])

# Chain that formats the prompt and sends it to the `llm` defined earlier.
llm_chain = LLMChain(prompt=prompt, llm=llm)


In [None]:
# Create a new column 'predicted_label' in the DataFrame
def _extract_label(raw_answer):
    """Reduce a raw completion to the label it starts with.

    The few-shot prompt ends each label with a period, and a completion may
    continue onto further lines. Keep only the first non-empty line and drop
    surrounding whitespace and trailing periods.

    Returns an empty string when the completion contains no text.
    """
    for line in str(raw_answer).splitlines():
        candidate = line.strip().rstrip('.').strip()
        if candidate:
            return candidate
    return ""


# Column that receives the predictions; rows keep "" until they are processed.
df['predicted_label_cat2'] = ""

# Query the chain once per review text and store the cleaned label.
for index, text in df['Text'].items():
    raw_llm_answer = llm_chain.run(text)
    df.at[index, 'predicted_label_cat2'] = _extract_label(raw_llm_answer)

Input length of input_ids is 817, but `max_length` is set to 60. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 843, but `max_length` is set to 60. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 897, but `max_length` is set to 60. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 906, but `max_length` is set to 60. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 930, but `max_length` is set to 60. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 829, but `max_length` is set to 60. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 944, but `max_length` is set to 60. This can lead to unexpe

In [None]:
# Preview the first rows, including the new predicted_label_cat2 column.
df.head()

Unnamed: 0,productId,Title,userId,Helpfulness,Score,Time,Text,Cat1,Cat2,Cat3,predicted_label_cat2
0,B000E46LYG,Golden Valley Natural Buffalo Jerky,A3MQDNGHDJU4MK,0/0,3,-1,The description and photo on this product need...,grocery gourmet food,meat poultry,jerky,games
1,B000GRA6N8,Westing Game,unknown,0/0,5,860630400,This was a great book!!!! It is well thought t...,toys games,games,unknown,games
2,B000GRA6N8,Westing Game,unknown,0/0,5,883008000,"I am a first year teacher, teaching 5th grade....",toys games,games,unknown,learning
3,B000GRA6N8,Westing Game,unknown,0/0,5,897696000,I got the book at my bookfair at school lookin...,toys games,games,unknown,games
4,B00000DMDQ,I SPY A is For Jigsaw Puzzle 63pc,unknown,4-Feb,5,911865600,Hi! I'm Martine Redman and I created this puzz...,toys games,puzzles,jigsaw puzzles,puzz


**Saving the result**

In [None]:
# Destination of the predictions file; the frame index is not written out.
result_csv_path = "/content/inference_data_result_cat2.csv"
df.to_csv(result_csv_path, index=False)