In [None]:
!pip install -q -U torch --index-url https://download.pytorch.org/whl/cu117

In [None]:
!pip install -q -U transformers=="4.38.2"
!pip install -q accelerate
!pip install -q -i https://pypi.org/simple/ bitsandbytes
!pip install -q -U datasets

In [None]:
!pip install -q -U git+https://github.com/huggingface/trl
!pip install -q -U git+https://github.com/huggingface/peft

In [None]:
import os

# Expose only CUDA device 0 to this process and turn off parallelism in the
# Hugging Face tokenizers library.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "TOKENIZERS_PARALLELISM": "false",
    }
)

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
!pip install -q -U datasets==2.17.0

In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

import torch
import torch.nn as nn

import transformers
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
from datasets import Dataset
from peft import LoraConfig, PeftConfig
import bitsandbytes as bnb
from trl import SFTTrainer

2024-05-04 09:44:53.876311: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-04 09:44:53.876388: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-04 09:44:53.878001: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
# Local Kaggle path of the Gemma checkpoint used throughout the notebook.
model_name = "/kaggle/input/gemma/transformers/7b-it/1"

# Dtype used for computation on top of the 4-bit weights.
compute_dtype = torch.float16

# NF4 4-bit quantization, without nested (double) quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Tokenizer and its end-of-sequence token string.
tokenizer = AutoTokenizer.from_pretrained(model_name)
EOS_TOKEN = tokenizer.eos_token

# Quantized model, placed on the available device(s) automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

model.config.pretraining_tp = 1
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Show the configuration of the loaded model as the cell's output.
model.config

GemmaConfig {
  "_name_or_path": "/kaggle/input/gemma/transformers/7b-it/1",
  "architectures": [
    "GemmaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 2,
  "eos_token_id": 1,
  "head_dim": 256,
  "hidden_act": "gelu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 24576,
  "max_position_embeddings": 8192,
  "model_type": "gemma",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 16,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": false,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-

In [None]:
# Show the module tree of the loaded model as the cell's output.
model

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 3072, padding_idx=0)
    (layers): ModuleList(
      (0-27): 28 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=3072, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear4bit(in_features=3072, out_features=24576, bias=False)
          (up_proj): Linear4bit(in_features=3072, out_features=24576, bias=False)
          (down_proj): Linear4bit(in_features=24576, out_features=3072, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
   

# Dataset Loading & Basic Preprocessing

In [None]:
from datasets import Dataset, load_dataset
# Load the '3.0.0' configuration of the 'cnn_dailymail' dataset.
dataset = load_dataset('cnn_dailymail', '3.0.0')
# Display the loaded object as the cell's output.
dataset

In [None]:
# Keep the first 500 / 100 / 100 examples of the train / validation / test splits.
dataset_train, dataset_validation, dataset_test = (
    dataset[split_name].select(range(n_examples))
    for split_name, n_examples in (("train", 500), ("validation", 100), ("test", 100))
)

# Print a summary of each subset, in the same order.
for subset in (dataset_train, dataset_validation, dataset_test):
    print(subset)

In [None]:
# Convert the training subset to a pandas DataFrame
df = dataset_train.to_pandas()

# Capture the first 5 rows (the DataFrame.head() default) and print them as plain text
first_five_rows = df.head()
print(first_five_rows)

In [None]:
# Rebind df to the test subset as a pandas DataFrame; the zero-shot cells below read from this frame.
df = dataset_test.to_pandas()

# Zero Shot Prompt Engineering

In [None]:
SYSTEM_PROMPT = """Summarize the following article."""

# One generated summary per row of df, in row order.
syntheses_with_gemma = []

for article in tqdm(df['article'], desc="zero-shot"):
    # Close the user turn and open the model turn, so the prompt has the same
    # "<start_of_turn>model" layout as the fine-tuning examples and the model
    # answers instead of continuing the user's text.
    prompt = (
        f"<start_of_turn>user\n{SYSTEM_PROMPT}\n{article}\n<end_of_turn>\n"
        "<start_of_turn>model\n"
    )

    # Tokenize and move the tensors to the device that holds the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Passing the whole encoding also supplies the attention mask.
    # model.config.use_cache is switched off after loading, so the KV cache is
    # requested explicitly for decoding.
    output = model.generate(**inputs, max_new_tokens=100, use_cache=True)

    # generate() returns prompt + continuation. Slice by token position rather
    # than searching the decoded text for "<end_of_turn>": when that marker is
    # missing, str.find returns -1 and the slice would keep most of the prompt
    # (i.e. the article itself) as the "summary".
    model_summary = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()
    syntheses_with_gemma.append(model_summary)

# Attach the generated summaries to the frame they were generated from.
df['Generated_BY_GEMMA'] = syntheses_with_gemma


In [None]:
# Preview the first rows, now including the 'Generated_BY_GEMMA' column.
df.head()

In [None]:
from IPython.display import FileLink

# Export an independent copy of the results. Column names are kept as they
# are: 'article', 'highlights', 'id' and 'Generated_BY_GEMMA'.
Zero_Shot_Gemma = df.copy()
Zero_Shot_Gemma.to_csv('Zero_Shot_Gemma.csv', index=False)

# Render a download link for the written file as the cell's output.
FileLink('Zero_Shot_Gemma.csv')

# Zero Shot Test

### BLEU, ROUGE-1, ROUGE-2 and ROUGE-L scores

In [None]:
# Install the ROUGE implementation and scikit-learn into the running environment.
!pip install rouge_score
!pip install scikit-learn

import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction

# Download NLTK data packages.
# NOTE(review): the scoring code below only calls nltk.word_tokenize, so
# presumably only 'punkt' is needed — confirm before trimming the others.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')


In [None]:
def calculate_metrics(reference, candidate):
    """Score one generated summary against its reference.

    Args:
        reference: Reference summary text.
        candidate: Generated summary text.

    Returns:
        A ``(bleu_score, scores)`` tuple: the smoothed sentence-level BLEU of
        ``candidate`` against ``reference``, and the ``RougeScorer.score``
        result for the 'rouge1', 'rouge2' and 'rougeL' metrics.
    """
    # Imported here so the function does not depend on another cell having
    # already brought rouge_scorer into the notebook namespace.
    from rouge_score import rouge_scorer

    # BLEU works on token lists.
    reference_tokens = nltk.word_tokenize(reference)
    candidate_tokens = nltk.word_tokenize(candidate)

    bleu_score = sentence_bleu(
        [reference_tokens],
        candidate_tokens,
        smoothing_function=SmoothingFunction().method7,
    )

    # ROUGE works on the raw strings.
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference, candidate)

    return bleu_score, scores

In [None]:
# Iterate through rows of the DataFrame and calculate scores
# Score every zero-shot summary against its reference highlight.
from rouge_score import rouge_scorer

bleu_scores = []
rouge1_scores = []
rouge2_scores = []
rougeL_scores = []

# Walk the reference and generated columns side by side.
pairs = zip(Zero_Shot_Gemma['highlights'], Zero_Shot_Gemma['Generated_BY_GEMMA'])
for reference, candidate in pairs:
    bleu_score, rouge_scores = calculate_metrics(reference, candidate)

    bleu_scores.append(bleu_score)
    # Only the F-measure of each ROUGE variant is kept.
    for bucket, metric in (
        (rouge1_scores, 'rouge1'),
        (rouge2_scores, 'rouge2'),
        (rougeL_scores, 'rougeL'),
    ):
        bucket.append(rouge_scores[metric].fmeasure)

# Arithmetic mean of each metric over all rows.
avg_bleu_score = sum(bleu_scores) / len(bleu_scores)
avg_rouge1_score = sum(rouge1_scores) / len(rouge1_scores)
avg_rouge2_score = sum(rouge2_scores) / len(rouge2_scores)
avg_rougeL_score = sum(rougeL_scores) / len(rougeL_scores)

# Report the averages.
print(f"Average BLEU Score: {avg_bleu_score}")
print(f"Average ROUGE-1 Score: {avg_rouge1_score}")
print(f"Average ROUGE-2 Score: {avg_rouge2_score}")
print(f"Average ROUGE-L Score: {avg_rougeL_score}")

In [None]:
!pip install bert_score

In [None]:
from bert_score import score
import pandas as pd

# Score all pairs with a single call. Calling score() once per row repeats the
# scoring-model setup for every summary; passing the full lists yields one
# precision / recall / F1 value per pair from a single setup.
candidates = Zero_Shot_Gemma['Generated_BY_GEMMA'].tolist()
references = Zero_Shot_Gemma['highlights'].tolist()
P, R, F1 = score(candidates, references, lang="en")

# Per-pair scores as plain Python floats.
avg_precision_list = P.tolist()
avg_recall_list = R.tolist()
avg_f1_list = F1.tolist()

# Calculate average precision, recall, and F1 score over all pairs
avg_precision = sum(avg_precision_list) / len(avg_precision_list)
avg_recall = sum(avg_recall_list) / len(avg_recall_list)
avg_f1 = sum(avg_f1_list) / len(avg_f1_list)

# Print average precision, recall, and F1 score
print(f"Average Precision: {avg_precision}")
print(f"Average Recall: {avg_recall}")
print(f"Average F1 Score: {avg_f1}")


# Fine Tune Gemma

In [None]:
from datasets import Dataset, load_dataset
# Load the '3.0.0' configuration of 'cnn_dailymail' again, rebinding `dataset`.
dataset = load_dataset('cnn_dailymail', '3.0.0')
# Display the loaded splits as the cell's output.
dataset

DatasetDict({
    train: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 287113
    })
    validation: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 13368
    })
    test: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 11490
    })
})

In [None]:
# Keep the first 500 / 100 / 100 examples of the train / validation / test splits.
dataset_train, dataset_validation, dataset_test = (
    dataset[split_name].select(range(n_examples))
    for split_name, n_examples in (("train", 500), ("validation", 100), ("test", 100))
)

# Print a summary of each subset, in the same order.
for subset in (dataset_train, dataset_validation, dataset_test):
    print(subset)

Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 500
})
Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 100
})
Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 100
})


In [None]:
# Convert the train and validation subsets to pandas DataFrames, rebinding the same names.
# NOTE(review): not idempotent — after this cell the names hold DataFrames, so
# running it a second time would call .to_pandas() on a DataFrame.
dataset_train = dataset_train.to_pandas()
dataset_validation = dataset_validation.to_pandas()

In [None]:
import pandas as pd
from datasets import Dataset

SYSTEM_PROMPT = """Summarize the following article."""

# One chat-formatted training example per row: a user turn (instruction plus
# article) followed by a model turn holding the reference summary.
prompts = [
    f"<start_of_turn>user\n{SYSTEM_PROMPT}\n{article}\n<end_of_turn>\n<start_of_turn>model\n{summary}\n<end_of_turn>"
    for article, summary in zip(dataset_train['article'], dataset_train['highlights'])
]

# Wrap the prompts in a single-column ('text') Dataset and display it.
dataset = Dataset.from_pandas(pd.DataFrame({'text': prompts}))
dataset

Dataset({
    features: ['text'],
    num_rows: 500
})

In [None]:
SYSTEM_PROMPT = """Summarize the following article."""

# Same chat layout as the training examples, built from the validation rows.
prompts = [
    f"<start_of_turn>user\n{SYSTEM_PROMPT}\n{article}\n<end_of_turn>\n<start_of_turn>model\n{summary}\n<end_of_turn>"
    for article, summary in zip(dataset_validation['article'], dataset_validation['highlights'])
]

# Wrap the prompts in a single-column ('text') Dataset and display it.
eval_dataset = Dataset.from_pandas(pd.DataFrame({'text': prompts}))
eval_dataset


Dataset({
    features: ['text'],
    num_rows: 100
})

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: Object exposing ``named_parameters()``, whose parameters
            provide ``numel()`` and ``requires_grad``.

    Prints one line with the trainable element count, the total element
    count, and the trainable share as a percentage. A model without any
    parameters reports ``0.0`` percent instead of raising ZeroDivisionError.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio: a parameterless model would otherwise divide by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing, then let PEFT prepare the quantized model
# for training; `model` is rebound to the prepared model.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# Print the module tree after preparation.
print(model)

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 3072, padding_idx=0)
    (layers): ModuleList(
      (0-27): 28 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=3072, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=3072, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear4bit(in_features=3072, out_features=24576, bias=False)
          (up_proj): Linear4bit(in_features=3072, out_features=24576, bias=False)
          (down_proj): Linear4bit(in_features=24576, out_features=3072, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
   

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for causal language modelling.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    # Attention and MLP projection layers of the Gemma decoder blocks.
    target_modules=['o_proj', 'q_proj', 'up_proj', 'v_proj', 'k_proj', 'down_proj', 'gate_proj'],
)

# Wrap the model with the adapters; `model` now refers to the wrapped model.
model = get_peft_model(model, lora_config)

In [None]:
from huggingface_hub import notebook_login

# Open the interactive Hugging Face Hub login widget.
# NOTE(review): presumably required because the training arguments below set
# push_to_hub=True — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# from transformers import TrainingArguments
# from trl import SFTConfig, SFTTrainer

# # Define the model initialization kwargs
# model_init_kwargs = {}

# # Initialize the SFTConfig with model_init_kwargs
# config = SFTConfig(model_init_kwargs=model_init_kwargs, output_dir="Fine_Tuned_Gemma")


In [None]:
from transformers import TrainingArguments
from trl import SFTConfig, SFTTrainer

training_arguments = SFTConfig(
    output_dir="Fine_Tuned_Gemma",
    seed=42,
    # Batching: 4 examples per device, accumulated over 4 steps.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    group_by_length=True,
    # Optimisation.
    optim="paged_adamw_32bit",
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    weight_decay=0.01,
    max_grad_norm=0.3,
    fp16=True,
    num_train_epochs=5,
    # Logging, evaluation and checkpointing; eval_steps below 1 is a fraction
    # of the total number of training steps.
    logging_steps=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    save_strategy="epoch",
    push_to_hub=True,
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

In [None]:
from trl import SFTTrainer

In [None]:
# `model` has already been wrapped with get_peft_model(model, lora_config) in
# an earlier cell, so the adapter config is not passed a second time: for an
# already-wrapped model it is either ignored or rejected, depending on the
# installed TRL version.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    # NOTE(review): each example is cut to its first 100 tokens. The prompts
    # put the article first and the reference summary last, so the summary
    # appears to be truncated away for most examples — raise this (and lower
    # the batch size if memory requires) after confirming on the GPU in use.
    max_seq_length=100,
    tokenizer=tokenizer,
    args=training_arguments,
)

# Run fine-tuning; the returned training summary is the cell's output.
trainer.train()


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Currently logged in as: [33mmondol007[0m ([33mdeep-quest[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss
31,3.0911,3.139008
62,1.9582,2.304088
93,1.7723,2.319348


Checkpoint destination directory Fine_Tuned_Gemma/checkpoint-31 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory Fine_Tuned_Gemma/checkpoint-62 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory Fine_Tuned_Gemma/checkpoint-93 already exists and is non-empty. Saving will proceed but saved results may be invalid.


# Prediction

In [None]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Output directory written by the fine-tuning run.
model_id_1 = '/kaggle/working/Fine_Tuned_Gemma'

# NF4 4-bit quantization with double quantization and bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Tokenizer: pad with the end-of-sequence token, on the right.
tokenizer_1 = AutoTokenizer.from_pretrained(model_id_1)
tokenizer_1.padding_side = "right"
tokenizer_1.pad_token = tokenizer_1.eos_token

# Quantized fine-tuned model, placed on the available device(s) automatically.
fine_tuned_gemma = AutoModelForCausalLM.from_pretrained(
    model_id_1,
    device_map="auto",
    quantization_config=bnb_config,
)


In [None]:
# Rebind dataset_test to a pandas DataFrame; the cells below read its columns and add one.
# NOTE(review): not idempotent — a second run would call .to_pandas() on a DataFrame.
dataset_test = dataset_test.to_pandas()

In [None]:
SYSTEM_PROMPT = """Summarize the following article."""

# One generated summary per row of dataset_test, in row order.
syntheses_with_gemma = []

for article in tqdm(dataset_test['article'], desc="fine-tuned"):
    # Close the user turn and open the model turn, so the prompt has the same
    # "<start_of_turn>model" layout as the fine-tuning examples and the model
    # answers instead of continuing the user's text.
    prompt = (
        f"<start_of_turn>user\n{SYSTEM_PROMPT}\n{article}\n<end_of_turn>\n"
        "<start_of_turn>model\n"
    )

    # Tokenize and move the tensors to the device that holds the model.
    inputs = tokenizer_1(prompt, return_tensors="pt").to(fine_tuned_gemma.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Passing the whole encoding also supplies the attention mask; the KV
    # cache is requested explicitly because it was disabled for training.
    output = fine_tuned_gemma.generate(**inputs, max_new_tokens=100, use_cache=True)

    # generate() returns prompt + continuation. Slice by token position rather
    # than searching the decoded text for "<end_of_turn>": when that marker is
    # missing, str.find returns -1 and the slice would keep most of the prompt
    # (i.e. the article itself) as the "summary".
    model_summary = tokenizer_1.decode(output[0][prompt_length:], skip_special_tokens=True).strip()
    syntheses_with_gemma.append(model_summary)

# Attach the generated summaries to the frame they were generated from.
dataset_test['Generated_BY_GEMMA'] = syntheses_with_gemma


In [None]:
# Preview the first rows, now including the 'Generated_BY_GEMMA' column.
dataset_test.head()

In [None]:
from IPython.display import FileLink

# Export an independent copy of the results. Column names are kept as they
# are: 'article', 'highlights', 'id' and 'Generated_BY_GEMMA'.
Fine_Tune_Gemma = dataset_test.copy()
Fine_Tune_Gemma.to_csv('Fine_Tune_Gemma.csv', index=False)

# Render a download link for the written file as the cell's output.
FileLink('Fine_Tune_Gemma.csv')

# Evaluation

In [None]:
# Install the ROUGE implementation and scikit-learn into the running environment.
!pip install rouge_score
!pip install scikit-learn

import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction

# Download NLTK data packages.
# NOTE(review): the scoring code below only calls nltk.word_tokenize, so
# presumably only 'punkt' is needed — confirm before trimming the others.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')


In [None]:
def calculate_metrics(reference, candidate):
    """Score one generated summary against its reference.

    Args:
        reference: Reference summary text.
        candidate: Generated summary text.

    Returns:
        A ``(bleu_score, scores)`` tuple: the smoothed sentence-level BLEU of
        ``candidate`` against ``reference``, and the ``RougeScorer.score``
        result for the 'rouge1', 'rouge2' and 'rougeL' metrics.
    """
    # Imported here so the function does not depend on another cell having
    # already brought rouge_scorer into the notebook namespace.
    from rouge_score import rouge_scorer

    # BLEU works on token lists.
    reference_tokens = nltk.word_tokenize(reference)
    candidate_tokens = nltk.word_tokenize(candidate)

    bleu_score = sentence_bleu(
        [reference_tokens],
        candidate_tokens,
        smoothing_function=SmoothingFunction().method7,
    )

    # ROUGE works on the raw strings.
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference, candidate)

    return bleu_score, scores

In [None]:
# Iterate through rows of the DataFrame and calculate scores
# Score every fine-tuned summary against its reference highlight.
from rouge_score import rouge_scorer

bleu_scores = []
rouge1_scores = []
rouge2_scores = []
rougeL_scores = []

# Walk the reference and generated columns side by side.
pairs = zip(Fine_Tune_Gemma['highlights'], Fine_Tune_Gemma['Generated_BY_GEMMA'])
for reference, candidate in pairs:
    bleu_score, rouge_scores = calculate_metrics(reference, candidate)

    bleu_scores.append(bleu_score)
    # Only the F-measure of each ROUGE variant is kept.
    for bucket, metric in (
        (rouge1_scores, 'rouge1'),
        (rouge2_scores, 'rouge2'),
        (rougeL_scores, 'rougeL'),
    ):
        bucket.append(rouge_scores[metric].fmeasure)

# Arithmetic mean of each metric over all rows.
avg_bleu_score = sum(bleu_scores) / len(bleu_scores)
avg_rouge1_score = sum(rouge1_scores) / len(rouge1_scores)
avg_rouge2_score = sum(rouge2_scores) / len(rouge2_scores)
avg_rougeL_score = sum(rougeL_scores) / len(rougeL_scores)

# Report the averages.
print(f"Average BLEU Score: {avg_bleu_score}")
print(f"Average ROUGE-1 Score: {avg_rouge1_score}")
print(f"Average ROUGE-2 Score: {avg_rouge2_score}")
print(f"Average ROUGE-L Score: {avg_rougeL_score}")

In [None]:
!pip install bert_score

In [None]:
from bert_score import score
import pandas as pd

# Score all pairs with a single call. Calling score() once per row repeats the
# scoring-model setup for every summary; passing the full lists yields one
# precision / recall / F1 value per pair from a single setup.
candidates = Fine_Tune_Gemma['Generated_BY_GEMMA'].tolist()
references = Fine_Tune_Gemma['highlights'].tolist()
P, R, F1 = score(candidates, references, lang="en")

# Per-pair scores as plain Python floats.
avg_precision_list = P.tolist()
avg_recall_list = R.tolist()
avg_f1_list = F1.tolist()

# Calculate average precision, recall, and F1 score over all pairs
avg_precision = sum(avg_precision_list) / len(avg_precision_list)
avg_recall = sum(avg_recall_list) / len(avg_recall_list)
avg_f1 = sum(avg_f1_list) / len(avg_f1_list)

# Print average precision, recall, and F1 score
print(f"Average Precision: {avg_precision}")
print(f"Average Recall: {avg_recall}")
print(f"Average F1 Score: {avg_f1}")
