<a href="https://colab.research.google.com/github/SkwarczynskiP/Transformer-Based-Models/blob/main/Section2.3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q -U langchain transformers bitsandbytes accelerate
!pip install datasets

In [None]:
import torch
from transformers import BitsAndBytesConfig
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from datasets import load_dataset
import warnings
from datasets import metric
import numpy as np
from tqdm import tqdm
warnings.filterwarnings("ignore", message="`do_sample` is set to `False`. However, `temperature` is set to `0` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.")
warnings.filterwarnings("ignore", message="`do_sample` is set to `False`. However, `top_k` is set to `5` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_k`.")


In [None]:
# Begining of Model #1 - mistralai/Mistral-7B-Instruct-v0.1

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model_4bit = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", device_map="auto", quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
pipeline_inst = pipeline(
        "text-generation",
        model=model_4bit,
        tokenizer=tokenizer,
        use_cache=True,
        device_map="auto",
        max_length=512,
        do_sample=False,
        temperature = 0,
        top_k=5,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
)

llm = HuggingFacePipeline(pipeline=pipeline_inst)

template = "Is this text message spam? Answer 'Yes' if the message is spam, answer 'No' if you are unsure. Please only enter one word, either 'Yes' or 'No': "

In [None]:
def generate_response(question):
    prompt = PromptTemplate(template=template, input_variables=["question","context"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    response = llm_chain.run({"question":question})
    return response

In [None]:
dataset = load_dataset("sms_spam", trust_remote_code=True)
test_texts = dataset['train']['sms']
labels_test = dataset['train']['label']

In [None]:
print(test_texts[:1]) # For testing the loading of the dataset

['Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...\n']


In [None]:
output_modelOne = []

def generate_response(question): # Commented out portions were for testing purposes
  try:
    # print(question)
    # print("\n")
    prompt = PromptTemplate(template=template, input_variables=["question","context"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    response = llm_chain.run({"question":question})
    # print(response)
    # print("\n")
    return response
  except Exception as e:
    print("Exception: ")
    print(e)

for text in tqdm(test_texts[0:1000]):
    result = generate_response(text)
    last_word = result.split()[-1]
    if last_word == 'Yes': # Check if the last word is 'Yes' or 'No'
        output_modelOne.append(1)
    elif last_word == 'No':
        output_modelOne.append(0)

100%|██████████| 1000/1000 [08:03<00:00,  2.07it/s]


In [None]:
print(output_modelOne) # For testing the output of the generated response

In [None]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [None]:
correct_predictions = sum(predicted == actual for predicted, actual in zip(output_modelOne, labels_test[0:1000]))
accuracy = correct_predictions / len(labels_test[0:1000])
print(f'Accuracy for mistralai/Mistral-7B-Instruct-v0.1: {accuracy}')

Accuracy for mistralai/Mistral-7B-Instruct-v0.1: 0.152


In [None]:
# Beginning of Model #2 - bigscience/bloomz-560m

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model_4bit = AutoModelForCausalLM.from_pretrained("bigscience/bloomz-560m", device_map="auto",quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloomz-560m")


In [None]:
pipeline_inst = pipeline(
        "text-generation",
        model=model_4bit,
        tokenizer=tokenizer,
        use_cache=True,
        device_map="auto",
        max_length=512,
        do_sample=False,
        temperature = 0,
        top_k=5,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
)

llm = HuggingFacePipeline(pipeline=pipeline_inst)

template = "Is this text message spam? Answer 'Yes' if the message is spam, answer 'No' if you are unsure. Please only enter one word, either 'Yes' or 'No': "

In [None]:
output_modelTwo = []

def generate_response(question): # Commented out portions were for testing purposes
  try:
    # print(question)
    # print("\n")
    prompt = PromptTemplate(template=template, input_variables=["question","context"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    response = llm_chain.run({"question":question})
    # print(response)
    # print("\n")
    return response
  except Exception as e:
    print("Exception: ")
    print(e)

for text in tqdm(test_texts[0:1000]):
    result = generate_response(text)
    last_word = result.split()[-1]
    if last_word == 'Yes': # Check if the last word is 'Yes' or 'No'
        output_modelTwo.append(1)
    elif last_word == 'No':
        output_modelTwo.append(0)

100%|██████████| 1000/1000 [01:53<00:00,  8.85it/s]


In [None]:
correct_predictions = sum(predicted == actual for predicted, actual in zip(output_modelTwo, labels_test[0:1000]))
accuracy = correct_predictions / len(labels_test[0:1000])
print(f'Accuracy for bigscience/bloomz-560m: {accuracy}')

Accuracy for bigscience/bloomz-560m: 0.152
