In [None]:
# Import the hf_hub_download function from the Hugging Face Hub library
from huggingface_hub import hf_hub_download

# Import the Llama class from the llama_cpp library
from llama_cpp import Llama


In [None]:
# Hugging Face Hub repository that hosts the quantised chat model.
model_name_or_path = 'TheBloke/Llama-2-13B-chat-GGUF'
# File to fetch from that repository (the model is in GGUF format).
model_basename = 'llama-2-13b-chat.Q5_K_M.gguf'

In [None]:
# Fetch the GGUF file from the Hub repository; the returned value is what the
# Llama constructor later receives as its model path.
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

llama-2-13b-chat.Q5_K_M.gguf:   0%|          | 0.00/9.23G [00:00<?, ?B/s]

In [None]:
# Load the downloaded GGUF weights into a llama.cpp model instance.
lcpp_llm = Llama(
    model_path=model_path,
    # Context window
    n_ctx=4096,
    # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
    n_batch=512,
    # Change this value based on your model and your GPU VRAM pool.
    n_gpu_layers=43,
    # CPU cores
    n_threads=2,
)

AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 



## **Loading the IMDb Reviews Dataset**

In [None]:
# Import the random module for generating random numbers or choices.
import random

# Import the pandas library and alias it as 'pd' for easier use.
import pandas as pd

# Import the 'load_dataset' function from the 'datasets' library for loading datasets.
from datasets import load_dataset


In [None]:
# Load the IMDb dataset using the 'load_dataset' function.
# In the recorded run this produced train, test and unsupervised splits.
dataset = load_dataset("imdb")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [None]:
# Convert the train and test splits into pandas DataFrames.
imdb_train_df, imdb_test_df = (
    dataset[split_name].to_pandas() for split_name in ('train', 'test')
)

In [None]:
# Show (rows, columns) for the train frame and the test frame.
imdb_train_df.shape, imdb_test_df.shape

((25000, 2), (25000, 2))

In [None]:
# Pick three positive (label == 1) and three negative (label == 0) training
# reviews to serve as few-shot examples. A fixed random_state makes the same
# reviews come back on every run, so the prompt (and therefore the evaluation)
# is reproducible after a kernel restart.
positive_examples = imdb_train_df.loc[imdb_train_df.label == 1, :].sample(3, random_state=42)
negative_examples = imdb_train_df.loc[imdb_train_df.label == 0, :].sample(3, random_state=42)

In [None]:
# Display the sampled positive reviews.
positive_examples

Unnamed: 0,text,label
18949,Despite being told from a British perspective ...,1
19406,Legend of Dragoon is one of those little-known...,1
17477,Kabei: Our Mother (2008) is a poetic and subli...,1


In [None]:
# Display the sampled negative reviews.
negative_examples

Unnamed: 0,text,label
3613,I've seen thousands of movies and have never w...,0
2349,I missed this movie in the cinema but had some...,0
6313,And how many actors can he get to stand in for...,0


In [None]:
# Stack the positive and negative example frames, then serialise the combined
# frame as a JSON string. orient='records' yields a list with one object per
# row, keyed by column name.
examples = (
    pd.concat([positive_examples, negative_examples])
    .to_json(orient='records')
)

In [None]:
# Import the 'json' module for handling JSON data.
import json

# Import the 'numpy' library and alias it as 'np' for convenience.
import numpy as np

# Import the 'Counter' class from the 'collections' module for counting occurrences of elements.
from collections import Counter

# Import 'tqdm' for displaying progress bars when iterating over data.
from tqdm import tqdm


In [None]:
# System prompt for the sentiment task: it states the task, says reviews are
# delimited by triple backticks, and restricts the answer to a single label.
# It is placed in front of the few-shot examples when the full prompt is built.
# NOTE(review): the whole block is wrapped in its own [INST]...[/INST] pair with
# no user turn inside — confirm this matches the chat template the model expects.
system_message = """[INST]<<SYS>>Classify the sentiment of movie reviews presented in the input as 'positive' or 'negative'.
Movie reviews will be delimited by triple backticks in the input.
Answer only 'positive' or 'negative'. Do not explain your answer.

Instructions:
1. Carefully read the text of the review and consider the overall sentiment of the review
2. Estimate the probability of the review being positive

To re-iterate, your answer should strictly only contain the label: positive or negative.

Some examples of expected output are provided below as guidance.<</SYS>>[/INST]
"""

In [None]:
# Per-example template: the review text goes between triple backticks inside an
# [INST] ... [/INST] pair, and the label follows on the next line. The same
# template is used for the review being classified, with an empty {output}.
prompt_template = """
[INST] ```{input_data}``` [/INST]
{output}
"""

In [None]:
## Start with an empty few-shot block; formatted examples are appended to it.
few_shot_examples = ""

In [None]:
## Build the few-shot block from the JSON records created earlier.
# The string is rebuilt from scratch here instead of being appended to, so
# re-running this cell can never duplicate the examples inside the prompt.
few_shot_examples = ''.join(
    prompt_template.format(
        # Only the review text is shown to the model as input.
        input_data=example['text'],
        # Label 0 is rendered as 'negative'; any other label as 'positive'.
        output='negative' if example['label'] == 0 else 'positive'
    )
    for example in json.loads(examples)
)

In [None]:
# Draw 100 test reviews and convert them to a list of row records (dicts keyed
# by column name). A fixed random_state keeps the evaluation set identical
# across runs, so the reported metrics can be reproduced.
test_rows = json.loads(
    imdb_test_df.sample(100, random_state=42).to_json(orient='records')
)

In [None]:
## Two independent empty lists: one collects the model's answers, the other
## the matching ground-truth labels.
model_predictions = []
ground_truths = []

In [None]:
## Classify every sampled test review with the few-shot prompt and record the
## prediction next to its ground-truth label.

# Start from fresh lists on every run of this cell. Appending to whatever is
# already bound to these names would double-count results on a re-run, and
# would fail outright once a later cell has rebound them to NumPy arrays.
model_predictions, ground_truths = [], []

for row in tqdm(test_rows):
    # System message + few-shot examples + the review to classify, with the
    # answer slot left empty for the model to fill in.
    few_shot_prompt = (
        system_message + few_shot_examples +
        prompt_template.format(
            input_data=row['text'],
            output=''
        )
    )

    try:
        # Use the model (lcpp_llm) to generate a response based on the few-shot prompt
        response = lcpp_llm(
            prompt=few_shot_prompt,
            max_tokens=2,
            temperature=0,
            top_p=0.95,
            repeat_penalty=1.2,
            top_k=50,
            stop=['INST'], # Dynamic stopping when such token is detected.
            echo=False # do not return the prompt
        )
    except ValueError as e:
        # Seen in practice when the prompt is larger than the context window
        # ("Requested tokens (...) exceed context window of 4096"). Report it
        # and skip the review; nothing is appended, so both lists stay aligned.
        print(e)
        continue

    # Store the generated text stripped of whitespace and lowercased.
    prediction = response["choices"][0]["text"]
    model_predictions.append(prediction.strip().lower())

    # Label 0 is recorded as 'negative'; any other label as 'positive'.
    ground_truths.append('negative' if row['label'] == 0 else 'positive')

  1%|          | 1/100 [01:42<2:48:41, 102.23s/it]Llama.generate: prefix-match hit
  2%|▏         | 2/100 [01:47<1:14:01, 45.32s/it] Llama.generate: prefix-match hit
  3%|▎         | 3/100 [02:01<50:15, 31.09s/it]  Llama.generate: prefix-match hit
  4%|▍         | 4/100 [02:30<48:17, 30.19s/it]Llama.generate: prefix-match hit
  5%|▌         | 5/100 [03:20<58:56, 37.22s/it]Llama.generate: prefix-match hit
  6%|▌         | 6/100 [03:40<49:14, 31.43s/it]Llama.generate: prefix-match hit
  7%|▋         | 7/100 [04:14<49:44, 32.09s/it]Llama.generate: prefix-match hit
  8%|▊         | 8/100 [04:22<37:44, 24.61s/it]Llama.generate: prefix-match hit


Requested tokens (4148) exceed context window of 4096


 10%|█         | 10/100 [04:40<25:45, 17.17s/it]Llama.generate: prefix-match hit
 11%|█         | 11/100 [04:47<21:41, 14.62s/it]Llama.generate: prefix-match hit
 12%|█▏        | 12/100 [05:00<20:34, 14.03s/it]Llama.generate: prefix-match hit
 13%|█▎        | 13/100 [05:13<19:58, 13.78s/it]Llama.generate: prefix-match hit
 14%|█▍        | 14/100 [05:27<19:54, 13.89s/it]Llama.generate: prefix-match hit
 15%|█▌        | 15/100 [05:41<19:45, 13.94s/it]Llama.generate: prefix-match hit
 16%|█▌        | 16/100 [05:59<21:14, 15.17s/it]Llama.generate: prefix-match hit
 17%|█▋        | 17/100 [07:01<39:50, 28.80s/it]Llama.generate: prefix-match hit
 18%|█▊        | 18/100 [07:09<31:12, 22.84s/it]Llama.generate: prefix-match hit
 19%|█▉        | 19/100 [07:16<24:27, 18.12s/it]Llama.generate: prefix-match hit
 20%|██        | 20/100 [07:47<29:08, 21.86s/it]Llama.generate: prefix-match hit
 21%|██        | 21/100 [07:57<24:17, 18.45s/it]Llama.generate: prefix-match hit
 22%|██▏       | 22/100 [08:

Requested tokens (4196) exceed context window of 4096


 73%|███████▎  | 73/100 [19:37<04:32, 10.08s/it]Llama.generate: prefix-match hit
 74%|███████▍  | 74/100 [19:51<04:48, 11.10s/it]Llama.generate: prefix-match hit
 75%|███████▌  | 75/100 [20:20<06:32, 15.69s/it]Llama.generate: prefix-match hit
 76%|███████▌  | 76/100 [20:31<05:45, 14.38s/it]Llama.generate: prefix-match hit
 77%|███████▋  | 77/100 [20:39<04:51, 12.68s/it]Llama.generate: prefix-match hit
 78%|███████▊  | 78/100 [20:56<05:06, 13.93s/it]Llama.generate: prefix-match hit
 79%|███████▉  | 79/100 [21:13<05:08, 14.67s/it]Llama.generate: prefix-match hit
 80%|████████  | 80/100 [21:28<04:57, 14.87s/it]Llama.generate: prefix-match hit
 81%|████████  | 81/100 [22:01<06:25, 20.28s/it]Llama.generate: prefix-match hit
 82%|████████▏ | 82/100 [22:16<05:35, 18.61s/it]Llama.generate: prefix-match hit
 83%|████████▎ | 83/100 [22:37<05:27, 19.25s/it]Llama.generate: prefix-match hit
 84%|████████▍ | 84/100 [22:52<04:48, 18.05s/it]Llama.generate: prefix-match hit
 85%|████████▌ | 85/100 [23:

In [None]:
# Tally how often each distinct predicted string occurs.
Counter(model_predictions)

Counter({'positive': 52, 'negative': 45, 'neutral': 1})

In [None]:
# Tally the ground-truth labels of the reviews that were actually scored.
Counter(ground_truths)

Counter({'positive': 52, 'negative': 46})

In [None]:
# Turn both result lists into NumPy arrays so they can be compared element-wise.
ground_truths, model_predictions = (
    np.array(ground_truths),
    np.array(model_predictions),
)

In [None]:
# Accuracy: fraction of positions where the prediction equals the ground truth.
(model_predictions == ground_truths).mean()

0.9489795918367347

In [None]:
# Precision of the 'positive' class: of everything predicted positive, the
# share whose ground truth is positive.
predicted_positive = model_predictions == 'positive'
TP = (predicted_positive & (ground_truths == 'positive')).sum()
FP = (predicted_positive & (ground_truths == 'negative')).sum()
precision = TP / (TP + FP)

In [None]:
# Display the precision computed in the previous cell.
precision

0.9615384615384616

In [None]:
# Load the bank-complaints dataset from the Hub.
# NOTE: this rebinds `dataset`, which held the IMDb data up to this point.
dataset = load_dataset("AdiOO7/Bank_Complaints")

Downloading readme:   0%|          | 0.00/130 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/573k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# System prompt for the complaint-response task; it replaces the sentiment
# system message defined earlier under the same name.
# NOTE(review): "spokesperson person" reads like a typo; it is left untouched
# here because this text is sent to the model verbatim.
system_message = """[INST]<<SYS>>As a spokesperson person of a particular bank, you are tasked to give a public response to a user's complaint presented as input.
Instructions:
1. Carefully observe the intensity and severity of the complaint received as input.
2. Choose a carefully worded public response. You need to reply to every complaint, however, responding with "Company chooses not to provide a public response" is also a valid response.
Some examples of appropriate response are provided below as guidance.<</SYS>>[/INST]
"""

In [None]:
# Per-example template for the complaint task: the complaint sits inside an
# [INST] ... [/INST] pair and the response follows on the next line. This
# rebinds `prompt_template` with different placeholder names than the
# sentiment template used earlier.
prompt_template = """
[INST] {input_example} [/INST]
{output_example}
"""

In [None]:
# Clear the few-shot block before the complaint examples are added to it.
few_shot_examples = ""

In [None]:
# Build the few-shot block from up to five distinct, randomly chosen
# complaint/response pairs.
#  - Indices come from the actual split length instead of a hard-coded upper
#    bound, so the cell keeps working if the dataset size changes.
#  - random.sample draws without replacement, so no pair is repeated.
#  - The string is rebuilt from scratch, so re-running the cell does not keep
#    appending more examples to the prompt.
train_split = dataset['train']
example_indices = random.sample(
    range(len(train_split)),
    k=min(5, len(train_split))
)

formatted_examples = []
for example_index in example_indices:
    sample_document = train_split[example_index]
    formatted_examples.append(
        prompt_template.format(
            input_example=sample_document['Input'],
            output_example=sample_document['Response']
        )
    )

few_shot_examples = ''.join(formatted_examples)

In [None]:
# Pick one random complaint to answer. The index range is derived from the
# split length rather than a hard-coded bound, so it stays valid if the dataset
# grows or shrinks.
# NOTE(review): the complaint is drawn from the same split as the few-shot
# examples, so it can coincide with one of them.
num_complaints = len(dataset['train'])
test_document = dataset['train'][random.randrange(num_complaints)]
new_complaint = test_document['Input']

In [None]:
# Full prompt: system message, then the few-shot examples, then the new
# complaint formatted with an empty response slot for the model to complete.
few_shot_prompt = ''.join([
    system_message,
    few_shot_examples,
    prompt_template.format(input_example=new_complaint, output_example=''),
])

In [None]:
# Generate a public response for the new complaint.
response = lcpp_llm(
    prompt=few_shot_prompt,
    # do not return the prompt
    echo=False,
    # Dynamic stopping when such token is detected.
    stop=['INST'],
    max_tokens=256,
    temperature=0,
    top_k=50,
    top_p=0.95,
    repeat_penalty=1.2,
)

Llama.generate: prefix-match hit


In [None]:
# Display the complaint text that was sent to the model.
new_complaint

'They have misspell my name and my wrong employer name. \n'

In [None]:
# Print the generated text of the first completion choice.
print(response["choices"][0]["text"])

Company apologizes for any inconvenience caused by these errors, we will work to correct them as soon as possible.
