In [None]:
!pip install transformers
!pip install bitsandbytes
!pip install datasets

[0mCollecting bitsandbytes
  Downloading bitsandbytes-0.38.1-py3-none-any.whl (104.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.3/104.3 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.38.1
[0m

In [None]:
from sklearn.model_selection import train_test_split

import transformers

import pandas as pd

import torch
import torch.nn.functional as F
from torch import nn
from torch.cuda.amp import custom_fwd, custom_bwd

from bitsandbytes.functional import quantize_blockwise, dequantize_blockwise

from tqdm.auto import tqdm




Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /opt/conda/lib/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)


**Quantization**

In [None]:
class FrozenBNBLinear(nn.Module):
    """Linear layer with a frozen, blockwise-quantized weight and an optional adapter.

    The quantized weight and its quantization state (`absmax`, `code`) are
    registered as buffers with gradients disabled. An optional `adapter` module
    can be attached after construction; its output is added to the output of the
    frozen layer.

    Args:
        weight: quantized weight of shape (out_features, in_features).
        absmax: blockwise scaling values passed to `dequantize_blockwise`.
        code: quantization code book passed to `dequantize_blockwise`.
        bias: optional bias; must be an `nn.Parameter` or None.
    """

    def __init__(self, weight, absmax, code, bias=None):
        assert isinstance(bias, nn.Parameter) or bias is None
        super().__init__()
        self.out_features, self.in_features = weight.shape
        self.register_buffer("weight", weight.requires_grad_(False))
        self.register_buffer("absmax", absmax.requires_grad_(False))
        self.register_buffer("code", code.requires_grad_(False))
        # No adapter by default; assign an nn.Module to `self.adapter` to enable one.
        self.adapter = None
        self.bias = bias

    def forward(self, input):
        """Apply the frozen linear map and add the adapter output when an adapter is set."""
        output = DequantizeAndLinear.apply(input, self.weight, self.absmax, self.code, self.bias)
        # Compare against None explicitly: the truth value of an nn.Module falls
        # back to __len__ when defined (e.g. container modules), so a plain
        # `if self.adapter:` can silently skip an adapter that was attached.
        if self.adapter is None:
            return output
        return torch.clone(output + self.adapter(input))

    @classmethod
    def from_linear(cls, linear: nn.Linear) -> "FrozenBNBLinear":
        """Quantize the weight of `linear` and wrap it, reusing its bias parameter."""
        weights_int8, state = quantize_blockise_lowmemory(linear.weight)
        return cls(weights_int8, *state, linear.bias)

    def __repr__(self):
        return f"{self.__class__.__name__}({self.in_features}, {self.out_features})"
 
 
class DequantizeAndLinear(torch.autograd.Function):
    """Autograd function that dequantizes a blockwise-quantized weight and applies `F.linear`.

    The quantized weight, `absmax` and `code` are treated as constants: backward
    returns gradients only for `input` and, when a bias was given, for `bias`.
    """

    @staticmethod
    @custom_fwd
    def forward(ctx, input: torch.Tensor, weights_quantized: torch.ByteTensor,
                absmax: torch.FloatTensor, code: torch.FloatTensor, bias: torch.FloatTensor):
        """Return `F.linear(input, dequantized_weight, bias)`; `bias` may be None."""
        weights_deq = dequantize_blockwise(weights_quantized, absmax=absmax, code=code)
        # Backward only needs the quantization state to rebuild the weight.
        # `input` is deliberately NOT saved: it is never read in backward, and
        # saving it would keep every layer's activation alive until backward runs.
        ctx.save_for_backward(weights_quantized, absmax, code)
        ctx._has_bias = bias is not None
        return F.linear(input, weights_deq, bias).clone()

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output: torch.Tensor):
        """Return gradients for (input, weights_quantized, absmax, code, bias)."""
        # The quantized weight and its state are frozen and must never require grad.
        assert not ctx.needs_input_grad[1] and not ctx.needs_input_grad[2] and not ctx.needs_input_grad[3]
        weights_quantized, absmax, code = ctx.saved_tensors
        # grad_output: [*batch, out_features]
        weights_deq = dequantize_blockwise(weights_quantized, absmax=absmax, code=code)
        grad_input = grad_output @ weights_deq
        # Bias gradient: sum over every leading (batch-like) dimension.
        grad_bias = grad_output.flatten(0, -2).sum(dim=0) if ctx._has_bias else None
        return grad_input, None, None, None, grad_bias
 
 
class FrozenBNBEmbedding(nn.Module):
    """Embedding layer with a frozen, blockwise-quantized table and an optional adapter.

    The quantized table and its quantization state (`absmax`, `code`) are
    registered as buffers with gradients disabled. An optional `adapter` module
    can be attached after construction; it is called with the same indices and
    its output is added to the looked-up embeddings.

    Args:
        weight: quantized table of shape (num_embeddings, embedding_dim).
        absmax: blockwise scaling values passed to `dequantize_blockwise`.
        code: quantization code book passed to `dequantize_blockwise`.
    """

    def __init__(self, weight, absmax, code):
        super().__init__()
        self.num_embeddings, self.embedding_dim = weight.shape
        self.register_buffer("weight", weight.requires_grad_(False))
        self.register_buffer("absmax", absmax.requires_grad_(False))
        self.register_buffer("code", code.requires_grad_(False))
        # No adapter by default; assign an nn.Module to `self.adapter` to enable one.
        self.adapter = None

    def forward(self, input, **kwargs):
        """Look up `input` in the dequantized table; extra kwargs are forwarded to `F.embedding`."""
        with torch.no_grad():
            # note: both quantized weights and input indices are *not* differentiable
            weight_deq = dequantize_blockwise(self.weight, absmax=self.absmax, code=self.code)
            output = F.embedding(input, weight_deq, **kwargs)

        # Compare against None explicitly: the truth value of an nn.Module falls
        # back to __len__ when defined (e.g. container modules), so a plain
        # `if self.adapter:` can silently skip an adapter that was attached.
        if self.adapter is None:
            return output
        return torch.clone(output + self.adapter(input))

    @classmethod
    def from_embedding(cls, embedding: nn.Embedding) -> "FrozenBNBEmbedding":
        """Quantize the weight of `embedding` and wrap it."""
        weights_int8, state = quantize_blockise_lowmemory(embedding.weight)
        return cls(weights_int8, *state)

    def __repr__(self):
        return f"{self.__class__.__name__}({self.num_embeddings}, {self.embedding_dim})"
 
 
def quantize_blockise_lowmemory(matrix: torch.Tensor, chunk_size: int = 2 ** 20):
    """Quantize `matrix` with `quantize_blockwise`, one chunk at a time.

    The matrix is flattened and fed to `quantize_blockwise` in slices of
    `chunk_size` elements so that only one chunk-sized copy exists at a time.
    The code book returned for one chunk is passed on to the next call.

    Args:
        matrix: tensor to quantize; may be non-contiguous and may require grad.
        chunk_size: elements per chunk; must be a multiple of 4096.

    Returns:
        `(matrix_i8, (absmax, code))`: the quantized tensor reshaped like
        `matrix`, the concatenated per-chunk `absmax` values, and the code book
        returned by the last `quantize_blockwise` call.
    """
    assert chunk_size % 4096 == 0
    code = None
    chunks = []
    absmaxes = []
    # detach(): the result is frozen, so the chunk copies need not be tracked by
    # autograd. reshape(-1) is a view for contiguous input (as `view(-1)` was)
    # but also accepts non-contiguous tensors, where `view(-1)` raises.
    flat_tensor = matrix.detach().reshape(-1)
    for i in range((matrix.numel() - 1) // chunk_size + 1):
        input_chunk = flat_tensor[i * chunk_size: (i + 1) * chunk_size].clone()
        quantized_chunk, (absmax_chunk, code) = quantize_blockwise(input_chunk, code=code)
        chunks.append(quantized_chunk)
        absmaxes.append(absmax_chunk)

    matrix_i8 = torch.cat(chunks).reshape_as(matrix)
    absmax = torch.cat(absmaxes)
    return matrix_i8, (absmax, code)
 
 
def convert_to_int8(model):
    """Swap every nn.Linear / nn.Embedding inside `model` for a frozen 8-bit stand-in.

    The replacements are built from zero-filled tensors of the right shape
    (uint8 weight, one `absmax` entry per 4096 weight elements, a 256-entry
    `code`); a Linear's bias parameter is carried over. The model is modified
    in place and nothing is returned.
    """
    # Snapshot the module list first: children are replaced while we walk it.
    for parent in list(model.modules()):
        for child_name, submodule in parent.named_children():
            if isinstance(submodule, nn.Linear):
                print(child_name, submodule)
                replacement = FrozenBNBLinear(
                    weight=torch.zeros(submodule.out_features, submodule.in_features, dtype=torch.uint8),
                    absmax=torch.zeros((submodule.weight.numel() - 1) // 4096 + 1),
                    code=torch.zeros(256),
                    bias=submodule.bias,
                )
            elif isinstance(submodule, nn.Embedding):
                replacement = FrozenBNBEmbedding(
                    weight=torch.zeros(submodule.num_embeddings, submodule.embedding_dim, dtype=torch.uint8),
                    absmax=torch.zeros((submodule.weight.numel() - 1) // 4096 + 1),
                    code=torch.zeros(256),
                )
            else:
                # Any other module type is left untouched.
                continue
            setattr(parent, child_name, replacement)

In [None]:
class GPTJBlock(transformers.models.gptj.modeling_gptj.GPTJBlock):
    """GPT-J block whose attention and MLP sub-modules are passed through `convert_to_int8`."""

    def __init__(self, config):
        # Build the stock block first so that `self.attn` and `self.mlp` exist.
        super().__init__(config)

        # Replace the nn.Linear / nn.Embedding children of both sub-modules with
        # frozen 8-bit stand-ins (zero-filled until real weights are loaded).
        convert_to_int8(self.attn)
        convert_to_int8(self.mlp)


class GPTJModel(transformers.models.gptj.modeling_gptj.GPTJModel):
    """GPT-J backbone with all of its nn.Linear / nn.Embedding modules converted by `convert_to_int8`."""

    def __init__(self, config):
        # Build the stock model, then convert every remaining Linear/Embedding in place.
        super().__init__(config)
        convert_to_int8(self)
        

class GPTJForCausalLM(transformers.models.gptj.modeling_gptj.GPTJForCausalLM):
    """GPT-J causal-LM model with all of its nn.Linear / nn.Embedding modules converted by `convert_to_int8`."""

    def __init__(self, config):
        # Build the stock model, then convert every remaining Linear/Embedding in place.
        super().__init__(config)
        convert_to_int8(self)


# Monkey-patch: rebind the `GPTJBlock` name inside transformers' GPT-J module to
# the 8-bit subclass above. Presumably this makes models constructed by that
# module build the patched block (verify against the installed transformers version).
transformers.models.gptj.modeling_gptj.GPTJBlock = GPTJBlock

In [None]:
class T5ForConditionalGeneration(transformers.models.t5.modeling_t5.T5ForConditionalGeneration):
    """T5 seq2seq model with all of its nn.Linear / nn.Embedding modules converted by `convert_to_int8`."""

    def __init__(self, config):
        # Build the stock model, then convert every Linear/Embedding in place.
        super().__init__(config)
        convert_to_int8(self)

# Monkey-patch: rebind the class name inside transformers' T5 module to the 8-bit subclass.
# NOTE(review): no T5 model is instantiated in the visible cells of this notebook.
transformers.models.t5.modeling_t5.T5ForConditionalGeneration = T5ForConditionalGeneration

In [None]:
# Fetch the configuration and the tokenizer published under "EleutherAI/gpt-j-6B".
config = transformers.GPTJConfig.from_pretrained("EleutherAI/gpt-j-6B")
tokenizer = transformers.AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

Downloading (…)lve/main/config.json:   0%|          | 0.00/930 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/4.04k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/357 [00:00<?, ?B/s]

In [None]:
# No padding token is configured here, so reuse the end-of-sequence token.
# The config stores the token *id*, while the tokenizer's `pad_token` must be
# the token *string*; assigning the integer id to it would register a bogus
# pad token (or be rejected outright by newer transformers releases).
config.pad_token_id = config.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Load the "hivemind/gpt-j-6B-8bit" checkpoint into the 8-bit model class defined above.
gpt = GPTJForCausalLM.from_pretrained("hivemind/gpt-j-6B-8bit", low_cpu_mem_usage=True)
# Alternative checkpoint (French GPT-J, Cedille's Boris):
#gpt = GPTJForCausalLM.from_pretrained("gustavecortal/fr-boris-8bit", low_cpu_mem_usage=True)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, bias=False)
fc_in Linear(in_features=4096, out_features=16384, bias=True)
fc_out Linear(in_features=16384, out_features=4096, bias=True)
k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, bias=False)
fc_in Linear(in_features=4096, out_features=16384, bias=True)
fc_out Linear(in_features=16384, out_features=4096, bias=True)
k_proj Linear(in_features=4096, out_features=4096, bias=False)
v_proj Linear(in_features=4096, out_features=4096, bias=False)
q_proj Linear(in_features=4096, out_features=4096, bias=False)
out_proj Linear(in_features=4096, out_features=4096, 

In [None]:
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

# Move the model to the chosen device; as the cell's last expression, the
# returned module is also displayed as the cell output.
gpt.to(device)

GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): FrozenBNBEmbedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): FrozenBNBLinear(4096, 4096)
          (v_proj): FrozenBNBLinear(4096, 4096)
          (q_proj): FrozenBNBLinear(4096, 4096)
          (out_proj): FrozenBNBLinear(4096, 4096)
        )
        (mlp): GPTJMLP(
          (fc_in): FrozenBNBLinear(4096, 16384)
          (fc_out): FrozenBNBLinear(16384, 4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): FrozenBNBLinear(4096, 50400)
)

In [None]:
import torch
import transformers
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from transformers import AutoTokenizer, AutoModelForCausalLM
import csv
# Read the training CSV, keep only the text and target columns, and rename
# them to the `input_text` / `label` names used by the rest of the notebook.
df = (
    pd.read_csv('/kaggle/input/nlpgettingstarted/train.csv')
    [['text', 'target']]
    .rename(columns={'text': 'input_text', 'target': 'label'})
)
# Seeded split: 20% of the rows go to `train_data`, 80% to `test_data`.
# NOTE(review): a test share of 0.8 is unusually large; confirm it is intentional.
train_data, test_data = train_test_split(df, random_state=42, test_size=0.8)



In [None]:
# Plain-Python views of the two splits: training texts with their labels as
# strings, and the test frame with its labels as a list.
train_data1 = train_data['input_text'].tolist()
y_train_data = [str(label) for label in train_data['label'].tolist()]
test_data = test_data.loc[:, ['input_text', 'label']]
y_test_data = test_data['label'].tolist()

# Encode the training texts with the tokenizer (padded, truncated to 128
# tokens, returned as PyTorch tensors) and move them to the selected device.
model_inputs = tokenizer(
    train_data1,
    max_length=128,
    truncation=True,
    padding=True,
    return_tensors='pt',
).to(device)


# **Automatic Prompt Engineer (APE)**

In [None]:
# Install Dependencies
! pip install git+https://github.com/keirp/automatic_prompt_engineer

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting git+https://github.com/keirp/automatic_prompt_engineer
  Cloning https://github.com/keirp/automatic_prompt_engineer to /tmp/pip-req-build-q70tvh_v
  Running command git clone --filter=blob:none --quiet https://github.com/keirp/automatic_prompt_engineer /tmp/pip-req-build-q70tvh_v
  Resolved https://github.com/keirp/automatic_prompt_engineer to commit 21080cca80a688b0de6953f6eaa8f0fc6f2bc630
  Preparing metadata (setup.py) ... [?25ldone
Collecting openai
  Downloading openai-0.27.6-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.9/71.9 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting gradio
  Downloading gradio-3.30.0-py3-none-any.whl (17.3 MB)


In [None]:
import os

import openai

# SECURITY: never hard-code API keys in a notebook; they end up in version
# control and in shared copies. The key is read from the environment instead.
# Any key that was previously committed in this cell must be treated as leaked
# and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

In [None]:
# Three-line template with [PROMPT], [INPUT] and [OUTPUT] placeholders,
# one labelled field per line.
eval_template = "\n".join([
    "Instruction: [PROMPT]",
    "Input: [INPUT]",
    "Output: [OUTPUT]",
])

In [None]:
# Use APE to find prompts
from automatic_prompt_engineer import ape

# The dataset is the pair (training texts, training labels as strings);
# `eval_template` is the three-field template defined in the previous cell.
# `result` is printed in the next cell; `demo_fn` is not used in the visible cells.
# NOTE(review): presumably this calls the OpenAI API with `openai.api_key` — confirm cost before re-running.
result, demo_fn = ape.simple_ape(
    dataset=(train_data1, y_train_data),
    eval_template=eval_template,
)

Generating prompts...
[GPT_forward] Generating 50 completions, split into 1 batches of size 2000


100%|██████████| 1/1 [00:02<00:00,  3.00s/it]


Model returned 50 prompts. Deduplicating...
Deduplicated to 50 prompts.
Evaluating prompts...


Evaluating prompts: 100%|██████████| 20/20 [00:25<00:00,  1.29s/it]

Finished evaluating.





In [None]:
# Print the APE result; its text form is a "score: prompt" table.
print(result)

score: prompt
----------------
-1.11:  produce an output of 1 if the input was a news article and 0 if it was not.
-1.18:  "input a URL". The output should have been "1", indicating that the input was a valid URL.
-1.19:  produce a 1 if the input is a news article and a 0 if the input is not a news article.
-1.21:  give a 1 for any input that contained the word "earthquake" and a 0 for any input that did not contain the word "earthquake."
-1.32:  input a string of text and output a 1 if the text contained a link and a 0 if the text did not contain a link.
-1.36:  input a message and get a response of 0 or 1. 0 indicates that the message is not positive, and 1 indicates that the message is positive. Based on the given input-output pairs, it seems that the instruction is to input a message and
-1.37:  input a URL and output a 1 if the URL is valid and a 0 if the URL is invalid.
-1.40:  "Input a string of text and output a 1 if the text contains the word 'quake' and a 0 if the text does n

In [None]:
# Classify every test text with the 8-bit GPT-J model: build an
# "Input: ... Output:" prompt, generate exactly one new token, and keep it as
# the predicted label when it is a digit (None otherwise).
predictions_auto = []
for text in test_data['input_text']:
    prompt_text = '''Input: {} Output:'''.format(text)

    prompt = tokenizer(prompt_text, truncation=True, return_tensors='pt')
    prompt = {key: value.to(device) for key, value in prompt.items()}
    out = gpt.generate(
        **prompt,
        # `max_length` counts the prompt tokens as well, so `max_length=1` was
        # always shorter than the input (hence the warning on every call, and
        # an error in newer transformers). `max_new_tokens` limits only the
        # generated continuation.
        max_new_tokens=1,
        # Greedy decoding: top_k / top_p / temperature only apply when
        # sampling, so they are omitted.
        do_sample=False,
        num_beams=1,
        repetition_penalty=1.1,
        pad_token_id=0,
    )
    # The last token of the returned sequence is the single generated token.
    y_hat = tokenizer.decode(out[0][-1]).strip()
    predictions_auto.append(int(y_hat) if y_hat.isdigit() else None)

Input length of input_ids is 20, but `max_length` is set to 1. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 46, but `max_length` is set to 1. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 54, but `max_length` is set to 1. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 29, but `max_length` is set to 1. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 24, but `max_length` is set to 1. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 29, but `max_length` is set to 1. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
Input length of input_ids is 38, but `max_length` is set to 1. This can lead to unexpected behavior.

In [None]:
# Define evaluation function
from sklearn.metrics import accuracy_score
def evaluate(prediction, target):
    """Return the accuracy of `prediction` measured against the reference labels `target`.

    Args:
        prediction: predicted labels, one per example.
        target: ground-truth labels, in the same order.

    Returns:
        The fraction of positions where the two agree, as computed by
        `sklearn.metrics.accuracy_score`.
    """
    # accuracy_score's signature is (y_true, y_pred); pass by keyword so the
    # roles cannot be swapped. Accuracy itself is symmetric, so the value is
    # unchanged.
    return accuracy_score(y_true=target, y_pred=prediction)

In [None]:
import numpy as np
# All-zeros baseline: accuracy obtained by predicting label 0 for every test example.
evaluate(np.zeros(len(y_test_data)),y_test_data)

0.5590551181102362