## Bibliothèques

In [None]:
!pip install transformers
!pip install datasets
!pip install evaluate
!pip install tensorrt

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

## Chargement du Modèle et Quantification

In [None]:
## Load the pretrained model and its tokenizer

from transformers import GPT2LMHeadModel, GPT2TokenizerFast

# Checkpoint identifier; the original author's note gives its size as 774M parameters.
model_id = "gpt2-large"

# Instantiate the language model, then move it to the selected device.
model = GPT2LMHeadModel.from_pretrained(model_id)
model = model.to(device)

# Tokenizer matching the same checkpoint.
tokenizer = GPT2TokenizerFast.from_pretrained(model_id)

In [None]:
# Collect every model parameter as a flattened (1-D) tensor, in `model.parameters()` order.
L = [weights.flatten() for weights in model.parameters()]

In [None]:
## Run our compression method on the flattened parameters

import LogitCompression

# Alternative pipeline based on LogitCompression, currently disabled:
# Bloc_params,Weight_compressed=LogitCompression.Compression(L)
# new_weight_total=LogitCompression.Decompression(Bloc_params,Weight_compressed)

import QLoRA

# Four-stage round trip through the project-local QLoRA module. Each stage feeds
# the next; the final result `new_weight_total` is what the later cells index
# per parameter.
# NOTE(review): judging by the names only, the stages are weight compression,
# compression of the rescale values, then the two matching decompressions —
# confirm against the QLoRA module.
# NOTE(review): the saved output of this cell shows a run interrupted after
# ~41 minutes with the progress bar still at 0/436.
List_rescale_values,Total_List_W=QLoRA.CompressionPoids(L)
values_mean,new_bloc_rescale=QLoRA.CompressionRescale(List_rescale_values)
new_rescale_bien_mise=QLoRA.DecompressionRescale(values_mean,new_bloc_rescale)
new_weight_total=QLoRA.DecompressionPoids(new_rescale_bien_mise,Total_List_W)

  0%|          | 0/436 [41:34<?, ?it/s]


KeyboardInterrupt: ignored

In [None]:
## Rebuild the model from the decompressed weights

# Overwrite each parameter, in `model.parameters()` order, with its decompressed values.
# NOTE(review): assumes new_weight_total[i] is a flat sequence with at least
# param.numel() leading values for the i-th parameter — confirm against the
# decompression step.
with torch.no_grad():
    for i, param in enumerate(model.parameters()):
        n_values = param.numel()
        # Create the replacement with the parameter's own dtype and device.
        # torch.FloatTensor would always produce a CPU tensor and silently move
        # a CUDA model's weights to the CPU, breaking later forward passes.
        restored = torch.as_tensor(
            new_weight_total[i][:n_values],
            dtype=param.dtype,
            device=param.device,
        )
        # In-place copy keeps the existing Parameter object and its storage.
        param.copy_(restored.reshape(param.shape))

## Perplexity

In [None]:
## Evaluation corpus for the perplexity measurement
from datasets import load_dataset

# WikiText-2 (raw variant), test split.
test = load_dataset(path="wikitext", name="wikitext-2-raw-v1", split="test")

# Alternative corpora, kept disabled:
# C4
# test = load_dataset("c4", "en", split="test")
# Penn Treebank
# test = load_dataset("ptb_text_only", split="test")

In [None]:
## Tokenize the dataset

# Join all rows of the "text" column into one document separated by blank lines,
# then tokenize it in a single call and get PyTorch tensors back.
corpus = "\n\n".join(test["text"])
encodings = tokenizer(corpus, return_tensors="pt")

In [None]:
import torch
from tqdm import tqdm

# Sliding-window perplexity over the tokenized corpus.
# Each window holds up to `max_length` tokens and starts `stride` tokens after
# the previous one. Only tokens not scored by an earlier window keep their
# label; the rest are set to -100 so the loss ignores them and they act as
# context only.
max_length = model.config.n_positions
stride = 512
seq_len = encodings.input_ids.size(1)

nlls = []      # per-window mean losses, kept for inspection
nll_sum = 0.0  # sum of negative log-likelihoods over all scored tokens
n_tokens = 0   # number of tokens that contributed to nll_sum
prev_end_loc = 0
for begin_loc in tqdm(range(0, seq_len, stride)):
    end_loc = min(begin_loc + max_length, seq_len)
    trg_len = end_loc - prev_end_loc  # may be different from stride on last loop
    input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
    target_ids = input_ids.clone()
    target_ids[:, :-trg_len] = -100

    # The model shifts the labels by one position internally, so the first
    # label of a window is never scored. Count what is left after that shift.
    num_loss_tokens = int((target_ids[:, 1:] != -100).sum())

    # A window with nothing to score would yield a NaN mean loss; skip it.
    if num_loss_tokens > 0:
        with torch.no_grad():
            outputs = model(input_ids, labels=target_ids)
            neg_log_likelihood = outputs.loss  # mean over this window's scored tokens

        nlls.append(neg_log_likelihood)
        # Weight the window mean by its token count. Windows score different
        # numbers of tokens (first and last in particular), so a plain mean of
        # the window means would bias the corpus-level average.
        nll_sum = nll_sum + neg_log_likelihood * num_loss_tokens
        n_tokens += num_loss_tokens

    prev_end_loc = end_loc
    if end_loc == seq_len:
        break

if n_tokens == 0:
    raise ValueError("No tokens were scored: the tokenized dataset is too short to compute a perplexity.")

# Perplexity = exp(average negative log-likelihood per scored token).
ppl = torch.exp(nll_sum / n_tokens)


In [None]:
# Show the perplexity computed by the previous cell.
print(ppl)

## Zero-shot Accuracy

In [None]:
!git clone https://github.com/EleutherAI/lm-evaluation-harness
!pip /content/lm-evaluation-harness/setup.py install -e .

In [None]:
from huggingface_hub import notebook_login

# Authenticate this notebook session with the Hugging Face Hub so the model can
# be uploaded in the next cell. The token is entered interactively rather than
# hardcoded in the notebook.
notebook_login()

In [None]:
# Upload the current model weights to the Hub repository "Logit_compression_gpt2".
model.push_to_hub("Logit_compression_gpt2")
# Upload the tokenizer to the same repository: the evaluation command further
# down only passes this repository id, so everything needed to load the model
# (tokenizer files included) has to live there.
tokenizer.push_to_hub("Logit_compression_gpt2")

In [None]:
# Zero-shot evaluation of the uploaded model with lm-evaluation-harness.
# Tasks run: hellaswag, piqa, winogrande, arc_easy, arc_challenge.
# `--model_args pretrained=...` points at the Hub repository pushed above and
# `--device cuda:0` runs the evaluation on the first GPU.

!python /content/lm-evaluation-harness/main.py \
    --model gpt2 \
    --model_args pretrained=Emma5099/Logit_compression_gpt2 \
    --tasks hellaswag,piqa,winogrande,arc_easy,arc_challenge \
    --device cuda:0


## MSE

In [None]:
## Reconstruction error of our method

# Reload the reference (uncompressed) model on the CPU.
# SECURITY: torch.load unpickles the file and can execute arbitrary code —
# only load checkpoints from a trusted source.
# NOTE(review): the path names a ResNet-20 checkpoint while the earlier cells
# compress gpt2-large; confirm this is the model `new_weight_total` was built from.
model = torch.load('/content/resnet20_check_point.pth', map_location='cpu')

# Sum of squared differences between each reference parameter and its
# decompressed counterpart. This is a sum over all elements, not divided by
# the element count.
mse_total = 0
# no_grad: the parameters require gradients, so without it every subtraction
# would be recorded and `mse_total` would keep an autograd graph alive.
with torch.no_grad():
    for i, param in enumerate(model.parameters()):
        n_values = param.numel()
        # First n_values decompressed values, as float32 on the parameter's
        # device, reshaped to the parameter's shape.
        restored = torch.as_tensor(
            new_weight_total[i][:n_values],
            dtype=torch.float32,
            device=param.device,
        ).reshape(param.shape)

        mse_total += torch.sum(torch.square(param - restored))

# Value noted by the original author (presumably from an earlier run): 0.0378
print(mse_total)