# Dependencies

In [1]:
!pip install --upgrade google-api-python-client
!pip install bitsandbytes>=0.39.0
!pip install git+https://github.com/huggingface/transformers.git
!pip install git+https://github.com/huggingface/accelerate.git
!pip install tiktoken
!pip install torch
!pip install scipy

Collecting google-api-python-client
  Downloading google_api_python_client-2.92.0-py2.py3-none-any.whl (11.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.4/11.4 MB[0m [31m47.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hCollecting httplib2<1.dev0,>=0.15.0 (from google-api-python-client)
  Downloading httplib2-0.22.0-py3-none-any.whl (96 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.9/96.9 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting google-auth<3.0.0.dev0,>=1.19.0 (from google-api-python-client)
  Downloading google_auth-2.21.0-py2.py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.1/182.1 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting google-auth-httplib2>=0.1.0 (from google-api-python-client)
  Downloading google_auth_httplib2-0.1.0-py2.py3-none-any.whl (9.3 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0.dev0,>=1.31.5 (from go

In [1]:
# Show the available GPU(s), driver/CUDA versions and current GPU memory usage.
!nvidia-smi

Thu Jul 13 13:43:54 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.41.03              Driver Version: 530.41.03    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3070         Off| 00000000:01:00.0 Off |                  N/A |
|  0%   55C    P8               16W / 220W|     15MiB /  8192MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [7]:
# Show total / used / free system memory and swap.
!free

              total        used        free      shared  buff/cache   available
Mem:      181108932    10473132    73337164     5609268    97298636   163382608
Swap:             0           0           0


# Imports

In [1]:
import os
import torch
import time
import datetime
import sys
import traceback
import gc

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

# Loading the model

In [2]:
# Hugging Face Hub id of the checkpoint to load.
# NOTE(review): the repository name ends in "8bit" while the configuration
# below requests 4-bit NF4 quantization — confirm which one is intended.
model_name = 'legendhasit/xgen-7b-8k-inst-8bit'

# bitsandbytes settings: 4-bit NF4 weights, double quantization, bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# trust_remote_code=True executes Python code shipped with the model repository.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
)

KeyboardInterrupt: 

In [None]:
!pip install huggingface_hub["cli"]
!huggingface-cli delete-cache

# Inference

In [2]:
# Parameters:
# - DATASET_NAME
# - HEADER
# - PROMPT_TEMPLATE
# - samples in dataset 'input/<dataset_name>'
# - instructions in 'instructions.txt'

def mkdir(folder_path):
    """Create the folder `folder_path` if it does not exist yet.

    Missing parent folders are created as well, so a nested path such as
    'intermediate/<dataset_name>' can be passed in a single call. A path that
    already exists is left untouched and does not raise.

    Args:
        folder_path: Path of the folder to create.

    Returns:
        None.
    """
    try:
        os.makedirs(folder_path)
    except FileExistsError:
        # Already there: nothing to do.
        pass

# Make sure the root folder that holds the datasets ('input/<dataset_name>') exists.
mkdir('input')

# Prompt template
# ==========================================================================================
# Preamble placed at the very top of every prompt.
HEADER = " ".join((
    "A chat between a curious human and an artificial intelligence assistant.",
    "The assistant gives helpful, detailed, and polite answers to the human's questions.",
))

# The sections of the prompt are separated by one blank line. The
# `{instruction}` and `{article}` placeholders are filled in with str.format().
PROMPT_TEMPLATE = "\n\n".join((
    HEADER,
    "### Human: {instruction}",
    "{article}",
    "### Assistant:",
))

# Collect the samples of the dataset
# ==========================================================================================
DATASET_NAME = 'ami'
# Keep only the '.txt' entries of the texts folder, in lexicographic order
# (so 'sample_10.txt' sorts before 'sample_2.txt').
samples = sorted(
    file_name
    for file_name in os.listdir('input/' + DATASET_NAME + '/texts')
    if file_name.endswith('.txt')
)
n_samples = len(samples)
print('-- Found', n_samples, 'samples:', samples)

# Load instructions
# ==========================================================================================
# 'instructions.txt' holds one instruction per line. The context manager closes
# the file even if reading fails; the line terminator is dropped from each entry.
with open('instructions.txt', 'r', encoding='utf-8') as instruction_file:
    instructions = [line.replace('\n', '') for line in instruction_file.readlines()]
n_instructions = len(instructions)
print('-- Found', n_instructions, 'instructions:', instructions)

-- Found 8 samples: ['sample_1.txt', 'sample_2.txt', 'sample_3.txt', 'sample_4.txt', 'sample_5.txt', 'sample_6.txt', 'sample_7.txt', 'sample_8.txt']
-- Found 6 instructions: ['Summarize the following text.', 'Summarize the following dialogue.', 'First give a list of keypoints from the following dialogue, then summarize it.', 'Provide an introduction and a list of keypoints for the following text. Then, summarize it.', 'I am a busy manager, and I do not have the time to read the following document. Please briefly summarize it for me.', 'I am a busy manager, and I do not have the time to read the following document. Provide an introduction and the list of keypoints for the following document. I also need a summary.']


In [7]:
# Inference
# ==========================================================================================
# For every (instruction, sample) pair: build the prompt, sample one answer from
# the model and store the decoded text in
# 'intermediate/<DATASET_NAME>/<instruction_n>_<sample_n>.txt' (1-based indices).
# Pairs whose result file already exists are skipped, so an interrupted run can
# be restarted without recomputing what is already on disk.
initial_time = time.time()
skipped_samples = 0

mkdir('intermediate')
mkdir('intermediate/' + DATASET_NAME)

print('Starting computation...')

# For each instruction
for instruction_n, instruction in enumerate(instructions):

    # For each sample in dataset
    for sample_n, sample_name in enumerate(samples):

        # Estimate completion and time (skipped pairs are excluded from both counts).
        cur_samples = instruction_n * n_samples + sample_n - skipped_samples
        tot_samples = n_instructions * n_samples - skipped_samples
        progress = cur_samples / tot_samples
        pct = round(progress * 100, 1)
        print('Prompting instruction N' + str(instruction_n + 1) + '/' + str(n_instructions) + ' on sample N' + str(sample_n + 1) + '/' + str(n_samples))
        print('-- Completion: ' + str(pct) + '%')
        if cur_samples > 0:
            approx_total = (time.time() - initial_time) / cur_samples * tot_samples
            approx_remaining = approx_total * (1 - progress)
            print('-- Estimated Remaining Time: ' + str(datetime.timedelta(seconds=int(approx_remaining))) + ' (total ' + str(datetime.timedelta(seconds=int(approx_total))) + ')')

        # Find target file; checked first so that skipped pairs cost no file read.
        target_file_path = 'intermediate/' + DATASET_NAME + '/' + str(instruction_n + 1) + '_' + str(sample_n + 1) + '.txt'
        if os.path.isfile(target_file_path):
            print('-- Found intermediate result file \'' + target_file_path + '\', skipped.')
            skipped_samples += 1
            continue

        # Read sample and generate prompt
        sample_file_path = 'input/' + DATASET_NAME + '/texts/' + sample_name
        with open(sample_file_path, 'r', encoding='utf-8') as sample_file:
            article = sample_file.read()
        prompt = PROMPT_TEMPLATE.format(instruction=instruction, article=article)

        # Pre-bind the names so the cleanup in `finally` works even when
        # tokenization or generation fails half-way.
        input_ids = generated = None
        try:

            # Sample one answer
            input_ids = tokenizer(prompt, return_tensors="pt").to('cuda')
            input_length = len(input_ids['input_ids'][0])
            print('-- Input Length:', input_length)
            # NOTE(review): 50256 is presumably the id of the end-of-text token
            # of this tokenizer — confirm against tokenizer.eos_token_id.
            generated = model.generate(**input_ids, do_sample=True, max_new_tokens=2048, top_k=100, eos_token_id=50256, temperature=0.3)
            output = tokenizer.decode(generated[0]).strip()

            # Save answer in file
            with open(target_file_path, 'w', encoding='utf-8') as target_file:
                target_file.write(output)

        except Exception:
            # `Exception` rather than a bare `except`, so that interrupting the
            # kernel (KeyboardInterrupt) really stops this long-running loop.
            print('Could not compute prompt:')
            print(prompt)
            traceback.print_exc()

        finally:
            # Drop the references to this iteration's tensors, success or not.
            del input_ids, generated
            gc.collect()

delta = time.time() - initial_time
print('Done! Took', datetime.timedelta(seconds=int(delta)), 'seconds')

Starting computation...
Prompting instruction N1/6 on sample N1/10
-- Completion: 0.0%
-- Found intermediate result file 'intermediate/1_1.txt', skipped.
Prompting instruction N1/6 on sample N2/10
-- Completion: 0.0%
-- Found intermediate result file 'intermediate/1_2.txt', skipped.
Prompting instruction N1/6 on sample N3/10
-- Completion: 0.0%
-- Found intermediate result file 'intermediate/1_3.txt', skipped.
Prompting instruction N1/6 on sample N4/10
-- Completion: 0.0%
-- Found intermediate result file 'intermediate/1_4.txt', skipped.
Prompting instruction N1/6 on sample N5/10
-- Completion: 0.0%
-- Found intermediate result file 'intermediate/1_5.txt', skipped.
Prompting instruction N1/6 on sample N6/10
-- Completion: 0.0%
-- Found intermediate result file 'intermediate/1_6.txt', skipped.
Prompting instruction N1/6 on sample N7/10
-- Completion: 0.0%
-- Found intermediate result file 'intermediate/1_7.txt', skipped.
Prompting instruction N1/6 on sample N8/10
-- Completion: 0.0%
-- 

# Score computation

In [2]:
!pip install rouge_score rouge
!pip install evaluate
!pip install bert-score
!pip install sacrebleu
!pip install nltk

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Collecting absl-py
  Using cached absl_py-1.4.0-py3-none-any.whl (126 kB)
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=333555242f4e464ad2f1466233462741b2f13d4d85af3a6bfe866ca7f1f269f6
  Stored in directory: /home/linagora/.cache/pip/wheels/3e/94/5c/7ff8a51c53c1bbc8df4cac58aa4990ffbc6fa203e9f0808fdd
Successfully built rouge_score
Installing collected packages: rouge, absl-py, rouge_score
Successfully installed absl-py-1.4.0 rouge-1.0.1 rouge_score-0.1.2
Collecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m11.3 MB/s[0

Installing collected packages: bert-score
Successfully installed bert-score-0.3.13
Collecting sacrebleu
  Downloading sacrebleu-2.3.1-py3-none-any.whl (118 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.9/118.9 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Collecting portalocker
  Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)
Installing collected packages: portalocker, sacrebleu
Successfully installed portalocker-2.7.0 sacrebleu-2.3.1


In [3]:
import evaluate
import os


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/linagora/.local/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda113.so
CUDA SETUP: CUDA runtime path found: /home/linagora/anaconda3/lib/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 113
CUDA SETUP: Loading binary /home/linagora/.local/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda113.so...


Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [4]:
# Method and variables
# ==========================================================================================
# Scores every generated summary found in 'intermediate/<DATASET_NAME>/' against
# its reference summary and writes one CSV row per summary.
# Relies on names defined by earlier cells: mkdir, DATASET_NAME, samples,
# n_samples and n_instructions.
print('Starting scores computation...')
bleu = evaluate.load("bleu")  # loaded but unused: BLEU is not part of the CSV
rouge = evaluate.load('rouge')
bertscore = evaluate.load('bertscore')

STORAGE_FILE_NAME = 'scores'
PREPROCESS_SUMMARIES = True

# Markers used to cut the model answer out of the raw generated text.
ANSWER_SEPARATOR = "### Assistant:"
END_OF_TEXT = "<|endoftext|>"

# Script itself
# ==========================================================================================

target_file_paths = []
references = []
predictions = []

# For each instruction
for instruction_n in range(n_instructions):

    # For each sample in dataset
    for sample_n in range(n_samples):

        # Find target file
        target_file_path = 'intermediate/' + DATASET_NAME + '/' + str(instruction_n + 1) + '_' + str(sample_n + 1) + '.txt'
        if not os.path.isfile(target_file_path):
            # No summary was generated for this (instruction, sample) pair.
            print('-- Found no intermediate result file \'' + target_file_path + '\', skipped.')
            continue

        # Access generated summary
        with open(target_file_path, 'r', encoding='utf-8') as target_file:
            prediction = target_file.read()

        # Process answer: keep only what follows the first answer marker and
        # drop the end-of-text token when (and only when) it is present.
        if PREPROCESS_SUMMARIES:
            _, marker, answer = prediction.partition(ANSWER_SEPARATOR)
            if not marker:
                print('-- Found no \'' + ANSWER_SEPARATOR + '\' marker in \'' + target_file_path + '\', skipped.')
                continue
            prediction = answer.strip()
            if prediction.endswith(END_OF_TEXT):
                prediction = prediction[:-len(END_OF_TEXT)].rstrip()

        # Read the reference summary. The generated file index refers to the
        # position in the sorted `samples` list, so the reference is looked up
        # by that sample's file name (position and 'sample_<n>' number differ
        # as soon as e.g. 'sample_10.txt' sorts before 'sample_2.txt').
        # Assumes summaries carry the same file names as the texts.
        summary_file_path = 'input/' + DATASET_NAME + '/summaries/' + samples[sample_n]
        with open(summary_file_path, 'r', encoding='utf-8') as summary_file:
            reference = summary_file.read()

        # Add the (path, reference, prediction) triple; the three lists stay aligned.
        target_file_paths.append(target_file_path)
        references.append(reference)
        predictions.append(prediction)

# Calculate metrics (BLEU is intentionally left out)
if predictions:
    result_rouge = rouge.compute(predictions=predictions, references=references, use_aggregator=False)
    # NOTE(review): lang='fr' selects the French BERTScore setup — confirm that
    # it matches the language of the dataset being scored.
    result_bertscore = bertscore.compute(predictions=predictions, references=references, lang='fr', rescale_with_baseline=True, verbose=True)
else:
    print('-- Found no generated summary to score, only the CSV header is written.')

# Write to csv, only once the metrics are available so that a failure above
# does not leave a truncated scores file behind.
# Format: PATH | ROUGE2 | ROUGEL | BERTScore
mkdir('output')
mkdir('output/' + DATASET_NAME)
with open('output/' + DATASET_NAME + '/' + STORAGE_FILE_NAME + '.csv', 'w', encoding='utf-8') as storage_file:
    storage_file.write('path;rouge2;rougel;bertscore\n')
    for i in range(len(target_file_paths)):
        ligne = target_file_paths[i]
        ligne += ';' + str(result_rouge['rouge2'][i]) + ";" + str(result_rouge['rougeL'][i])
        ligne += ";" + str(result_bertscore['f1'][i])

        storage_file.write(ligne + '\n')

print('Done!')

Starting scores computation...
calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 14839.71 seconds, 0.00 sentences/sec
Done!
