## import packages

In [2]:
# import packages
import os
from tqdm.notebook import tqdm
import warnings
import json
import torch.nn.functional as F
import torch
import gc
from transformers import AutoTokenizer, AutoModelForCausalLM
from datetime import datetime

## load data

In [3]:
# load from json file

data_path = "./bbc_news_240125_to_240203.json"


def load_list_from_json(file_path):
    """
    Loads a list of strings from a JSON file.

    :param file_path: Path of the JSON file to be loaded.
    :return: List of strings loaded from the JSON file.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
    

extracted_texts = load_list_from_json(data_path)

print(len(extracted_texts))
# print([len(x) for x in extracted_texts])

1000


In [4]:
for text in extracted_texts[:100]:
    print(text)
    print('-' * 100)

Canada has announced plans to again delay an expansion to its medical assistance in dying (maid) programme for people who have a mental illness.
It comes after a parliamentary committee recommended a pause on the expansion, which was originally set to come into effect on 17 March.
Ottawa had also been warned by the provinces that they had not had time to properly prepare.
Legislation was tabled on Thursday to delay the expansion to 2027.
The government "has heard - and agrees - that the health system is not yet ready for this expansion", Health Canada said in a statement.
Concerns have been raised by provinces across the country, which oversee healthcare services, about whether the system is ready and robust enough to handle the expansion.
Speaking on Thursday in Ottawa, federal Health Minister Mark Holland said the government accepts the equivalency of mental suffering and physical suffering, but that it is a "question of readiness".
He said his provincial counterparts, as well as Can

## Now evaluating the models

In [5]:
def calculate_log_sum(logits, target_token_ids):
    shifted_logits = logits[:-1, :]
    shifted_targets = target_token_ids[1:]
    
    log_probs = F.log_softmax(shifted_logits, dim=-1)
    
    target_log_probs = -log_probs.gather(1, shifted_targets.unsqueeze(1)).squeeze()
    # print(target_log_probs)
    
    log_sum = torch.sum(target_log_probs, dim=-1)
    # print(perplexity_sum)

    return log_sum.item()


def print_model_parameters_in_billions(model):
    
    total_params = sum(p.numel() for p in model.parameters())
    
    total_params_billion = total_params / 1e9
    
    print(f"Model parameters: {total_params_billion:.3f} billion")
    
    
def log(data_dict, folder_path):
    if not os.path.exists(folder_path):
        try:
            os.makedirs(folder_path)
            print(f"Directory created at {folder_path}")
        except Exception as e:
            print(f"Error creating directory: {e}")
            return

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"{timestamp}.json"
    file_path = os.path.join(folder_path, file_name)

    try:
        with open(file_path, 'w') as file:
            json.dump(data_dict, file, indent=4)
        print(f"Dictionary saved successfully to {file_path}")
    except Exception as e:
        print(f"Error saving dictionary: {e}")

## Evaluate RWKV

In [6]:
# load rwkv model
model_name_or_path = r'../models/rwkv_6_1b6_78/rwkv-x060-1b6-world-v2-78%trained-20240131-ctx4k.pth'
max_length = 4096
log_folder_path = './logs/'

os.environ['RWKV_JIT_ON'] = '1'
os.environ["RWKV_CUDA_ON"] = '1'

from rwkv.model import RWKV
from rwkv.utils import PIPELINE

model = RWKV(model=model_name_or_path, strategy='cuda fp16')
pipeline = PIPELINE(model, r"rwkv_vocab_v20230424")
# pipeline = PIPELINE(model, "./models/20B_tokenizer.json")  # v4
tokenizer = pipeline.tokenizer

Using /root/.cache/torch_extensions/py38_cu118 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /root/.cache/torch_extensions/py38_cu118/wkv_cuda/build.ninja...
Building extension module wkv_cuda...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)


ninja: no work to do.
RWKV_JIT_ON 1 RWKV_CUDA_ON 1 RESCALE_LAYER 6

Loading ../models/rwkv_6_1b6_78/rwkv-x060-1b6-world-v2-78%trained-20240131-ctx4k.pth ...


Loading extension module wkv_cuda...


Model detected: v6.0
Strategy: (total 24+1=25 layers)
* cuda [float16, float16], store 25 layers
0-cuda-float16-float16 1-cuda-float16-float16 2-cuda-float16-float16 3-cuda-float16-float16 4-cuda-float16-float16 5-cuda-float16-float16 6-cuda-float16-float16 7-cuda-float16-float16 8-cuda-float16-float16 9-cuda-float16-float16 10-cuda-float16-float16 11-cuda-float16-float16 12-cuda-float16-float16 13-cuda-float16-float16 14-cuda-float16-float16 15-cuda-float16-float16 16-cuda-float16-float16 17-cuda-float16-float16 18-cuda-float16-float16 19-cuda-float16-float16 20-cuda-float16-float16 21-cuda-float16-float16 22-cuda-float16-float16 23-cuda-float16-float16 24-cuda-float16-float16 
emb.weight                        f16      cpu  65536  2048 
blocks.0.ln1.weight               f16   cuda:0   2048       
blocks.0.ln1.bias                 f16   cuda:0   2048       
blocks.0.ln2.weight               f16   cuda:0   2048       
blocks.0.ln2.bias                 f16   cuda:0   2048       
blocks.

Using /root/.cache/torch_extensions/py38_cu118 as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /root/.cache/torch_extensions/py38_cu118/rwkv6/build.ninja...
Building extension module rwkv6...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)


ninja: no work to do.


Loading extension module rwkv6...


In [7]:
# eval rwkv
rwkv_test_data = []
rwkv_token_length_list = []
overlong_seq = 0

for idx, sample in tqdm(enumerate(extracted_texts), total=len(extracted_texts)):
    
    with torch.no_grad():
        
        input_seq = tokenizer.encode(sample)
        # input_seq = tokenizer.encode(sample).ids # v4
        input_length = len(input_seq)

        if input_length > max_length:
            overlong_seq += 1
            warnings.warn(f'seq-{idx} length > {max_length}')
            
            factor = input_length / max_length

            input_seq = input_seq[:max_length]
        else:
            factor = 1
            

        logit = model.forward(input_seq, None, full_output=True)[0]

        log_sum = calculate_log_sum(logit, torch.tensor(input_seq).cuda())
        log_sum *= factor

        rwkv_token_length_list.append(input_length)
        rwkv_test_data.append(log_sum)
        
data_dict = {
    'model_name_or_path': model_name_or_path,
    'data_path': data_path,
    'neg_log_prob_sum': sum(rwkv_test_data) / len(rwkv_test_data),
    'avg tokens': sum(rwkv_token_length_list) / len(rwkv_token_length_list),
    'overlong_seq': overlong_seq
       }

log(data_dict, log_folder_path)
        
print(f'log probability sum: {sum(rwkv_test_data) / len(rwkv_test_data)}')
print(f'avg tokens: {sum(rwkv_token_length_list) / len(rwkv_token_length_list)}')
print(f'overlong_seq: {overlong_seq}')

  0%|          | 0/1000 [00:00<?, ?it/s]

 does not have profile information (Triggered internally at ../third_party/nvfuser/csrc/graph_fuser.cpp:104.)
  r, k, v, g, w, xxx, ss = self.v6_0_before(x, sx, s, ln_w, ln_b, lx_w, lx_b, x_maa, w_maa, k_maa, v_maa, r_maa, g_maa, tm_w1, tm_w2, td_w1, td_w2, t_decay, t_first, kw, vw, rw, gw, ow, kmx, krx, kmy, kry, vmx, vrx, vmy, vry, rmx, rrx, rmy, rry, gmx, grx, gmy, gry, omx, orx, omy, ory)


Dictionary saved successfully to ./logs/2024-02-05_22-58-54.json
log probability sum: 1300.3670690612794
avg tokens: 551.783
overlong_seq: 0


In [8]:
del model, pipeline, tokenizer, logit

gc.collect()
torch.cuda.empty_cache()

## Evaluate Hugging Face models

In [9]:
# load model

model_name_or_path = r"h2oai/h2o-danube-1.8b-base"
max_length = 4096

log_folder_path = './logs/'
cache_dir = '../models/temp/'

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, 
                                             device_map="cuda", 
                                             trust_remote_code=True, 
                                             cache_dir=cache_dir).eval()

print(f'max_length: {max_length}')

max_length: 4096


In [10]:
print_model_parameters_in_billions(model)

Model parameters: 1.831 billion


In [11]:
# eval
data = []
token_length_list = []
overlong_seq = 0

for idx, sample in tqdm(enumerate(extracted_texts), total=len(extracted_texts)):
    
    with torch.no_grad():
    
        inputs = tokenizer(sample, return_tensors='pt')
        inputs = inputs.to(model.device)

        seq_length = inputs['input_ids'].shape[-1]

        if seq_length > max_length:
            overlong_seq += 1
            
            inputs = tokenizer(sample, return_tensors='pt', max_length=max_length, truncation=True)
            inputs = inputs.to(model.device)
            warnings.warn(f'seq-{idx} length {seq_length} > truncation_length({max_length}) truncated')
            factor = (seq_length / max_length)
        else:
            factor = 1
                
        # inputs['attention_mask'] = inputs['attention_mask'] == 1  # mosaicml/mpt-1b-redpajama-200b
        # inputs = {'input_ids': inputs['input_ids']}  # allenai/OLMo-1B
        logit = model.forward(**inputs).logits[0, :, :]

        log_sum = calculate_log_sum(logit, inputs['input_ids'].squeeze(0))
        log_sum *= factor
        # print(log_sum)

        token_length_list.append(seq_length)
        data.append(log_sum)
        
data_dict = {
    'model_name_or_path': model_name_or_path,
    'data_path': data_path,
    'neg_log_prob_sum': sum(data) / len(data),
    'avg tokens': sum(token_length_list) / len(token_length_list),
    'overlong_seq': overlong_seq
       }

log(data_dict, log_folder_path)

print(f'log probability sum: {sum(data) / len(data)}')
print(f'avg tokens: {sum(token_length_list) / len(token_length_list)}')
print(f'overlong_seq: {overlong_seq}')

  0%|          | 0/1000 [00:00<?, ?it/s]

Dictionary saved successfully to ./logs/2024-02-05_23-00-52.json
log probability sum: 1241.0432622680664
avg tokens: 630.282
overlong_seq: 0


In [12]:
del model, tokenizer, logit, inputs

gc.collect()
torch.cuda.empty_cache()

## Evaluate Mamba

In [13]:
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel

model_name_or_path = "state-spaces/mamba-1.4b"
max_length = 2048
log_folder_path = './logs/'

mamba_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
mamba_model = MambaLMHeadModel.from_pretrained(model_name_or_path, device="cuda", dtype=torch.float16)
device = torch.device('cuda')

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [14]:
print_model_parameters_in_billions(mamba_model)

Model parameters: 1.372 billion


In [15]:
data = []
token_length_list = []
overlong_seq = 0

for idx, sample in tqdm(enumerate(extracted_texts), total=len(extracted_texts)):
    
    with torch.no_grad():
        
        inputs = mamba_tokenizer(sample, return_tensors="pt").input_ids.to(device=device)

        seq_length = inputs.shape[-1]

        if seq_length > max_length:
            overlong_seq += 1
            warnings.warn(f'seq-{idx} length {seq_length} > truncation_length({max_length})')
            factor = (seq_length / max_length)
            inputs = mamba_tokenizer(sample, return_tensors='pt', max_length=max_length, truncation=True).input_ids.to(device=device)
        else:
            factor = 1

        mamba_output = mamba_model.forward(inputs)
        logit = mamba_output.logits[0, :, :]

        log_sum = calculate_log_sum(logit, inputs[0])
        log_sum *= factor

        token_length_list.append(seq_length)
        data.append(log_sum)
        
data_dict = {
    'model_name_or_path': model_name_or_path,
    'data_path': data_path,
    'neg_log_prob_sum': sum(data) / len(data),
    'avg tokens': sum(token_length_list) / len(token_length_list),
    'overlong_seq': overlong_seq
       }

log(data_dict, log_folder_path)
        

print(f'log probability sum: {sum(data) / len(data)}')
print(f'avg tokens: {sum(token_length_list) / len(token_length_list)}')
print(f'overlong_seq: {overlong_seq}')

  0%|          | 0/1000 [00:00<?, ?it/s]

Dictionary saved successfully to ./logs/2024-02-05_23-02-06.json
log probability sum: 1325.0888125
avg tokens: 551.678
overlong_seq: 0


In [16]:
del mamba_model, mamba_tokenizer, inputs, logit, mamba_output

gc.collect()
torch.cuda.empty_cache()