In [1]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, MambaForCausalLM
from datasets import load_dataset
from evaluate import load

In [2]:
torch.cuda.set_device(2)

In [3]:
torch.cuda.current_device()

2

In [4]:
dataset = load_dataset("Trelis/tiny-shakespeare")

In [5]:
dataset.shape

{'train': (472, 1), 'test': (49, 1)}

In [6]:
dataset["train"]

Dataset({
    features: ['Text'],
    num_rows: 472
})

In [7]:
dataset["test"]["Text"][0]

"TRANIO:\nIs this your speeding? nay, then, good night our part!\n\nPETRUCHIO:\nBe patient, gentlemen; I choose her for myself:\nIf she and I be pleased, what's that to you?\n'Tis bargain'd 'twixt us twain, being alone,\nThat she shall still be curst in company.\nI tell you, 'tis incredible to believe\nHow much she loves me: O, the kindest Kate!\nShe hung about my neck; and kiss on kiss\nShe vied so fast, protesting oath on oath,\nThat in a twink she won me to her love.\nO, you are novices! 'tis a world to see,\nHow tame, when men and women are alone,\nA meacock wretch can make the curstest shrew.\nGive me thy hand, Kate: I will unto Venice,\nTo buy apparel 'gainst the wedding-day.\nProvide the feast, father, and bid the guests;\nI will be sure my Katharina shall be fine.\n\nBAPTISTA:\nI know not what to say: but give me your hands;\nGod send you joy, Petruchio! 'tis a match.\n\nGREMIO:\nAmen, say we: we will be witnesses.\n\nPETRUCHIO:\nFather, and wife, and gentlemen, adieu;\nI will 

In [8]:
len(dataset["test"]["Text"][0])

2859

In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = "cpu"

In [10]:
# model_size = "130m"
# model_size = "370m"
# model_size = "790m"
# model_size = "1.4b"
model_size = "2.8b"

In [11]:
model = MambaForCausalLM.from_pretrained(f"state-spaces/mamba-{model_size}-hf").to(device)

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/50.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/1.15G [00:00<?, ?B/s]

The fast path is not available because on of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)` is None. Falling back to the naive implementation. To install follow https://github.com/state-spaces/mamba/#installation and https://github.com/Dao-AILab/causal-conv1d


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [12]:
tokenizer = AutoTokenizer.from_pretrained("state-spaces/mamba-370m-hf")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [13]:
encodings = tokenizer(dataset["test"]["Text"], return_tensors= "pt", padding=True)

In [14]:
model.config.get_config_dict("state-spaces/mamba-370m-hf")



({'architectures': ['MambaForCausalLM'],
  'bos_token_id': 0,
  'conv_kernel': 4,
  'd_inner': 160,
  'd_model': 1024,
  'dt_rank': 'auto',
  'eos_token_id': 0,
  'expand': 2,
  'fused_add_norm': True,
  'hidden_act': 'silu',
  'hidden_size': 1024,
  'initializer_range': 0.1,
  'intermediate_size': 2048,
  'layer_norm_epsilon': 1e-05,
  'model_type': 'mamba',
  'n_layer': 48,
  'num_hidden_layers': 48,
  'pad_token_id': 0,
  'pad_vocab_size_multiple': 8,
  'rescale_prenorm_residual': False,
  'residual_in_fp32': True,
  'rms_norm': True,
  'ssm_cfg': {},
  'state_size': 16,
  'time_step_floor': 0.0001,
  'time_step_init_scheme': 'random',
  'time_step_max': 0.1,
  'time_step_min': 0.001,
  'time_step_rank': 64,
  'time_step_scale': 1.0,
  'torch_dtype': 'float32',
  'transformers_version': '4.39.0.dev0',
  'use_bias': False,
  'use_cache': True,
  'use_conv_bias': True,
  'vocab_size': 50280,
  '_commit_hash': 'b519127f5bfaaa1c27dd938dad051ec360972b23'},
 {})

In [15]:
perplexity = load("perplexity", module_type="metric")

In [16]:
predictions = []
with torch.no_grad():
    for text in dataset["test"]["Text"]:
        encodings = tokenizer(text, return_tensors= "pt", padding=True)
        input_ids = encodings.input_ids.to(device)
        outputs = model(input_ids)
        p = tokenizer.decode(outputs.logits.argmax(dim=-1)[0], skip_special_tokens=True)
        predictions.append(p)

In [17]:
dataset["test"]["Text"][0]

"TRANIO:\nIs this your speeding? nay, then, good night our part!\n\nPETRUCHIO:\nBe patient, gentlemen; I choose her for myself:\nIf she and I be pleased, what's that to you?\n'Tis bargain'd 'twixt us twain, being alone,\nThat she shall still be curst in company.\nI tell you, 'tis incredible to believe\nHow much she loves me: O, the kindest Kate!\nShe hung about my neck; and kiss on kiss\nShe vied so fast, protesting oath on oath,\nThat in a twink she won me to her love.\nO, you are novices! 'tis a world to see,\nHow tame, when men and women are alone,\nA meacock wretch can make the curstest shrew.\nGive me thy hand, Kate: I will unto Venice,\nTo buy apparel 'gainst the wedding-day.\nProvide the feast, father, and bid the guests;\nI will be sure my Katharina shall be fine.\n\nBAPTISTA:\nI know not what to say: but give me your hands;\nGod send you joy, Petruchio! 'tis a match.\n\nGREMIO:\nAmen, say we: we will be witnesses.\n\nPETRUCHIO:\nFather, and wife, and gentlemen, adieu;\nI will 

In [18]:
predictions[0]

"SP: I\n it the son,\nay, I, I my.\ning\n\nLRUCHIO:\nI not, sir, I will you for my.\nShe you will I do well, all needs that to you?\n\nTis not'd,twixt us,ain, and so,\nAnd she shall be be yoursst, company.\n\n'll you, sirtis a to think,How many this is me, she,'heavens creature creature!\n\n loves about my neck, she kiss'd kiss\nShe castied upon fast, I oath on oath,\nThat I a twink she won me to her mind.\n\n, she are wellices in youT not world to see\n\nHow soon and how once are women come alone,\nThe womanwedock wretch, make a verstest shrew!\n\n me a hand, Pet: I will make my,\nAnd visit apparel forgainst our time-day.\n\nide thy feast, and, and I the guests:\nI'll be there to Katearineina be be\n.\n\nKAPTISTA:\nI'll not what to say; I fare your leave hand,\nAnd give you joy, andruchio!\ntis a match.\n\nPETMIO:\nImen, amen I; and'll be witnesses.\n\nPETRUCHIO:\nI, I you, and friends, adieu!\nI must to Venice, and is apace.\nI'll be rings, things to fine array,\nAnd then the, Kate, 

In [19]:
results = perplexity.compute(predictions=predictions, model_id='gpt2', add_start_token=False)

  0%|          | 0/4 [00:00<?, ?it/s]

In [20]:
results

{'perplexities': [238.94775390625,
  255.7991485595703,
  277.0682373046875,
  188.1632537841797,
  119.13833618164062,
  209.71636962890625,
  191.4449005126953,
  125.08646392822266,
  211.05160522460938,
  129.9571990966797,
  194.24221801757812,
  304.9227294921875,
  214.4589385986328,
  217.24822998046875,
  283.9491271972656,
  185.73526000976562,
  184.69346618652344,
  231.74755859375,
  151.3889923095703,
  181.18544006347656,
  281.0938415527344,
  158.6200408935547,
  85.45830535888672,
  210.49923706054688,
  223.92263793945312,
  195.72096252441406,
  125.21846771240234,
  132.5686492919922,
  140.87266540527344,
  103.31908416748047,
  163.13876342773438,
  75.99039459228516,
  179.43064880371094,
  206.74925231933594,
  278.9527587890625,
  163.23159790039062,
  200.23837280273438,
  316.28955078125,
  281.96466064453125,
  191.7311248779297,
  356.5668029785156,
  180.77035522460938,
  305.42681884765625,
  256.2240905761719,
  260.3822937011719,
  84.84712982177734,
 

Resultados perplexity:

Â· Trelis/tiny-shakespeare (test):

    130m -> 210.281 points
    370m -> 211.817 points
    790m -> 215.818 points
    1.4b -> 214.880 points
    2.8b -> 196.647 points