In [1]:
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, MambaForCausalLM
from datasets import load_dataset
from evaluate import load

In [2]:
torch.cuda.set_device(2)

In [3]:
torch.cuda.current_device()

2

In [4]:
dataset = load_dataset("Trelis/tiny-shakespeare")

In [5]:
dataset.shape

{'train': (472, 1), 'test': (49, 1)}

In [6]:
dataset["train"]

Dataset({
    features: ['Text'],
    num_rows: 472
})

In [7]:
dataset["test"]["Text"][0]

"TRANIO:\nIs this your speeding? nay, then, good night our part!\n\nPETRUCHIO:\nBe patient, gentlemen; I choose her for myself:\nIf she and I be pleased, what's that to you?\n'Tis bargain'd 'twixt us twain, being alone,\nThat she shall still be curst in company.\nI tell you, 'tis incredible to believe\nHow much she loves me: O, the kindest Kate!\nShe hung about my neck; and kiss on kiss\nShe vied so fast, protesting oath on oath,\nThat in a twink she won me to her love.\nO, you are novices! 'tis a world to see,\nHow tame, when men and women are alone,\nA meacock wretch can make the curstest shrew.\nGive me thy hand, Kate: I will unto Venice,\nTo buy apparel 'gainst the wedding-day.\nProvide the feast, father, and bid the guests;\nI will be sure my Katharina shall be fine.\n\nBAPTISTA:\nI know not what to say: but give me your hands;\nGod send you joy, Petruchio! 'tis a match.\n\nGREMIO:\nAmen, say we: we will be witnesses.\n\nPETRUCHIO:\nFather, and wife, and gentlemen, adieu;\nI will 

In [8]:
len(dataset["test"]["Text"][0])

2859

In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = "cpu"

In [10]:
model_size = "130m"
# model_size = "370m"
# model_size = "790m"
# model_size = "1.4b"
# model_size = "2.8b"

In [11]:
model = MambaForCausalLM.from_pretrained(f"state-spaces/mamba-{model_size}-hf").to(device)

The fast path is not available because on of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)` is None. Falling back to the naive implementation. To install follow https://github.com/state-spaces/mamba/#installation and https://github.com/Dao-AILab/causal-conv1d


In [12]:
tokenizer = AutoTokenizer.from_pretrained(f"state-spaces/mamba-{model_size}-hf")

tokenizer_config.json:   0%|          | 0.00/4.79k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [13]:
model.config.get_config_dict(f"state-spaces/mamba-{model_size}-hf")



({'architectures': ['MambaForCausalLM'],
  'bos_token_id': 0,
  'conv_kernel': 4,
  'd_inner': 1536,
  'd_model': 768,
  'eos_token_id': 0,
  'expand': 2,
  'fused_add_norm': True,
  'hidden_act': 'silu',
  'hidden_size': 768,
  'initializer_range': 0.1,
  'intermediate_size': 1536,
  'layer_norm_epsilon': 1e-05,
  'model_type': 'mamba',
  'n_layer': 24,
  'num_hidden_layers': 24,
  'pad_token_id': 0,
  'pad_vocab_size_multiple': 8,
  'rescale_prenorm_residual': False,
  'residual_in_fp32': True,
  'rms_norm': True,
  'ssm_cfg': {},
  'state_size': 16,
  'time_step_floor': 0.0001,
  'time_step_init_scheme': 'random',
  'time_step_max': 0.1,
  'time_step_min': 0.001,
  'time_step_rank': 48,
  'time_step_scale': 1.0,
  'torch_dtype': 'float32',
  'transformers_version': '4.39.0.dev0',
  'use_bias': False,
  'use_cache': True,
  'use_conv_bias': True,
  'vocab_size': 50280,
  '_commit_hash': '1e76775f628fbf1350fbe4dbb3d971ba64af25a1'},
 {})

In [14]:
perplexity = load("perplexity", module_type="metric")

In [15]:
predictions = []
with torch.no_grad():
    for text in dataset["test"]["Text"]:
        encodings = tokenizer(text, return_tensors= "pt", padding=True)
        input_ids = encodings.input_ids.to(device)
        outputs = model(input_ids)
        p = tokenizer.decode(outputs.logits.argmax(dim=-1)[0], skip_special_tokens=True)
        predictions.append(p)

In [16]:
dataset["test"]["Text"][0]

"TRANIO:\nIs this your speeding? nay, then, good night our part!\n\nPETRUCHIO:\nBe patient, gentlemen; I choose her for myself:\nIf she and I be pleased, what's that to you?\n'Tis bargain'd 'twixt us twain, being alone,\nThat she shall still be curst in company.\nI tell you, 'tis incredible to believe\nHow much she loves me: O, the kindest Kate!\nShe hung about my neck; and kiss on kiss\nShe vied so fast, protesting oath on oath,\nThat in a twink she won me to her love.\nO, you are novices! 'tis a world to see,\nHow tame, when men and women are alone,\nA meacock wretch can make the curstest shrew.\nGive me thy hand, Kate: I will unto Venice,\nTo buy apparel 'gainst the wedding-day.\nProvide the feast, father, and bid the guests;\nI will be sure my Katharina shall be fine.\n\nBAPTISTA:\nI know not what to say: but give me your hands;\nGod send you joy, Petruchio! 'tis a match.\n\nGREMIO:\nAmen, say we: we will be witnesses.\n\nPETRUCHIO:\nFather, and wife, and gentlemen, adieu;\nI will 

In [17]:
predictions[0]

"SP, I\n it a first ticket\nah, your,\n,,\ning\n\n_ERCHIO:\nIhold, my, I am to. you.\nI she be I are friends, I will the to you?\n\nTis a,,twillxt us,ain, and so.\nAnd we shall be be mysing, her.\n\n'll you, mytis not, me\nThat much she is you, and, I moreest,!\n\n's upon my neck, and,'d kiss,I kissedied with with with that,, oath,\nThat I her momentinklingly would't to her side.\n\n, my are aices, youT a thing of be\n\nAnd much and how you are women are so,\nAndrenekke ofring,'t, worldst fool kiss.\n\n me your hand, and, I'll not thee.\nAnd Venice herarel fortwst the world ofday.\n\nide me best, and, and let me maid\n\nAnd'll not thy to lovearineina will be\n.\n\nPETENTISTE:\nI will not what to say,\n I me your hand,\nAnd be me a, andruchio!\ntis a joy\n\n\nPETAT::\nI thousand, my,, and are be merry.\n\nPETRUCHIO:\nI, I mother, and children,\nieu!\nI will be Venice, and,.ace.\nI will be a, flowers to a things,\nAnd, the, and, and will kiss friends.'er.\n\nPETMIO:\nI it so soapped'd in

In [18]:
results = perplexity.compute(predictions=predictions, model_id='gpt2', add_start_token=False)

  0%|          | 0/4 [00:00<?, ?it/s]

In [19]:
results

{'perplexities': [221.55735778808594,
  198.0741424560547,
  217.16162109375,
  228.64495849609375,
  120.38847351074219,
  261.9655456542969,
  227.4635467529297,
  161.25106811523438,
  217.4535369873047,
  121.30035400390625,
  206.34690856933594,
  242.25991821289062,
  210.80319213867188,
  194.0653839111328,
  227.86109924316406,
  151.5801239013672,
  176.01309204101562,
  236.51327514648438,
  151.68605041503906,
  174.5030059814453,
  273.3298034667969,
  130.21368408203125,
  98.06568908691406,
  256.57977294921875,
  223.55010986328125,
  185.99839782714844,
  134.82040405273438,
  148.70501708984375,
  148.43099975585938,
  116.62020874023438,
  215.33444213867188,
  86.23014831542969,
  226.6719512939453,
  214.08407592773438,
  322.2111511230469,
  241.0570068359375,
  184.97647094726562,
  251.6980438232422,
  356.30267333984375,
  260.2293701171875,
  424.2401428222656,
  325.1575622558594,
  312.4330139160156,
  295.7098693847656,
  373.6962585449219,
  84.699546813964

Resultados perplexity:

· Trelis/tiny-shakespeare (test):

    130m -> 210.281 points
    370m -> 211.817 points
    790m -> 215.818 points
    1.4b -> 214.880 points
    2.8b -> 196.647 points