In [2]:
# Run this cell for inputs
"""
We are going to test all of the different models in this notebook with different groups of cells representing each model
"""
input = "Input: A large majority of total radioactivity has been deposited in the oceans. This includes radionuclides such as 235U, 238U, 239Pu, 137Cs, 99Tc, 90Sr, etc. Question: What radioactive elements have been deposited in the oceans? Output: Uranium-235, Uranium-238, Plutonium-239, Caesium-137, Technetium-99, Strontium-90, and more. Input: Am(OH)3 sorbs readily to surfaces. Am(III) sorption onto catcite and aragonite surfaces is both rapid and extensive, and less than 1% is readily desorbed.29 Americium(III) is more extensively sorbed onto aragonite than onto calcite, although both minerals consist of CaCO3. Question: What molecules are mentioned in the text above? Output: Am(OH)3, CaCO3 Input: The pentavalent cation AnO2 + comprises ca. 95% of the total concentration of soluble neptunium and plutonium in solutions with carbonate concentrations 10–4M. The solubility of NpO2 + may be as high as 10–4M, while the solubility of PuO2 + (10–8–10–6M), is limited by the formation of the insoluble tetravalent species Pu(OH)4 (pKsp56).  Question: What elements make up the molecules that are mentioned in the input text? Output:"

In [3]:
!nvidia-smi

Thu Jun 29 09:09:31 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.43.04    Driver Version: 515.43.04    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:21:00.0 Off |                  N/A |
| 35%   24C    P8    14W / 260W |     15MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# OpenLLaMA runs into an error trying to import LlamaTokenizer and LlamaForCausalLM, so we're moving on to the next
# After reading I have discovered that LlamaTokenizer and LlamaForCausalLM are only obtainable 
# by getting LLaMA's weights and converting them to HuggingFace format.
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

model_path = 'openlm-research/open_llama_3b'
# model_path = 'openlm-research/open_llama_7b'

tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='auto',
)

input_ids = tokenizer(input, return_tensors="pt").input_ids

generation_output = model.generate(
    input_ids=input_ids, max_length=400
)
print(tokenizer.decode(generation_output[0]))

ImportError: cannot import name 'LlamaTokenizer' from 'transformers' (/home/lendes/anaconda3/envs/suli/lib/python3.8/site-packages/transformers/__init__.py)

In [None]:
# OPT-2.7B
import torch
from transformers import pipeline
model = pipeline('text-generation', model="facebook/opt-2.7b")
model(input, max_length=400)

In [2]:
# OPT-1.3b
import torch
from transformers import pipeline
model = pipeline('text-generation', model="facebook/opt-1.3b")
model(input, max_length=400)

Downloading (…)okenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

[{'generated_text': 'Input: A large majority of total radioactivity has been deposited in the oceans. This includes radionuclides such as 235U, 238U, 239Pu, 137Cs, 99Tc, 90Sr, etc. Question: What radioactive elements have been deposited in the oceans? Output: Uranium-235, Uranium-238, Plutonium-239, Caesium-137, Technetium-99, Strontium-90, and more. Input: Am(OH)3 sorbs readily to surfaces. Am(III) sorption onto catcite and aragonite surfaces is both rapid and extensive, and less than 1% is readily desorbed.29 Americium(III) is more extensively sorbed onto aragonite than onto calcite, although both minerals consist of CaCO3. Question: What molecules are mentioned in the text above? Output: Am(OH)3, CaCO3 Input: The pentavalent cation AnO2 + comprises ca. 95% of the total concentration of soluble neptunium and plutonium in solutions with carbonate concentrations 10–4M. The solubility of NpO2 + may be as high as 10–4M, while the solubility of PuO2 + (10–8–10–6M), is limited by the forma

In [3]:
# Falcon-RW-1B
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
model = "tiiuae/falcon-rw-1b"
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
sequences = pipeline(
    input,
    max_length=400,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GPT2LMHeadModel', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'M

Result: Input: A large majority of total radioactivity has been deposited in the oceans. This includes radionuclides such as 235U, 238U, 239Pu, 137Cs, 99Tc, 90Sr, etc. Question: What radioactive elements have been deposited in the oceans? Output: Uranium-235, Uranium-238, Plutonium-239, Caesium-137, Technetium-99, Strontium-90, and more. Input: Am(OH)3 sorbs readily to surfaces. Am(III) sorption onto catcite and aragonite surfaces is both rapid and extensive, and less than 1% is readily desorbed.29 Americium(III) is more extensively sorbed onto aragonite than onto calcite, although both minerals consist of CaCO3. Question: What molecules are mentioned in the text above? Output: Am(OH)3, CaCO3 Input: The pentavalent cation AnO2 + comprises ca. 95% of the total concentration of soluble neptunium and plutonium in solutions with carbonate concentrations 10–4M. The solubility of NpO2 + may be as high as 10–4M, while the solubility of PuO2 + (10–8–10–6M), is limited by the formation of the i

In [4]:
# MPT-1B-Redpajama-200B-Dolly
import transformers
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


tokenizer = AutoTokenizer.from_pretrained('mosaicml/mpt-1b-redpajama-200b-dolly', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained('mosaicml/mpt-1b-redpajama-200b-dolly', trust_remote_code=True)
model.to(device='cuda:0')

inputs = tokenizer(input, return_tensors="pt").to(device='cuda:0')
outputs = model.generate(**inputs, max_length=400)
text_output = tokenizer.batch_decode(outputs)
print(text_output)
#print(text_output[0].split("<|endoftext|>")[0].split(input)[1].strip())

Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


You are using config.init_device='cpu', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.
['Input: A large majority of total radioactivity has been deposited in the oceans. This includes radionuclides such as 235U, 238U, 239Pu, 137Cs, 99Tc, 90Sr, etc. Question: What radioactive elements have been deposited in the oceans? Output: Uranium-235, Uranium-238, Plutonium-239, Caesium-137, Technetium-99, Strontium-90, and more. Input: Am(OH)3 sorbs readily to surfaces. Am(III) sorption onto catcite and aragonite surfaces is both rapid and extensive, and less than 1% is readily desorbed.29 Americium(III) is more extensively sorbed onto aragonite than onto calcite, although both minerals consist of CaCO3. Question: What molecules are mentioned in the text above? Output: Am(OH)3, CaCO3 Input: The pentavalent cation AnO2 + comprises ca. 95% of the total concentration of soluble neptunium and plutonium in solutions with carbonate concentrations 10–4M. The 

In [5]:
# Galactica
import torch
import galai

model = galai.load_model("base")
model.generate(input, max_length=400)

'Input: A large majority of total radioactivity has been deposited in the oceans. This includes radionuclides such as 235U, 238U, 239Pu, 137Cs, 99Tc, 90Sr, etc. Question: What radioactive elements have been deposited in the oceans? Output: Uranium-235, Uranium-238, Plutonium-239, Caesium-137, Technetium-99, Strontium-90, and more. Input: Am(OH)3 sorbs readily to surfaces. Am(III) sorption onto catcite and aragonite surfaces is both rapid and extensive, and less than 1% is readily desorbed.29 Americium(III) is more extensively sorbed onto aragonite than onto calcite, although both minerals consist of CaCO3. Question: What molecules are mentioned in the text above? Output: Am(OH)3, CaCO3 Input: The pentavalent cation AnO2 + comprises ca. 95% of the total concentration of soluble neptunium and plutonium in solutions with carbonate concentrations 10–4M. The solubility of NpO2 + may be as high as 10–4M, while the solubility of PuO2 + (10–8–10–6M), is limited by the formation of the insolubl

In [6]:
# BLOOM
import transformers
from transformers import BloomForCausalLM
from transformers import BloomTokenizerFast
import torch

model = BloomForCausalLM.from_pretrained("bigscience/bloom-1b7")
tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-1b7")

inputs = tokenizer(input, return_tensors="pt")
print(tokenizer.decode(model.generate(inputs["input_ids"], 
                       max_length=400
                      )[0]))

Input: A large majority of total radioactivity has been deposited in the oceans. This includes radionuclides such as 235U, 238U, 239Pu, 137Cs, 99Tc, 90Sr, etc. Question: What radioactive elements have been deposited in the oceans? Output: Uranium-235, Uranium-238, Plutonium-239, Caesium-137, Technetium-99, Strontium-90, and more. Input: Am(OH)3 sorbs readily to surfaces. Am(III) sorption onto catcite and aragonite surfaces is both rapid and extensive, and less than 1% is readily desorbed.29 Americium(III) is more extensively sorbed onto aragonite than onto calcite, although both minerals consist of CaCO3. Question: What molecules are mentioned in the text above? Output: Am(OH)3, CaCO3 Input: The pentavalent cation AnO2 + comprises ca. 95% of the total concentration of soluble neptunium and plutonium in solutions with carbonate concentrations 10–4M. The solubility of NpO2 + may be as high as 10–4M, while the solubility of PuO2 + (10–8–10–6M), is limited by the formation of the insoluble

In [2]:
# GPT-Neo-1.3b
from transformers import pipeline
generator = pipeline('text-generation', model='EleutherAI/gpt-neo-1.3B')
generator(input, do_sample=True, max_length=400)

Downloading (…)okenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Input: A large majority of total radioactivity has been deposited in the oceans. This includes radionuclides such as 235U, 238U, 239Pu, 137Cs, 99Tc, 90Sr, etc. Question: What radioactive elements have been deposited in the oceans? Output: Uranium-235, Uranium-238, Plutonium-239, Caesium-137, Technetium-99, Strontium-90, and more. Input: Am(OH)3 sorbs readily to surfaces. Am(III) sorption onto catcite and aragonite surfaces is both rapid and extensive, and less than 1% is readily desorbed.29 Americium(III) is more extensively sorbed onto aragonite than onto calcite, although both minerals consist of CaCO3. Question: What molecules are mentioned in the text above? Output: Am(OH)3, CaCO3 Input: The pentavalent cation AnO2 + comprises ca. 95% of the total concentration of soluble neptunium and plutonium in solutions with carbonate concentrations 10–4M. The solubility of NpO2 + may be as high as 10–4M, while the solubility of PuO2 + (10–8–10–6M), is limited by the forma

In [None]:
# GPT-Neo-2.7b // this model doesn't fit on the gpu (it's like 10 gb)
from transformers import pipeline
generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')
generator(input, do_sample=True, max_length=400)

In [2]:
# Pythia-1.4b-deduped
from transformers import GPTNeoXForCausalLM, AutoTokenizer

model = GPTNeoXForCausalLM.from_pretrained(
  "EleutherAI/pythia-1.4b-deduped",
  revision="step3000",
  cache_dir="./pythia-1.4b-deduped/step3000",
)

tokenizer = AutoTokenizer.from_pretrained(
  "EleutherAI/pythia-1.4b-deduped",
  revision="step3000",
  cache_dir="./pythia-1.4b-deduped/step3000",
)

inputs = tokenizer(input, return_tensors="pt")
tokens = model.generate(**inputs, max_length=400)
tokenizer.decode(tokens[0])


Downloading (…)step3000/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.93G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

Downloading (…)p3000/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Input length of input_ids is 308, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


'Input: A large majority of total radioactivity has been deposited in the oceans. This includes radionuclides such as 235U, 238U, 239Pu, 137Cs, 99Tc, 90Sr, etc. Question: What radioactive elements have been deposited in the oceans? Output: Uranium-235, Uranium-238, Plutonium-239, Caesium-137, Technetium-99, Strontium-90, and more. Input: Am(OH)3 sorbs readily to surfaces. Am(III) sorption onto catcite and aragonite surfaces is both rapid and extensive, and less than 1% is readily desorbed.29 Americium(III) is more extensively sorbed onto aragonite than onto calcite, although both minerals consist of CaCO3. Question: What molecules are mentioned in the text above? Output: Am(OH)3, CaCO3 Input: The pentavalent cation AnO2 + comprises ca. 95% of the total concentration of soluble neptunium and plutonium in solutions with carbonate concentrations 10–4M. The solubility of NpO2 + may be as high as 10–4M, while the solubility of PuO2 + (10–8–10–6M), is limited by the formation of the insolubl

In [None]:
# Pythia-2.8b-deduped
from transformers import GPTNeoXForCausalLM, AutoTokenizer

model = GPTNeoXForCausalLM.from_pretrained(
  "EleutherAI/pythia-2.8b-deduped",
  revision="step3000",
  cache_dir="./pythia-2.8b-deduped/step3000",
)

tokenizer = AutoTokenizer.from_pretrained(
  "EleutherAI/pythia-2.8b-deduped",
  revision="step3000",
  cache_dir="./pythia-2.8b-deduped/step3000",
)

inputs = tokenizer(input, return_tensors="pt")
tokens = model.generate(**inputs, max_length=400)
tokenizer.decode(tokens[0])

In [None]:
# StableLM-tuned-alpha-3b // this is also too big to run (10.2gb)
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList

tokenizer = AutoTokenizer.from_pretrained("StabilityAI/stablelm-tuned-alpha-3b")
model = AutoModelForCausalLM.from_pretrained("StabilityAI/stablelm-tuned-alpha-3b")
model.half().cuda()

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        stop_ids = [50278, 50279, 50277, 1, 0]
        for stop_id in stop_ids:
            if input_ids[0][-1] == stop_id:
                return True
        return False

system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""

prompt = f"{system_prompt}<|USER|>{input}<|ASSISTANT|>"

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
tokens = model.generate(
  **inputs,
  max_length=400,
  temperature=0.7,
  do_sample=True,
  stopping_criteria=StoppingCriteriaList([StopOnTokens()])
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))

Downloading (…)okenizer_config.json:   0%|          | 0.00/264 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/606 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/21.1k [00:00<?, ?B/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/10.2G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/4.66G [00:00<?, ?B/s]