## Install requirements

In [None]:
!pip -q install transformers accelerate bitsandbytes
!pip -q install huggingface_hub 

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m98.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.3/215.3 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.3/104.3 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m78.6 MB/s[0m eta [36m0:00:00[0m
[?25h

Let's see which GPU the Colab team has blessed us with.

In [None]:
!nvidia-smi -L

GPU 0: NVIDIA A100-SXM4-40GB (UUID: GPU-86d32c34-faeb-4dbf-28ec-76c68a0227ed)


## Clean up GPU memory if necessary

In [None]:
import gc
import torch

# Collect unreachable Python objects first, so that any tensors they were keeping
# alive are handed back to PyTorch's caching allocator ...
gc.collect()
# ... and only then release the allocator's unoccupied cached memory. Doing this in
# the opposite order leaves memory freed by the collection sitting in the cache.
torch.cuda.empty_cache()

## Number of Parameters in T5 variants, UL2

- XL => **3B**
- XXL => **11B**
- UL2 => **20B**

## Setup Model and Tokenizer

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# One checkpoint id shared by the model and its tokenizer so the two cannot drift apart.
MODEL_NAME = "google/flan-t5-xl"

# device_map="auto" leaves the placement of the weights to the library.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Downloading (…)lve/main/config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [None]:
!nvidia-smi

Fri Apr 28 06:56:04 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    41W / 300W |  11724MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

Run an inference step.

In [None]:
# Put the prompt ids on whatever device the model was placed on instead of assuming "cuda".
inputs = tokenizer("A step by step recipe to make bolognese pasta:", return_tensors="pt").input_ids.to(model.device)
# `temperature` is only honoured when sampling (do_sample=True). This call does not
# sample, so the former temperature=0.7 argument had no effect and has been dropped.
outputs = model.generate(inputs)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))



['In a large saucepan, combine the ground beef, onion, garlic, tomato paste, tomato']


In [None]:
outputs.size(1)

20

In [None]:
def run_inference(sentence):
  """Generate from ``sentence`` with the model's default generation settings.

  Uses the notebook-level ``tokenizer`` and ``model``. Prints the number of
  input tokens as a side effect.

  Args:
    sentence: Prompt text (anything the tokenizer accepts).

  Returns:
    The list produced by ``tokenizer.batch_decode`` with special tokens skipped.
  """
  # Move every tokenizer output (ids and attention mask) to the model's device;
  # the other generation cells in this notebook do the same for their input ids,
  # and leaving the tensors on the CPU mismatches a GPU-placed model.
  encoded = tokenizer(sentence, return_tensors="pt").to(model.device)
  print("Input size:", encoded['input_ids'].size(1))
  outputs = model.generate(**encoded)
  return tokenizer.batch_decode(outputs, skip_special_tokens=True)

## Helper to generate text

In [None]:
import textwrap

# set up a simple generation function
def generate_completion(input_string, max_length=50, temperature=None):
    """Generate a completion for ``input_string`` and return it wrapped to 100 columns.

    Uses the notebook-level ``tokenizer`` and ``model``. Prints the input and
    output lengths (in tokens) as a side effect.

    Args:
        input_string: Prompt text handed to the tokenizer.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        temperature: Optional sampling temperature. With ``None`` (the default)
            the call does not sample, which matches what this helper effectively
            did before: it passed ``temperature=0.7`` without ``do_sample=True``,
            and the temperature is only applied in sampling modes. Pass a float
            to sample with that temperature.

    Returns:
        The first generated sequence, decoded with special tokens skipped and
        re-wrapped by ``textwrap.fill`` at a width of 100 characters.
    """
    # Follow the model's own device rather than hard-coding "cuda".
    input_ids = tokenizer(input_string, return_tensors="pt").input_ids.to(model.device)
    print("Input size:", input_ids.size(1))

    generate_kwargs = {"max_length": max_length}
    if temperature is not None:
        # Temperature has no effect unless sampling is switched on explicitly.
        generate_kwargs.update(do_sample=True, temperature=temperature)

    outputs = model.generate(input_ids, **generate_kwargs)
    print("Output size:", outputs.size(1))

    completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return textwrap.fill(completion, width=100)

## 1. Elementary Reasoning

Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apple do they have?

In [None]:
%%time
generate_completion("Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apple do they have?")

Input size: 38
Output size: 25
CPU times: user 1.24 s, sys: 0 ns, total: 1.24 s
Wall time: 1.23 s


'They bought 6 more apples, so they have 23 + 6 = 25 apples. Therefore, the answer is 25.'

## 2. Follow the Chain of Thought

In [None]:
# Baseline prompt: asks for a direct yes/no answer.
input_string_02 = '''
Answer the following yes/no question.

Can you write a whole Haiku in a single tweet?
'''

# Same question, but the instruction additionally asks for step-by-step reasoning.
input_string_02_CoT = '''
Answer the following yes/no question by reasoning step-by-step.

Can you write a whole Haiku in a single tweet?
'''

In [None]:
%%time
generate_completion(input_string_02)

Input size: 25
Output size: 3
CPU times: user 138 ms, sys: 0 ns, total: 138 ms
Wall time: 137 ms


'no'

In [None]:
%%time
generate_completion(input_string_02_CoT, 200)

Input size: 32
Output size: 36
CPU times: user 1.74 s, sys: 0 ns, total: 1.74 s
Wall time: 1.73 s


'Haiku is a Japanese poem that is around 108 characters long. A tweet is a short message sent on\nTwitter. Therefore, the final answer is yes.'

## 3. Zero Shot QA

In [None]:
%%time
input_string = '''
Q: Can Geoffrey Hinton have a conversation with George Washington? Give the rationale before answering.
'''
generate_completion(input_string, max_length=500)

Input size: 25
Output size: 25
CPU times: user 1.19 s, sys: 0 ns, total: 1.19 s
Wall time: 1.19 s


'George Washington died in 1789. Geoffrey Hinton was born in 1818. The answer: no.'

In [None]:
input_string = '''
Q: Could Marcus Aurelius have had dinner with George Washington? Give the rationale before answering.
'''

generate_completion(input_string)

'<pad> George Washington was born in 1799. Marcus Aurelius was born in 279 BC. So the final answer is\nno.</s>'

In [None]:
%%time
input_string = '''
X died in 1799. Y was born in 1924. Can X have a conversation with Y? Give the rationale.
'''
generate_completion(input_string, max_length=200)

Input size: 33
Output size: 43
CPU times: user 1.96 s, sys: 0 ns, total: 1.96 s
Wall time: 1.95 s


"X died in 1799 so he can't talk to Y. Y was born in 1924 so he can't talk to X. The final answer:\nno."

## 4. Zero Shot Generation

In [None]:
%%time
input_string = '''Write me a funny poem about a cat driving car.'''

generate_completion(input_string, max_length=200)

Input size: 14
Output size: 200
CPU times: user 9.56 s, sys: 0 ns, total: 9.56 s
Wall time: 9.54 s


'i have a cat that drives a car i drive it like a sailor i drive it like a sailor i drive it like a\nsailor i drive it like a sailor i drive it like a sailor i drive it like a sailor i drive it like a\nsailor i drive it like a sailor i drive it like a sailor i drive it like a sailor i drive it like a\nsailor i drive it like a sailor i drive it like a sailor i drive it like a sailor i drive it like a\nsailor i drive it like a sailor i drive it like a sailor'

In [None]:
%%time
input_string = '''Write a sad story about carrot named Jason. The story should \
start with the carrot being a professional athlete of some kind, \
and end with the carrot having his heart broken.'''

generate_completion(input_string, max_length=512)

Input size: 37
Output size: 512
CPU times: user 24.4 s, sys: 0 ns, total: 24.4 s
Wall time: 24.3 s


'Jason was a professional athlete. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a sprinter. He was a\nsprinter. He was a sprinter. He wa

## 5. Common sense reasoning

In [None]:
%%time
input_string = '''I am riding a bicycle. The pedals are moving fast. I look into the mirror and I am not moving. Why is this?'''

generate_completion(input_string, max_length=200)

Input size: 30
Output size: 7
CPU times: user 316 ms, sys: 0 ns, total: 316 ms
Wall time: 314 ms


'I am standing still.'

## 6. QA with Large Token Input

In [None]:
%%time
# Zero-shot question answering over a long article excerpt
input_string = '''Please answer the question:\n
Who is the OnePlus COO?\n\n
Output in the format: [first_name, surname]\n\n

Smartphone makers searched for a way forward at MWC 2023
Foldables, 6G, light shows -- there are a lot of ideas floating around, but no one has cracked the code
The slowdown was inevitable, of course. Nothing stays hot forever — especially in this industry. By tech standards, smartphones have had a good run, but the last few years have seen device makers searching for the magic bullet to help the sales slide reverse course. The arrival of 5G was a nice reprieve, but next-generation telecom standards don’t arrive every year.

It’s too early to say with certainly whether the move toward device repairability in the midst of new and proposed legislation will have a meaningful impact, but it was a highlight at this year’s show, which HMD turned into a central thesis. Regardless of how many people take advantage of the ability to repair their devices at home (or have a third party repair them), it’s another potential pain point for industry growth.
Foldables have seemingly performed many expectations (specifically for Samsung), but not nearly enough to really move the needle. Phone makers have a refresh problem. For a long time, phone purchases were inexorably tied to carrier plans, putting the devices on a two- or three-year cycle. Of course, the kinds of financing deals that let you spend less up front have a way of making you pay in the end.


There does seem to be a looming sense of carriers and manufacturers attempting to return to something similar with a new name.

“I think there’s going to be more of a movement toward models where devices themselves are sold more as a service,” Google’s Sameer Samat told me this week. “I think there’s a lot of innovative work going on in the carrier side to figure out how you buy a device for less up front, you use it and return it after a period of time and you get another device as part of your overall subscription.”
In a world where we don’t own our movies, music or software, the concept of “hardware as a service” is rapidly emerging as its own path forward. Like the move from physical albums to Spotify, it has trade-offs.
'''

# Report the prompt length in tokens before generating.
inputs = tokenizer(input_string, return_tensors="pt")["input_ids"]
print(inputs.shape[1])

generate_completion(input_string, max_length=200)

510
Input size: 510
Output size: 10
CPU times: user 481 ms, sys: 0 ns, total: 481 ms
Wall time: 479 ms


'[first_name, surname]'

## 7. Symmetrical Reasoning

In [None]:
%%time
input_string = '''The book didn't fit into the suitcase. what was too big?'''

generate_completion(input_string, max_length=100)

Input size: 16
Output size: 4
CPU times: user 183 ms, sys: 0 ns, total: 183 ms
Wall time: 182 ms


'the book'

In [None]:
%%time
# NOTE(review): this prompt is identical to the one in the previous cell; presumably the
# mirrored question (e.g. "what was too small?") was intended here - confirm.
input_string = '''The book didn't fit into the suitcase. what was too big?'''

generate_completion(input_string, max_length=100)

Input size: 16
CPU times: user 6.53 s, sys: 7.58 ms, total: 6.54 s
Wall time: 1.09 s


'the book'

## 8. Elementary Arithmetic Reasoning

In [None]:
generate_completion("Answer the following question by reasoning step by step. I have 7 apples. Bob gives me 10 apples. How many apple do I have?")

Input size: 29


'I have 7 + 10 = 17 apples. Therefore, the final answer is 17.'

## 9. Elementary Logical Reasoning

In [None]:
generate_completion("Answer by reasoning step by step. If A is greater than B and B is greater than C, is C greater than A?")

'<pad> B is greater than C so A is greater than B. Therefore, the final answer is no.</s>'