!pip install torch
!pip install transformers
# Load the tokenizer and weights for the bigscience/bloom-7b1 checkpoint.
# NOTE(review): the original note here said "350ish GB"; that figure looks like
# the full 176B-parameter BLOOM rather than this 7.1B checkpoint -- confirm the
# real download size before relying on it.
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-7b1")
# NOTE(review): neither `tokenizer` nor `model` is referenced again in the
# visible code, so this load may only serve to populate the local cache.
model = AutoModel.from_pretrained("bigscience/bloom-7b1")

from transformers import pipeline
import torch
import time


# Build the generation pipeline used by `local_inf` and report how long the
# construction took (wall-clock seconds).
s = time.time()
# Only the model id is passed, so the pipeline resolves the checkpoint itself;
# the weights are requested in bfloat16.
pipe = pipeline(model="bigscience/bloom-7b1", torch_dtype=torch.bfloat16)
print(f"Time to load model: {time.time()-s}")

# src: https://stackoverflow.com/questions/16816013/is-it-possible-to-print-using-different-colors-in-ipythons-notebook
from IPython.display import HTML as html_print

def cstr(s, color='black'):
    """Return *s* wrapped in a ``<text>`` tag styled with *color*.

    Every newline in *s* becomes a ``<br>`` so line breaks survive HTML
    rendering. *s* is inserted as-is (no HTML escaping is performed).
    """
    body = "<br>".join(s.split('\n'))
    return f"<text style=color:{color}>{body}</text>"

def cstr_with_newlines(s, color='black'):
    """Return *s* as a coloured ``<text>`` HTML fragment.

    Newlines in *s* are converted to ``<br>`` tags; the text is otherwise
    inserted unchanged (no HTML escaping is performed).
    """
    html_body = s.replace('\n', '<br>')
    opening_tag = f"<text style=color:{color}>"
    return opening_tag + html_body + "</text>"

def local_inf(prompt, temperature=0.7, top_p=None, max_new_tokens=32, repetition_penalty=None, do_sample=False, num_return_sequences=1):
    """Generate a continuation of *prompt* with the module-level ``pipe``.

    Args:
        prompt: Text to continue.
        temperature: Sampling temperature (original author's note: 0 to 1).
        top_p: Nucleus-sampling threshold (original note: None or 0-1).
        max_new_tokens: Number of tokens to generate (original note: up to
            2047 theoretically).
        repetition_penalty: Penalty for repeated tokens (original note: None
            or 0-100).
        do_sample: True to use sampling, False for greedy decoding.
        num_return_sequences: Number of sequences requested from the pipeline.
            Only the first returned sequence is used by this function.

    Returns:
        A 2-tuple ``(html, text)``: an ``IPython.display.HTML`` object showing
        the prompt and the generated text in two different colours, and the
        generated text itself (the prompt is not included in it).
    """
    generation_kwargs = dict(
        temperature=temperature,
        top_p=top_p,
        max_new_tokens=max_new_tokens,
        return_full_text=False,  # ask the pipeline to leave the prompt out
        repetition_penalty=repetition_penalty,
        do_sample=do_sample,
        num_return_sequences=num_return_sequences,
    )
    response = pipe(f"{prompt}", **generation_kwargs)

    generated = response[0]['generated_text']
    coloured_prompt = cstr(prompt, color='#f1f1c7')
    coloured_answer = cstr(generated, color='#a1d8eb')
    return html_print(coloured_prompt + coloured_answer), generated

# Restart the timer so the figure printed below covers only this inference run.
s = time.time()



# Example prompt: a Markdown-style heading for the model to continue.
inp = """# Use OpenCV in Python"""
# `color_resp` is the coloured HTML object, `resp` the raw generated text.
color_resp, resp = local_inf(inp, max_new_tokens=64)
# NOTE(review): a bare expression is only rendered by a notebook when it is the
# last statement of its cell -- confirm this line actually ended a cell.
color_resp

print(f"Time to perform inference: {time.time()-s}")

In [None]:
import transformers
from transformers import BloomForCausalLM
from transformers import BloomTokenizerFast
import torch
import time

In [None]:
# Running report: the sections below append labels, generated text and elapsed
# times to this list, and the final cell writes it to disk one item per line.
output_text = []

In [None]:
# Passed as `max_length` to every `generate` call below.
result_length = 900

# Shared prompt for all model sizes: a structured abstract followed by a list of
# questions and the start of a numbered answer list for the model to complete.
# NOTE(review): question 2 contains the typo "postivie" and question 5 reads
# "main point"; both are left as-is because editing the prompt text would
# change what the models are asked.
prompt = """
OBJECTIVES: This study sought to assess the association between long working 
hours, psychosocial safety climate (PSC), work engagement (WE) and new major 
depression symptoms emerging over the next 12 months. PSC is the work climate 
supporting workplace psychological health.
SETTING: Australian prospective cohort population data from the states of New 
South Wales, Western Australia and South Australia.
PARTICIPANTS: At Time 1, there were 3921 respondents in the sample. 
Self-employed, casual temporary, unclassified, those with working hours <35 (37% 
of 2850) and participants with major depression symptoms at Time 1 (6.7% of 
1782) were removed. The final sample was a population-based cohort of 1084 
full-time Australian employees.
PRIMARY AND SECONDARY OUTCOME MEASURES: The planned and measured outcomes were 
new cases of major depression symptoms.
RESULTS: Long working hours were not significantly related to new cases of major 
depression symptoms; however, when mild cases were removed, the 41-48 and 
≥55 long working hour categories were positively related to major depression 
symptoms. Low PSC was associated with a threefold increase in risk for new major 
depression symptoms. PSC was not related to long working hours, and long working 
hours did not mediate the relationship between PSC and new cases of major 
depression symptoms. The inverse relationship between PSC and major depression 
symptoms was stronger for males than females. Additional analyses identified 
that WE was positively related to long working hours. Long working hours (41-48 
and ≥55 hours) mediated a positive relationship between WE and major depression 
symptoms when mild cases of major depression were removed.
CONCLUSION: The results suggest that low workplace PSC and potentially long 
working hours (41-48; ≥55 hours/week) increase the risk of new major depression 
symptoms. Furthermore, high WE may increase long working hours and subsequent 
major depression symptoms.

**Questions:**
1. Who are the authors?
2. Is the sentiment postivie or negative?
3. What are the key ideas?
4. What topics are mentioned?
5. What are the main point?
6. What are the key words?

**Answers:**
1. 
"""

# 350M



In [None]:
# Load the "bigscience/bloom-350m" model and tokenizer and report the wall-clock
# load time in seconds.
# NOTE(review): "bloom-350m" appears to be an older Hub name for the checkpoint
# now published as "bloom-560m" -- confirm the id still resolves and that this
# section is not benchmarking the same weights as the 560M one.
current = time.time()

model_350M = BloomForCausalLM.from_pretrained("bigscience/bloom-350m")
tokenizer_350M = BloomTokenizerFast.from_pretrained("bigscience/bloom-350m")

print(f"Time to load model: {time.time()-current}")

In [None]:

# Tokenize the shared prompt once; both decoding strategies below reuse it.
inputs = tokenizer_350M(prompt, return_tensors="pt")


# Beam Search
current = time.time()

output = tokenizer_350M.decode(
    model_350M.generate(
        inputs["input_ids"],
        max_length=result_length,
        num_beams=2,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )[0]
)
# Keep only the question/answer part. The offset is looked up in `prompt`,
# which assumes the decoded text begins with the prompt verbatim.
output = output[prompt.find('**Questions:**'):]
# Measure once so the printed and the recorded durations agree and neither
# includes the time spent printing the generated text.
elapsed = time.time() - current
print(output)
print(f"Time to Beam search: {elapsed}")

output_text.extend(['350M beam', '\n', output, 'Time to Beam search:', elapsed, '\n'])


# Sampling Top-k + Top-p
current = time.time()

output = tokenizer_350M.decode(
    model_350M.generate(
        inputs["input_ids"],
        max_length=result_length,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )[0]
)
output = output[prompt.find('**Questions:**'):]
elapsed = time.time() - current
print(output)

print(f"Time to Top-k + Top-p search: {elapsed}")

output_text.extend(['350M topk', '\n', output, 'Time to topk search:', elapsed, '\n'])

# 560M

In [None]:
# Load the "bigscience/bloom-560m" model and tokenizer and report the wall-clock
# load time in seconds.
current = time.time()

model_560M = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")
tokenizer_560M = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")

print(f"Time to load model: {time.time()-current}")

In [None]:
# Tokenize the shared prompt once; both decoding strategies below reuse it.
inputs = tokenizer_560M(prompt, return_tensors="pt")



# Beam Search
current = time.time()

output = tokenizer_560M.decode(
    model_560M.generate(
        inputs["input_ids"],
        max_length=result_length,
        num_beams=2,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )[0]
)
# Keep only the question/answer part. The offset is looked up in `prompt`,
# which assumes the decoded text begins with the prompt verbatim.
output = output[prompt.find('**Questions:**'):]
# Measure once so the printed and the recorded durations agree and neither
# includes the time spent printing the generated text.
elapsed = time.time() - current
print(output)
print(f"Time to Beam search: {elapsed}")

output_text.extend(['560M beam', '\n', output, 'Time to beam search:', elapsed, '\n'])

# Sampling Top-k + Top-p
current = time.time()

output = tokenizer_560M.decode(
    model_560M.generate(
        inputs["input_ids"],
        max_length=result_length,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )[0]
)
output = output[prompt.find('**Questions:**'):]
elapsed = time.time() - current
print(output)
print(f"Time to Top-k + Top-p search: {elapsed}")

output_text.extend(['560M topk', '\n', output, 'Time to topk search:', elapsed, '\n'])


## 760M

In [None]:
# Load the "bigscience/bloom-760m" model and tokenizer and report the wall-clock
# load time in seconds.
# NOTE(review): "bloom-760m" appears to be an older Hub name (possibly the
# checkpoint now published as "bloom-1b1") -- confirm the id still resolves.
current = time.time()

model_760M = BloomForCausalLM.from_pretrained("bigscience/bloom-760m")
tokenizer_760M = BloomTokenizerFast.from_pretrained("bigscience/bloom-760m")

print(f"Time to load model: {time.time()-current}")

In [None]:
# Tokenize the shared prompt once; both decoding strategies below reuse it.
inputs = tokenizer_760M(prompt, return_tensors="pt")

# Beam Search
current = time.time()

output = tokenizer_760M.decode(
    model_760M.generate(
        inputs["input_ids"],
        max_length=result_length,
        num_beams=2,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )[0]
)
# Keep only the question/answer part. The offset is looked up in `prompt`,
# which assumes the decoded text begins with the prompt verbatim.
output = output[prompt.find('**Questions:**'):]
# Measure once so the printed and the recorded durations agree and neither
# includes the time spent printing the generated text.
elapsed = time.time() - current
print(output)
print(f"Time to Beam search: {elapsed}")

output_text.extend(['760m beam', '\n', output, 'Time to beam search:', elapsed, '\n'])

# Sampling Top-k + Top-p
current = time.time()

output = tokenizer_760M.decode(
    model_760M.generate(
        inputs["input_ids"],
        max_length=result_length,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )[0]
)
output = output[prompt.find('**Questions:**'):]
elapsed = time.time() - current
print(output)
print(f"Time to Top-k + Top-p search: {elapsed}")

output_text.extend(['760M topk', '\n', output, 'Time to topk search:', elapsed, '\n'])

# 1b3

In [None]:
# Load the "bigscience/bloom-1b3" model and tokenizer and report the wall-clock
# load time in seconds.
# NOTE(review): "bloom-1b3" appears to be an older Hub name (possibly the
# checkpoint now published as "bloom-1b7") -- confirm the id still resolves.
current = time.time()

model_1b3 = BloomForCausalLM.from_pretrained("bigscience/bloom-1b3")
tokenizer_1b3 = BloomTokenizerFast.from_pretrained("bigscience/bloom-1b3")

print(f"Time to load model: {time.time()-current}")

In [None]:
# Tokenize the shared prompt into PyTorch tensors ("pt") with the 1b3 tokenizer;
# the generation calls that follow read `inputs["input_ids"]`.
inputs = tokenizer_1b3(prompt, return_tensors="pt")

In [None]:

# Beam Search
current = time.time()

output = tokenizer_1b3.decode(
    model_1b3.generate(
        inputs["input_ids"],
        max_length=result_length,
        num_beams=2,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )[0]
)
# Keep only the question/answer part. The offset is looked up in `prompt`,
# which assumes the decoded text begins with the prompt verbatim.
output = output[prompt.find('**Questions:**'):]
# Measure once so the printed and the recorded durations agree and neither
# includes the time spent printing the generated text.
elapsed = time.time() - current
print(output)
print(f"Time to Beam search: {elapsed}")

output_text.extend(['1b3 beam', '\n', output, 'Time to beam search:', elapsed, '\n'])

# Sampling Top-k + Top-p
current = time.time()

output = tokenizer_1b3.decode(
    model_1b3.generate(
        inputs["input_ids"],
        max_length=result_length,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )[0]
)
output = output[prompt.find('**Questions:**'):]
elapsed = time.time() - current
print(output)
print(f"Time to Top-k + Top-p search: {elapsed}")

output_text.extend(['1b3 topk', '\n', output, 'Time to topk search:', elapsed, '\n'])


# 2b5


In [None]:
# Load the "bigscience/bloom-2b5" model and tokenizer and report the wall-clock
# load time in seconds.
# NOTE(review): "bloom-2b5" appears to be an older Hub name for the checkpoint
# now published as "bloom-3b" -- confirm the id still resolves and that this
# section is not benchmarking the same weights as the 3b one.
current = time.time()

model_2b5 = BloomForCausalLM.from_pretrained("bigscience/bloom-2b5")
tokenizer_2b5 = BloomTokenizerFast.from_pretrained("bigscience/bloom-2b5")

print(f"Time to load model: {time.time()-current}")

In [None]:
# Tokenize the shared prompt once; both decoding strategies below reuse it.
inputs = tokenizer_2b5(prompt, return_tensors="pt")

# Beam Search
current = time.time()

output = tokenizer_2b5.decode(
    model_2b5.generate(
        inputs["input_ids"],
        max_length=result_length,
        num_beams=2,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )[0]
)
# Keep only the question/answer part. The offset is looked up in `prompt`,
# which assumes the decoded text begins with the prompt verbatim.
output = output[prompt.find('**Questions:**'):]
# Measure once so the printed and the recorded durations agree and neither
# includes the time spent printing the generated text.
elapsed = time.time() - current
print(output)

print(f"Time to Beam search: {elapsed}")


output_text.extend(['2b5 beam', '\n', output, 'Time to beam search:', elapsed, '\n'])


# Sampling Top-k + Top-p
current = time.time()

output = tokenizer_2b5.decode(
    model_2b5.generate(
        inputs["input_ids"],
        max_length=result_length,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )[0]
)
output = output[prompt.find('**Questions:**'):]
elapsed = time.time() - current
print(output)
print(f"Time to Top-k + Top-p search: {elapsed}")

output_text.extend(['2b5 topk', '\n', output, 'Time to topk search:', elapsed, '\n'])

# 3b


In [None]:
# Load the "bigscience/bloom-3b" model and tokenizer and report the wall-clock
# load time in seconds.
current = time.time()

model_3b = BloomForCausalLM.from_pretrained("bigscience/bloom-3b")
tokenizer_3b = BloomTokenizerFast.from_pretrained("bigscience/bloom-3b")

print(f"Time to load model: {time.time()-current}")

In [None]:
# Tokenize the shared prompt once; both decoding strategies below reuse it.
inputs = tokenizer_3b(prompt, return_tensors="pt")

# Beam Search
current = time.time()

output = tokenizer_3b.decode(
    model_3b.generate(
        inputs["input_ids"],
        max_length=result_length,
        num_beams=2,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )[0]
)
# Keep only the question/answer part. The offset is looked up in `prompt`,
# which assumes the decoded text begins with the prompt verbatim.
output = output[prompt.find('**Questions:**'):]
# Measure once so the printed and the recorded durations agree and neither
# includes the time spent printing the generated text.
elapsed = time.time() - current
print(output)
print(f"Time to Beam search: {elapsed}")

output_text.extend(['3b beam', '\n', output, 'Time to beam search:', elapsed, '\n'])

# Sampling Top-k + Top-p
current = time.time()

output = tokenizer_3b.decode(
    model_3b.generate(
        inputs["input_ids"],
        max_length=result_length,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )[0]
)
output = output[prompt.find('**Questions:**'):]
elapsed = time.time() - current
print(output)
print(f"Time to Top-k + Top-p search: {elapsed}")

output_text.extend(['3b topk', '\n', output, 'Time to topk search:', elapsed, '\n'])


# 6b3

# Load the "bigscience/bloom-6b3" model and tokenizer and report the wall-clock
# load time in seconds.
# NOTE(review): "bloom-6b3" appears to be an older Hub name (possibly the
# checkpoint now published as "bloom-7b1") -- confirm the id still resolves.
current = time.time()

model_6b3 = BloomForCausalLM.from_pretrained("bigscience/bloom-6b3")
tokenizer_6b3 = BloomTokenizerFast.from_pretrained("bigscience/bloom-6b3")

print(f"Time to load model: {time.time()-current}")

# Tokenize the shared prompt once; both decoding strategies below reuse it.
inputs = tokenizer_6b3(prompt, return_tensors="pt")


########### Beam Search
current = time.time()

output = tokenizer_6b3.decode(
    model_6b3.generate(
        inputs["input_ids"],
        max_length=result_length,
        num_beams=2,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )[0]
)
# Keep only the question/answer part, and store that same trimmed text in the
# report (previously the full decoded text, abstract included, was stored while
# only the trimmed text was printed). The offset is looked up in `prompt`,
# which assumes the decoded text begins with the prompt verbatim.
output = output[prompt.find('**Questions:**'):]
# Measure once so the printed and the recorded durations agree and neither
# includes the time spent printing the generated text.
elapsed = time.time() - current
print(output)
print(f"Time to Beam search: {elapsed}")

output_text.extend(['6b3 beam', '\n', output, 'Time to beam search:', elapsed, '\n'])

############## Sampling Top-k + Top-p
current = time.time()

output = tokenizer_6b3.decode(
    model_6b3.generate(
        inputs["input_ids"],
        max_length=result_length,
        do_sample=True,
        top_k=50,
        top_p=0.9,
    )[0]
)
output = output[prompt.find('**Questions:**'):]
elapsed = time.time() - current
print(output)
print(f"Time to Top-k + Top-p search: {elapsed}")

output_text.extend(['6b3 topk', '\n', output, 'Time to topk search:', elapsed, '\n'])


In [None]:
# Write the collected labels, generations and timings as plain text, one item
# per line (the '\n' items therefore show up as blank separator lines).
# The encoding is set explicitly: generated text can contain any Unicode
# character, and the platform default code page on Windows cannot encode all of
# them, which would abort the write with UnicodeEncodeError.
with open(r'C:/Users/Declan/Desktop/MDSI/2022/Spring/iLab 2/Project/example 7 bloom.doc', 'w', encoding='utf-8') as fp:
    # f-string formatting stringifies both the text items and the float timings.
    fp.writelines(f"{item}\n" for item in output_text)
print('Done')