<a href="https://colab.research.google.com/github/0xbageltoes/dcm-colabs/blob/main/email_autocomplete_demo_125M.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <center>demo - email generation</center>

- link to [email generation model card](https://huggingface.co/pszemraj/opt-125m-email-generation)
- created by [Peter Szemraj](https://peterszemraj.ch/)



In [None]:
%%capture
#@title set up auto-formatting of cells in notebook

from IPython.display import HTML, display


def set_css(*_args):
    """Inject a CSS rule so that ``<pre>`` blocks wrap long lines.

    Registered below as a ``pre_run_cell`` callback, so the style is
    re-displayed before every cell execution.

    Args:
        *_args: Ignored. IPython may call ``pre_run_cell`` callbacks with an
            execution-info object; accepting (and discarding) positional
            arguments keeps the callback working whether or not one is passed.
    """
    display(
        HTML(
            """
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  """
        )
    )


# Re-apply the wrapping style before each cell runs.
get_ipython().events.register("pre_run_cell", set_css)

In [None]:
#@title check  system stats
from psutil import virtual_memory
import os
# Total system memory converted from bytes to GiB (2**30 bytes), one decimal place.
ram_gb = round(virtual_memory().total / 2**30, 1)
print(
    f'Runtime has {ram_gb} gigs of memory and {os.cpu_count()} processors'
)

Runtime has 12.7 gigs of memory and 2 processors


In [None]:
!nvidia-smi

Sun Jul 10 13:57:38 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install -U transformers sentencepiece -q

[K     |████████████████████████████████| 4.4 MB 14.0 MB/s 
[K     |████████████████████████████████| 1.2 MB 50.6 MB/s 
[K     |████████████████████████████████| 101 kB 13.3 MB/s 
[K     |████████████████████████████████| 6.6 MB 55.9 MB/s 
[K     |████████████████████████████████| 596 kB 49.6 MB/s 
[?25h

# load model

In [None]:
# Hugging Face model id handed to `pipeline(...)` when the model is loaded;
# the trailing `#@param` marker exposes it as an editable Colab form field.
hf_tag = "pszemraj/opt-125m-email-generation" #@param {type:"string"}

In [None]:
import torch
from transformers import pipeline
# Build the text-generation pipeline for the checkpoint named by `hf_tag`.
# Device index 0 is used when CUDA is available, otherwise -1 is passed.
email_gen = pipeline(
    task='text-generation',
    model=hf_tag,
    device=(0 if torch.cuda.is_available() else -1),
    use_fast=False,  # explicitly opt out of the fast tokenizer
)


Downloading:   0%|          | 0.00/684 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/908 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/976k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/434 [00:00<?, ?B/s]

In [None]:
import pprint as pp

def call_model(
    prompt: str,
    num_beams: int = 8,
    min_length: int = 4,
    max_length: int = 64,
    no_repeat_ngram_size: int = 3,
    temperature: float = 0.3,
    repetition_penalty: float = 3.5,
    length_penalty: float = 0.8,
) -> None:
    """Generate a continuation of ``prompt`` with ``email_gen`` and pretty-print it.

    The prompt is wrapped as ``<PROMPT>{prompt}<END-OF-PROMPT>`` and the
    generated text of the first result is appended before printing.

    Args:
        prompt: Text the model should continue.
        num_beams: Forwarded to the pipeline as ``num_beams``.
        min_length: Forwarded to the pipeline as ``min_length``.
        max_length: Forwarded to the pipeline as ``max_length``.
        no_repeat_ngram_size: Forwarded as ``no_repeat_ngram_size``.
        temperature: Forwarded as ``temperature``. NOTE(review): the call also
            passes ``do_sample=False``, so this presumably has no effect on
            the output - confirm against the transformers generation docs.
        repetition_penalty: Forwarded as ``repetition_penalty`` (default 3.5,
            the value that was previously hard-coded).
        length_penalty: Forwarded as ``length_penalty`` (default 0.8, the
            value that was previously hard-coded).

    Returns:
        None. The combined prompt and response are printed via ``pprint``.
    """
    result = email_gen(
        prompt,
        min_length=min_length,
        max_length=max_length,
        no_repeat_ngram_size=no_repeat_ngram_size,
        repetition_penalty=repetition_penalty,
        length_penalty=length_penalty,
        temperature=temperature,
        remove_invalid_values=True,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
        do_sample=False,
        early_stopping=True,
        num_beams=num_beams,
        # Only the newly generated text is requested; the prompt is re-attached below.
        return_full_text=False,
    )

    w_prompt = f"<PROMPT>{prompt}<END-OF-PROMPT>"
    response = result[0]['generated_text']
    pp.pprint(w_prompt + response)

# generate responses

- higher `num_beams` = better quality, but more compute intensive
- increase `max_length` for longer responses, which are also more compute intensive
 

In [None]:
prompt = """
Good Afternoon,

FYI, it is extremely important that"""

call_model(prompt, max_length=128, num_beams=4)

('<PROMPT>\n'
 'Good Afternoon,\n'
 '\n'
 "FYI, it is extremely important that<END-OF-PROMPT> <COMPANY>'s Board of "
 'Directors meet today at <NUMBER>:<NUMBER> a.m. in EB00C0 to finalize the '
 'transaction.\n'
 'Please call me if you have any questions or concerns.\n'
 'Thank you for your cooperation.\n'
 'Regards, Norman Schwarzkopf Executive Administrator to the President & CEO '
 'Mariner Energy, Inc. <PHONE> (fax) <EMAIL> <mailto:<EMAIL>.\n'
 'This e-mail and any attachments (the "message") is intended solely')


In [None]:
prompt = """
Morning,

We sent out the survey last night to all participants and will"""
call_model(prompt, max_length=128, num_beams=4)


('<PROMPT>\n'
 'Morning,\n'
 '\n'
 'We sent out the survey last night to all participants and '
 'will<END-OF-PROMPT> send it out tonight.\n'
 'If you have any questions, please call me at <NUMBER>-<NUMBER>.\n'
 'Thank you for taking the time to fill out our survey.\n'
 'Your input is crucial to our continued efforts in establishing and providing '
 'you with World Class Support.\n'
 'Please take a minute and complete the attached survey then submit it back to '
 'us when you are done.\n'
 'Once again, thank you for your participation.\n'
 'Sincerely, Mark Haedicke Executive Assistant to President & CEO Mariner '
 'Energy, Inc. <')


In [None]:
prompt = """
Hello, 
I just wanted to follow up on the bubblegum shipment. We paid a lot of money"""
call_model(prompt, max_length=64, num_beams=8)


('<PROMPT>\n'
 'Hello, \n'
 'I just wanted to follow up on the bubblegum shipment. We paid a lot of '
 'money<END-OF-PROMPT> for it and I think we will be able to use it in the '
 'future.\n'
 'Please let me know if you have any questions.\n'
 'Thanks, Liz Taylor x0-<NUMBER>')
