In [9]:
import os
import json
import torch
import pickle
import hashlib
import warnings
import numpy as np
from time import time
from transformers import AutoTokenizer, AutoModelForCausalLM

In [2]:
%%time
name = "EleutherAI/gpt-neo-2.7B"
# single place to change where the downloaded weights are cached
cache_dir = "../../hf_cache/"
# get the model and tokenizer,
# EleutherAI/gpt-neo-2.7B = 9.9GB compressed
# gpt2-xl (1.5Bn Params) = 6.7GB compressed
# always cache these models, reduces useless bandwidth.
tokenizer = AutoTokenizer.from_pretrained(name, cache_dir = cache_dir)
model = AutoModelForCausalLM.from_pretrained(name, cache_dir = cache_dir)
print("Model loaded ..., moving to GPU")
# torch device type strings are lower-case: "CPU" is rejected by torch, so the
# fallback must be "cpu" for the notebook to run on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# eval() switches the module to inference mode and returns the module itself
model = model.eval()

Model loaded ..., moving to GPU
CPU times: user 1min 16s, sys: 17 s, total: 1min 33s
Wall time: 2min 48s


In [3]:
def folder():
  """Return the directory that contains this source file.

  Returns:
    str: absolute directory of `__file__` when it is defined, otherwise "."
      (e.g. inside a notebook kernel, where `__file__` does not exist).
  """
  try:
    return os.path.dirname(os.path.realpath(__file__))
  except NameError:
    # `__file__` is undefined in interactive sessions; only that case falls
    # back to the current directory, other errors are no longer swallowed.
    return "."

In [4]:
def md5(t):
  """Return the hexadecimal MD5 digest of the string `t` (encoded as UTF-8)."""
  digest = hashlib.md5()
  digest.update(t.encode("utf-8"))
  return digest.hexdigest()

In [5]:
class History():
  """Class that records the responses given by GPT so you don't have to waste
  time copy pasting things into some notion file. Data is stored as a dictionary in
  a pickle object; each key is `md5(prompt)` and each item is a dictionary like this:
  {
    "<md5 hex digest of the prompt>": {
      "prompt": "Hello world",
      "args": {"n": 10, "r": 2},
      "responses": [
        'Hello world!" "Hello?" "Yeah, I'm in',
        'Hello world" command, that is, the command that'
      ]
    }
    ...
  }

  NOTE: we only create keys based on the prompts and not on generation args so
        args are cached only for first time.

  Attributes:
    loc: path of the pickle file backing this history
    hist: dict mapping prompt digest -> record (see above)
    keys: list of digests in insertion order
  """
  def __init__(self, loc = None):
    """Load the history stored at `loc`, or start an empty one.

    Args:
      loc (str, optional): path of the pickle file. Defaults to
        `gpt_history.p` inside the directory returned by `folder()`.
    """
    self.loc = os.path.join(folder(), 'gpt_history.p') if loc is None else loc
    hist = {}
    if os.path.exists(self.loc):
      # create the history and load data from location
      st = time()
      # NOTE(security): pickle.load can execute arbitrary code; only point
      # `loc` at history files you created yourself.
      with open(self.loc, "rb") as f:
        hist = pickle.load(f)
      warnings.warn(f"Loading took: {time() - st:.2f}s")
    else:
      warnings.warn(f"No file found at: {self.loc}! Will create a new one.")
    self.hist = hist
    self.keys = list(hist.keys())

  def __len__(self):
    """Number of distinct prompts recorded."""
    return len(self.hist)

  def __getitem__(self, i):
    """Return the record stored under prompt digest `i`."""
    return self.hist[i]

  def __repr__(self):
    """Summarise the (up to) 100 most recent prompts, newest first.

    Each row shows the row index, the number of stored responses and the
    first 60 characters of the prompt. Building the string has no side
    effects (nothing is printed).
    """
    recent = self.keys[-100:][::-1]
    lines = [f"<gpt.History :: {len(self)} >", "-" * 70]
    for i, key in enumerate(recent):
      item = self.hist[key]
      lines.append(f"[{i:03d} :: {len(item['responses']):04d}] {item['prompt'][:60]}")
    return "\n".join(lines) + "\n"
  
  def save(self):
    """Write the whole history dictionary to `self.loc` as a pickle."""
    with open(self.loc, "wb") as f:
      pickle.dump(self.hist, f)
  
  def add_item(self, x):
    """Record the generations of `x` and persist the history to disk.

    Args:
      x (Response): response whose `prompt`, `decoded` and `gen_kwargs`
        are stored.

    Raises:
      AssertionError: if `x` is not a `Response`.
    """
    assert isinstance(x, Response), "Input needs to be a gpt.Response object"
    key = md5(x.prompt)
    if key in self.hist:
      # this prompt already exists in the history, we don't update the gen kwargs.
      # dict.fromkeys removes duplicates while keeping first-seen order, so the
      # stored list stays deterministic (a set would scramble it).
      entry = self.hist[key]
      entry["responses"] = list(dict.fromkeys(entry["responses"] + list(x.decoded)))
    else:
      self.hist[key] = {
        "prompt": x.prompt,
        "responses": list(x.decoded),
        "args": x.gen_kwargs
      }
    if key not in self.keys:
      self.keys.append(key)
    
    self.save() # update the dataset

In [6]:
class Response():
  """Class that makes getting generated results chill, simply `print(out)`.

  Wraps the output of `model.generate()`: tensors are moved to CPU and
  converted to plain Python / numpy objects, and the generated sequences are
  decoded to strings. Behaves like a read-only sequence of the decoded strings.
  """
  def __init__(self, prompt, out, t, gen_kwargs):
    """
    Args:
      prompt (str): prompt the generation was conditioned on
      out: generate() output exposing `sequences`, `scores`, `hidden_states`
        and `attentions` attributes (the last three may be None)
      t: tokenizer; its `batch_decode` is used to decode `out.sequences`
      gen_kwargs (dict): generation arguments, kept for the history
    """
    self.prompt = prompt # need to add for history
    self.gen_kwargs = gen_kwargs # need to add for history
    self.t = t
    
    # get the generated hidden_states/attentions/strings
    self.sequences = out.sequences.cpu().tolist()
    # identity comparison with None: `!= None` would dispatch to the
    # object's own __ne__, which is not what is meant here
    self.scores = [x.cpu().numpy() for x in out.scores] if out.scores is not None else None
    self.hidden_states = self._nested_to_numpy(out.hidden_states)
    self.attentions = self._nested_to_numpy(out.attentions)
    self.decoded = self.t.batch_decode(self.sequences, skip_special_tokens = True)

  @staticmethod
  def _nested_to_numpy(nested):
    """Convert a two-level nesting of tensors to lists of numpy arrays; None passes through."""
    if nested is None:
      return None
    return [[y.cpu().numpy() for y in x] for x in nested]

  def __repr__(self):
    """Every decoded string followed by a 70-dash separator line."""
    rule = "-" * 70
    return "".join(x + "\n" + rule + "\n" for x in self.decoded)

  def __len__(self):
    """Number of decoded sequences."""
    return len(self.decoded)

  def __getitem__(self, i):
    """Return the `i`-th decoded string."""
    return self.decoded[i]
  
  def __iter__(self):
    """Iterate over the decoded strings."""
    return iter(self.decoded)

In [7]:
class GPT():
  """Make GPT a first class object and using it as simple as possible.
  First define the model and tokenizer
  
  >>> device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  >>> tokenizer = AutoTokenizer.from_pretrained(name)
  >>> model = AutoModelForCausalLM.from_pretrained(name, cache_dir = "../hf-cache/").eval().to(device)
  Make GPT wrapper: output is `Response`, a class with __repr__ overloaded so print gives the generation
  >>> gpt = GPT(model, tokenizer)
  >>> out = gpt("Hello world", n = 10, r = 2)
  >>> out
  ... Hello world!" "Hello?" "Yeah, I'm in
      ----------------------------------------------------------------------
      Hello world" command, that is, the command that
      ----------------------------------------------------------------------
  """
  def __init__(self, model, tokenizer, history_loc = None):
    """
    Args:
      model: causal LM exposing `.generate()` and `.device`
      tokenizer: matching tokenizer; its `eos_token_id` is used for padding
        and as the default end-of-sequence token
      history_loc (str, optional): path of the history pickle file; None
        lets `History` pick its default location
    """
    self.model = model
    self.tokenizer = tokenizer
    self.eot_id = tokenizer.eos_token_id
    self.device = self.model.device
    
    # forward the caller-supplied location (it used to be silently ignored)
    self.history = History(history_loc)

  @torch.no_grad()
  def __call__(
    self,
    prompt: str,
    n: int = 16, # number of tokens
    r: int = 1, # number of sequences
    do_sample = True,
    temp = 0.9,
    top_p = 0.9,
    top_k = None,
    output_scores = None,
    output_hidden_states = None,
    output_attentions = None,
    stop_sequence = None,
    return_response = True,
    **gen_kwargs
  ):
    """ __call__ overloader initialises the model.generate() function. We emphasise
    a lot more on most powerful arguments, but you can always pass custom kwargs through
    `gen_kwargs`. As you can see that we have not added many beam-search related arguments.
    Args:
      prompt (str): prompt string, tokens will be generated in continuation
      n (int, optional): number of tokens to return
      r (int, optional): number of sequences to return
      do_sample (bool, optional): forwarded to model.generate() as `do_sample`
      temp (float, optional): sampling temperature
      top_p (float, optional): tokens whose probability adds up to this are considered
      top_k (int, optional): top-k tokens to consider for each distribution
      output_scores (bool, optional): output scores for each generated token, returns shape `[r,n]`
      output_hidden_states (bool, optional): output the hidden states of the generation, returns shape `[r,n+1,...]`
      output_attentions (bool, optional): Whether or not to return the attentions tensors of all attention layers
      stop_sequence (str, optional): Stop generation once the first token of this string is generated
      return_response (bool, optional): To parse the generated dictionary to `Response` class
      gen_kwargs (dict, optional): any extra arguments to pass to the model.generate() method
    Returns:
      if return_response:
       Response instance (also recorded in `self.history`)
      else:
       model.generate() output
    """
    t = self.tokenizer
    m = self.model
    
    # tokenize the input prompt and stop token if provided
    input_ids = t(prompt, return_tensors = "pt")["input_ids"].to(self.device)
    if stop_sequence is not None:
      # only the first token of the stop string acts as end-of-sequence
      eos_token_id = t(stop_sequence)["input_ids"][0]
    else:
      eos_token_id = self.eot_id
      
    # generate the items
    out = m.generate(
      input_ids,
      max_length = len(input_ids[0]) + n, # prompt tokens + n new tokens
      temperature = temp,
      top_p=top_p,
      top_k=top_k,
      num_return_sequences=r,
      pad_token_id = self.eot_id,
      output_scores = output_scores,
      output_hidden_states = output_hidden_states,
      output_attentions = output_attentions,
      do_sample = do_sample,
      return_dict_in_generate = True,
      eos_token_id = eos_token_id,
      **gen_kwargs
    )
    
    # wrap in a Response (and record it in the history) or return the raw output
    if return_response:
      x = Response(prompt, out, t, {
        "n": n, "r": r,                                
        "do_sample": do_sample,
        "temp": temp,
        "top_p": top_p,
        "top_k": top_k,
        **gen_kwargs
      })
      self.history.add_item(x)
      return x
    else:
      return out

  def classify(
    self,
    prompt: str,
    labels: list,
    softmax_temp = 0.9,
    add_unknown = False,
    **gen_kwargs,
  ) -> dict:
    """Perform classification directly.
    NOTE: ensure that first tokens in labels are not the same.
    Args:
      prompt (str): prompt string to be given as input
      labels (list): list of strings that are labels
      gen_kwargs (dict, optional): extra arguments to be passed for generation
      softmax_temp (float, optional): temperature for scoring labels. Defaults to 0.9.
      add_unknown (bool, optional): adds an extra "Unknown" label. Defaults to False.
    Returns:
      dict: normalized label -> score; values are 0. if model returns 'nan'
    """
    # we will use the same format that OpenAI uses for GPT-3
    # read: https://beta.openai.com/docs/guides/classifications
    # We normalize all labels by `label.strip().lower().capitalize()` at the API
    # backend. Thus corresponding output labels are always capitalized.
    unq_options = set([x.strip().lower().capitalize() for x in labels])
    unq_options = sorted(list(unq_options))

    # each label must have a distinct first token, because classification
    # works by looking only one step ahead. Also encode the labels with extra
    # white space prepended.
    label_ids = [self.tokenizer.encode(" " + x)[0] for x in unq_options]
    
    # one generation step, raw generate() output so nothing is added to the history
    out = self(prompt, n = 1, r = 1, output_scores = True, **gen_kwargs, return_response = False)
    logits = out.scores[0][0]
    # softmax is taken over the label logits only
    logits = (logits / softmax_temp)[label_ids].softmax(-1).cpu()
    logits = logits.numpy()

    scores = {o:i for o,i in zip(unq_options, logits)}
    
    # naaaaan - check
    scores = {k: 0. if np.isnan(l) else l for k,l in scores.items()}

    if add_unknown:
      # fill the Probability for the special "Unknown" token
      # NOTE(review): because the softmax above is restricted to the labels,
      # this remainder is ~0 unless some scores were nan - confirm intent.
      scores["Unknown"] = 1 - sum(scores.values())
    
    return scores

In [12]:
# wrap the loaded model/tokenizer pair so prompts can be run as `gpt(prompt, ...)`
gpt = GPT(model = model, tokenizer = tokenizer)



In [None]:
%%time
# Few-shot prompt ending in "3." so the model continues with the third list item.
prompt = """Here we are trying to ideate Server Side Cloud Cost Optimisation offerings for a tech startup company

Cloud provides the ability to scale systems to never before heard sizes and complexity while solving some of the most pressing issues of the time.

Cloud costs are now the biggest expense of any IT company growing exponentially over the past 5 years, with no likely way to slow things down. This means that organizations will have to be much smarter when it comes to using the cloud and it's services.

1. We will be building systems that take in bills from the cloud providers or consume billing information from an API and provide use OCR to extract information and provide optimal solutions for managing the resources.
2. We intend to build personalised solutions for different organizations that can help better manage cloud services. This would be built using the data obtained from public sources as well as from the billing and other information from the organizations.
3."""

total_responses = []

# 200 calls x 5 sequences per call = 1000 completions
for _ in range(200):
    
    # stop_sequence='4': generation ends at the first token of '4' (the next list item)
    out = gpt(prompt, n=100, temp=0.98, top_p=1, stop_sequence='4', r=5)
    responses = []
    for response in out.decoded:
        completion = response[len(prompt):]
        # drop the single separator after the prompt only when it really is
        # whitespace; slicing at len(prompt)+1 unconditionally would cut the
        # first letter of a completion that starts without a leading space
        if completion[:1].isspace():
            completion = completion[1:]
        # keep just the remainder of the current list item (first line)
        responses.append(completion.split('\n')[0].strip())
    
    total_responses.extend(responses)

# preview the first five completions
for i,response in enumerate(total_responses[:5],1):
    print(f'{i}. {response}\n')

In [None]:
# Persist the completions, one per line.
out_path = '../data/I1_1_1000.txt'
# make sure the target directory exists so a long generation run is not lost
os.makedirs(os.path.dirname(out_path), exist_ok=True)
# explicit UTF-8: generated text may contain characters the locale default
# encoding cannot represent
with open(out_path, 'w', encoding='utf-8') as txtfile:
    r = '\n'.join(total_responses)
    txtfile.write(r)

In [None]:
%%time
# Few-shot prompt ending in "4." so the model continues with the fourth list item.
prompt = """Here we are trying to list some business ideas that would not cost too much

Businesses that have high profit margins may cost a little more, but can still be afforable on a loan.
Businesses that have lower profit margins should almost always cost less because a loan would not be feasible.

1. Ice Cream Truck. One could purchase an ice cream truck for as low as $20000 and get it up and running. Ice creams are quite profitable in summer seasons, but are not profitable during winter months.
2. Woodworking Shop. One could set up a woodworking store for as low as $6000, excluding rent, taking the total up to about $12000 to start up. These are not too profitable, but the low start up costs make it a lucrative option.
3. Cleaning and Care services. This business does not necessarily need a store, which saves money. For as low as $3000, one can get all the required equipment and start their business. This option is moderately profitable, but demand depends on location.
4."""

total_responses = []

# 200 calls x 5 sequences per call = 1000 completions
for _ in range(200):
    
    # stop_sequence='5': generation ends at the first token of '5' (the next list item)
    out = gpt(prompt, n=100, temp=0.98, top_p=1, stop_sequence='5', r=5)
    responses = []
    for response in out.decoded:
        completion = response[len(prompt):]
        # drop the single separator after the prompt only when it really is
        # whitespace; slicing at len(prompt)+1 unconditionally would cut the
        # first letter of a completion that starts without a leading space
        if completion[:1].isspace():
            completion = completion[1:]
        # keep just the remainder of the current list item (first line)
        responses.append(completion.split('\n')[0].strip())
    
    total_responses.extend(responses)

# preview the first five completions
for i,response in enumerate(total_responses[:5],1):
    print(f'{i}. {response}\n')

In [None]:
# Persist the completions, one per line.
out_path = '../data/I2_1_1000.txt'
# make sure the target directory exists so a long generation run is not lost
os.makedirs(os.path.dirname(out_path), exist_ok=True)
# explicit UTF-8: generated text may contain characters the locale default
# encoding cannot represent
with open(out_path, 'w', encoding='utf-8') as txtfile:
    r = '\n'.join(total_responses)
    txtfile.write(r)

In [11]:
%%time
# Few-shot prompt ending in "4." so the model continues with the fourth list item.
prompt = """A Company needs to cut costs due to financial strain. Here are some things the company management should consider:

1. Cut marketing costs. Cutting Marketing costs will reduce inflow of potential new clients, but will save money in the immediate term while the company floats on its regular clientele.
2. Layoff employees. The company could consider reducing its workforce temporarily by 20-30%, keeping the core team and main operations personnel.
3. Take a business loan. After assessing the liabilities it already has, the company could request a new line of credit which would help it stay afloat for a certain time.
4."""

total_responses = []

# 200 calls x 5 sequences per call = 1000 completions
for _ in range(200):
    
    # stop_sequence='5': generation ends at the first token of '5' (the next list item)
    out = gpt(prompt, n=100, temp=0.98, top_p=1, stop_sequence='5', r=5)
    responses = []
    for response in out.decoded:
        completion = response[len(prompt):]
        # drop the single separator after the prompt only when it really is
        # whitespace; slicing at len(prompt)+1 unconditionally would cut the
        # first letter of a completion that starts without a leading space
        if completion[:1].isspace():
            completion = completion[1:]
        # keep just the remainder of the current list item (first line)
        responses.append(completion.split('\n')[0].strip())
    
    total_responses.extend(responses)

# preview the first five completions
for i,response in enumerate(total_responses[:5],1):
    print(f'{i}. {response}\n')

1. Consolidate operations. The company could take on a few new businesses, like selling insurance, auto repair, and similar, and then take over operations of these businesses in its place.

2. Consolidate. Consolidating the company means eliminating the divisional or branch network. This is more time consuming, but can help boost the revenue considerably.

3. Consider increasing dividends. A company with high dividends can increase the earnings of a dividend and increase the number of shares outstanding.

4. Refinance an old debt. Even if a company has little debt in the bank, it can still leverage itself by refinanceing a portion of its existing debt with other banks. After refinancing, it can borrow more money if the company requires additional capital to keep it afloat.

5. Cut down on staff and administration costs.

CPU times: user 1h 3min 43s, sys: 55 s, total: 1h 4min 38s
Wall time: 1h 4min 37s


In [None]:
# Persist the completions, one per line.
out_path = '../data/I3_1_1000.txt'
# make sure the target directory exists so a long generation run is not lost
os.makedirs(os.path.dirname(out_path), exist_ok=True)
# explicit UTF-8: generated text may contain characters the locale default
# encoding cannot represent
with open(out_path, 'w', encoding='utf-8') as txtfile:
    r = '\n'.join(total_responses)
    txtfile.write(r)