In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer  # type: ignore
model_name = "GPT2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id


  from .autonotebook import tqdm as notebook_tqdm


In [21]:
tokenizer.encode(["what is this"])

[50256]

In [17]:
data = {"prompt": ["Once upon a time"]}
prompt = data["prompt"]
input_ids = tokenizer.encode(prompt, return_tensors="pt")

# Generate text using the model
outputs = model.generate(
    input_ids, pad_token_id=tokenizer.pad_token_id, max_length=10, num_return_sequences=1, no_repeat_ngram_size=2
)

# Decode and print the generated text
generated_text = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


In [12]:
from joblib import dump, load
import os
from time import gmtime, strftime

def name_with_timestamp(name):
    return '{}-{}'.format(name, strftime('%Y-%m-%d-%H-%M-%S', gmtime()))

model_artifact = {
    "model": model,
    "tokenizer": tokenizer
}

model_name_with_timestamp = name_with_timestamp(model_name)
if not os.path.exists(os.path.join("models/", model_name)):
    os.makedirs(os.path.join("models/", model_name))
dump(model_artifact, os.path.join("models/", model_name, model_name_with_timestamp + ".joblib"))

['models/GPT2/GPT2-2024-02-10-17-57-41.joblib']

In [31]:
tokenizer.padding_side = "left" 
def input_fn(data):
    if not (isinstance(data, list) and len(data) > 0):
        data = [data]
    inputs = tokenizer(data, return_tensors="pt")
    return inputs


def predict_fn(inputs, model):
    # Generate text using the model
    outputs = model.generate(
        **inputs,
        pad_token_id=tokenizer.pad_token_id,
        max_length=50,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
    )

    # Decode and print the generated text
    generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return generated_text

prompts = ["once upon a time, we have a lot of content to create"]

inputs = input_fn(prompts)
outputs = predict_fn(inputs, model)
outputs

["once upon a time, we have a lot of content to create. We have to make sure that we're not just going to be able to do it for a while, but we need to have the right tools to get it done.\n\n"]

In [18]:
class Model:
    def __init__(self, model_path, inference_path):
        self.model_path = model_path
        self.inference_path = inference_path

class ModelManager:
    def __init__(self):
        self._models = {}

    def save_to_local(self, model_name, model_path, inference_path, save_local=False):
        if not os.path.exists(os.path.join("models/", model_name)):
            os.makedirs(os.path.join("models/", model_name))
        new_model = Model(model_path, inference_path)
        self._models[model_name] = new_model

    def register(self, model_name):
        pass

model_manager = ModelManager()

def register_model(model_name, model_path, inference_path):
    model_manager.save_to_local(model_name, model_path, inference_path)
    model_manager.register(model_name)

['\nThe first time I saw the new version']

In [None]:
parser = ArgumentParser()

parser.add_argument("-src",
                    required=True,
                    help="path to some folder",
                    type=os.path.abspath)

In [35]:
import yaml

config_path = "configs/model_repository.yml"
def load_config_from_yaml(config_path):
    config_path = "configs/model_repository.yml"
    with open(config_path) as f:
        config = yaml.load(f, Loader=yaml.BaseLoader)

    return config["model_names"]

confs = load_config_from_yaml(config_path)
for x in confs:
    print(x)

GPT2
GPT3
GPT4
