In [None]:
# !uv pip install --upgrade uvicorn watchfiles jupyterlab jupyter-server

In [None]:
# !uv pip install -U transformers huggingface_hub datasets

In [None]:
# syft absolute
import syft as sy

In [None]:
# stdlib
import os

# Ask PyTorch to fall back to the CPU for operators that are not implemented on
# the MPS (Apple Metal) backend.
# NOTE(review): presumably this has to run before torch is first imported — confirm.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

In [None]:
# sy.enable_autoreload()

In [None]:
# Launch the "canada-domain" node that was set up in the previous notebook.
# NOTE(review): reset=True is passed here — presumably this discards state left by
# the earlier notebook; confirm that is intended.
canada_node = sy.orchestra.launch(
    name="canada-domain",
    port="auto",
    dev_mode=True,
    reset=True,
)

In [None]:
# Log in to the launched node and keep the resulting client for later uploads.
# NOTE(review): the credentials are hardcoded; presumably these are the stock
# dev-mode defaults — confirm, and do not reuse this pattern against a real deployment.
domain_client = canada_node.login(email="info@openmined.org", password="changethis")

In [None]:
# Create the model entry that the following cells decorate and upload.
# NOTE(review): the entry is named "Gemma" while the weights downloaded further down
# are GPT-2, and model_code="a" looks like a placeholder — confirm before sharing.
model = sy.Model(name="Gemma", model_code="a")

In [None]:
# Attach a human-readable description to the model entry.
model.set_description(
    "Gemma is a set of lightweight, generative artificial intelligence (AI) open models. Gemma models are available to run in your applications and on your hardware, mobile devices, or hosted services. You can also customize these models using tuning techniques so that they excel at performing tasks that matter to you and your users. Gemma models are based on Gemini models and are intended for the AI development community to extend and take further."
)

In [None]:
# Attach reference metadata to the model entry: a citation string and a URL.
# NOTE(review): the citation text looks like a placeholder — replace with a real reference.
model.add_citation("Person, place or thing")
model.add_url(
    "https://cloud.google.com/vertex-ai/generative-ai/docs/open-models/use-gemma"
)

In [None]:
# Record a contributor (name, email and a free-form note) on the model entry.
model.add_contributor(
    name="Thomas Mesnard",
    email="thomas@email.com",
    note="This paper was fun!",
)

In [None]:
# Local directory the snapshot is downloaded into and later read back from.
model_folder = "./gpt2"

In [None]:
# third party
from huggingface_hub import snapshot_download

# Fetch the openai-community/gpt2 snapshot into model_folder, skipping every file
# that matches one of the ignore patterns.
snapshot_download(
    repo_id="openai-community/gpt2",
    local_dir=model_folder,
    ignore_patterns=[
        "*.tflite",
        "*.msgpack",
        "*.bin",
        "*.ot",
        "*.h5",
        "onnx/*",
    ],
)

In [None]:
# Build a SyftFolder named "gpt2" from the downloaded directory and display it.
model_files = sy.SyftFolder.from_dir(name="gpt2", path=model_folder)
model_files

In [None]:
# Display the folder object's `files` attribute.
model_files.files

In [None]:
# Wrap the folder in a model asset named "weights".
asset = sy.ModelAsset(name="weights", data=model_files)

In [None]:
# Attach the asset to the model entry.
model.add_asset(asset)

In [None]:
# Upload the model entry through the logged-in client.
domain_client.upload_model(model)

In [None]:
# Display the folder object. (A dangling `model_files.` in this cell was a
# SyntaxError and stopped Restart & Run All.)
model_files

In [None]:
# upload evals dataset

In [None]:
# Placeholders for the model and evals-dataset pointers passed to the
# syft function decorator on run_eval.
# NOTE(review): both are None, so the decorator receives None for both inputs —
# presumably these are to be replaced with real pointers; confirm.
model_ptr = None
evals_ptr = None

In [None]:
# before passing in model
# get model_code and eval
# run __init__
# pass in inited model object to func


@sy.syft_function_single_use(model=model_ptr, evals=evals_ptr)
def run_eval(model, evals):
    """Run ``model.inference`` on every prompt in ``evals``.

    Args:
        model: Object exposing an ``inference(prompt)`` method.
        evals: Iterable of prompts.

    Returns:
        list: One inference result per prompt, in iteration order.
    """
    return [model.inference(prompt) for prompt in evals]

In [None]:
# Sample prompt text.
# NOTE(review): this module-level name is not read by any later cell visible here.
prompt = "GPT2 is a model developed by OpenAI."

In [None]:
# NOTE(review): `gen_text` is not assigned by any earlier cell visible here (the only
# assignment is a local inside GPT2Model.inference), so this raises NameError on a
# fresh kernel — define it first or remove this cell.
gen_text

In [None]:
class ModelAssets:
    """Empty placeholder class; it defines no attributes or methods."""

In [None]:
# stdlib
from typing import Any

In [None]:
class SyftModelClass:
    """Base class for models whose construction is delegated to ``__user_init__``.

    Subclasses override ``__user_init__`` to set themselves up from ``assets`` and
    override ``inference`` to produce a result.
    """

    def __init__(self, assets) -> None:
        """Forward ``assets`` to the ``__user_init__`` hook."""
        self.__user_init__(assets)

    def __user_init__(self, assets) -> None:
        """Set-up hook; the base implementation does nothing."""

    def inference(self, *args: Any, **kwargs: Any) -> Any:
        """Inference hook; the base implementation returns ``None``.

        Arbitrary positional and keyword arguments are accepted so that the base
        signature is compatible with the ``model.inference(prompt)`` call style used
        in this notebook; with a bare ``inference(self)`` that call raised
        ``TypeError`` on the base class.
        """
        return None

In [None]:
class GPT2Model(SyftModelClass):
    """Causal language model plus tokenizer loaded from a local folder."""

    def __user_init__(self, assets) -> None:
        """Load the model and tokenizer from the folder referenced by the first asset.

        Args:
            assets: Indexable collection; ``assets[0]`` must expose a
                ``model_folder`` attribute that ``from_pretrained`` can load from.
                NOTE(review): presumably a SyftFolder-like object — confirm.
        """
        syft_files = assets[0]
        model_folder = syft_files.model_folder

        # third party
        from transformers import AutoModelForCausalLM
        from transformers import AutoTokenizer

        self.model = AutoModelForCausalLM.from_pretrained(model_folder)
        self.tokenizer = AutoTokenizer.from_pretrained(model_folder)

    def inference(self, prompt: str, decode: bool = False) -> Any:
        """Generate a sampled continuation of ``prompt``.

        Args:
            prompt: Text to continue.
            decode: When ``False`` (the default, matching the previous behaviour)
                return the value produced by ``self.model.generate``. When ``True``
                return the first generated sequence decoded to text.

        Returns:
            The generated tokens, or the decoded text when ``decode`` is true.
        """
        input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids
        gen_tokens = self.model.generate(
            input_ids,
            do_sample=True,
            temperature=0.9,
            max_length=100,
        )
        if not decode:
            return gen_tokens
        # The decode step used to sit after an unconditional return and referenced a
        # bare `tokenizer` name; it is now reachable and uses the instance tokenizer.
        return self.tokenizer.batch_decode(gen_tokens)[0]

In [None]:
# NOTE(review): SyftModelClass.__init__ takes a required `assets` argument, so this
# call raises TypeError as written. It also rebinds `model`, which earlier in the
# notebook named the sy.Model entry — consider a distinct name.
model = GPT2Model()

In [None]:
# Run one generation and keep whatever `inference` returns for inspection below.
a = model.inference("What is a dog?")

In [None]:
# Inspect the Python type of the inference result.
type(a)

In [None]:
# Serialize the inference result to bytes with Syft's serializer.
b = sy.serialize(a, to_bytes=True)

In [None]:
# Inspect the type of the serialized payload.
type(b)

In [None]:
# Display the serialized payload.
# NOTE(review): this echoes the whole payload into the cell output; consider
# showing len(b) or a short slice instead.
b

In [None]:
def loop(weights):
    """Sketch: call ``model.inference`` once per index in ``range(dataset)``.

    Design note carried over from the original comments: in this variant only a
    pointer to the weights is passed in, so the (huge, ~1000 line) model class
    would have to be copied and pasted into the function.

    ``weights`` is not used by the body; ``model`` and ``dataset`` are read from
    the surrounding (notebook) namespace. Nothing is returned.
    """
    for sample_index in range(dataset):
        model.inference(sample_index)

In [None]:
def loop(model):
    """Sketch: call ``model.inference`` once per index in ``range(dataset)``.

    In this variant the model object itself is passed in. ``dataset`` is read
    from the surrounding (notebook) namespace. Nothing is returned.
    """
    for sample_index in range(dataset):
        model.inference(sample_index)

In [None]:
def folder_to_zip_bytes(folder_path, keep_files=None):
    """Zip the contents of ``folder_path`` in memory and return the archive bytes.

    Args:
        folder_path: Directory whose files are archived recursively. Each entry is
            stored under its path relative to this directory.
        keep_files: Optional file name / relative path, or a collection of them.
            When given, only files whose base name or relative path is listed are
            archived. ``None`` (the default) archives every file.
            NOTE(review): this parameter exists because ``folder_to_action_obj``
            passes a second argument; the filter semantics are an assumption —
            confirm against the intended use.

    Returns:
        bytes: The DEFLATE-compressed zip archive.
    """
    # stdlib
    from io import BytesIO
    import os
    import zipfile

    # Normalise the optional filter to a set; a bare string means a single entry.
    if keep_files is None:
        wanted = None
    elif isinstance(keep_files, str):
        wanted = {keep_files}
    else:
        wanted = set(keep_files)

    # Build the archive entirely in memory.
    zip_buffer = BytesIO()
    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                # Store entries relative to folder_path so the archive is relocatable.
                relative_path = os.path.relpath(file_path, folder_path)
                if wanted is not None:
                    candidates = {
                        file,
                        relative_path,
                        relative_path.replace(os.sep, "/"),
                    }
                    if wanted.isdisjoint(candidates):
                        continue
                zip_file.write(file_path, relative_path)

    # getvalue() returns the whole buffer regardless of the current position.
    return zip_buffer.getvalue()

In [None]:
def extract_zip_bytes_to_folder(zip_bytes, extract_folder_path):
    """Unpack an in-memory zip archive into ``extract_folder_path``.

    The destination directory (including missing parents) is created first if it
    does not already exist.

    Args:
        zip_bytes: Raw bytes of a zip archive.
        extract_folder_path: Directory to extract the archive members into.
    """
    # stdlib
    from io import BytesIO
    import os
    import zipfile

    # Ensure the destination exists before opening the archive.
    os.makedirs(extract_folder_path, exist_ok=True)

    # Read the archive straight from memory and unpack every member.
    with zipfile.ZipFile(BytesIO(zip_bytes)) as archive:
        archive.extractall(path=extract_folder_path)

In [None]:
def folder_to_action_obj(folder_path, keep_files=None):
    """Zip ``folder_path`` and wrap the resulting bytes in a Syft ``ActionObject``.

    Args:
        folder_path: Directory handed to ``folder_to_zip_bytes``.
        keep_files: Forwarded unchanged as the second positional argument to
            ``folder_to_zip_bytes``.

    Returns:
        The object returned by ``sy.ActionObject.from_obj`` for the zip bytes.
    """
    return sy.ActionObject.from_obj(folder_to_zip_bytes(folder_path, keep_files))


def get_serde_size(obj):
    """Return the size of ``obj``'s Syft-serialized form in MiB (bytes / 1024 / 1024)."""
    serialized = sy.serialize(obj, to_bytes=True)
    bytes_per_mib = 1024 * 1024
    return len(serialized) / bytes_per_mib

In [None]:
# stdlib
import os

# Set TOKENIZERS_PARALLELISM to "false" for the Hugging Face tokenizers library.
# NOTE(review): this cell sits after the cells that load and use the tokenizer, so it
# only affects code run afterwards — presumably it belongs with the other environment
# setup near the top; confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"