In [1]:
import os
import pathlib

# Root of all local scratch data used by this notebook.
fsRoot = '/tmp/llm'

# Point the Hugging Face cache environment variables at sub-directories of fsRoot.
os.environ['HF_HOME'] = f'{fsRoot}/misc'
os.environ['HF_DATASETS_CACHE'] = f'{fsRoot}/datasets'
os.environ['TRANSFORMERS_CACHE'] = f'{fsRoot}/models'

# Hugging Face model identifier to download.
modelID = "google/flan-t5-large"

# Output layout: serialized model files and tokenizer files live under zipRoot.
zipRoot = f'{fsRoot}/root'
modelDest = f'{zipRoot}/saved'
tokenDir = f'{zipRoot}/token'

# Create every directory up front; existing directories are left untouched.
dirs = [
    os.environ['HF_HOME'],
    os.environ['HF_DATASETS_CACHE'],
    os.environ['TRANSFORMERS_CACHE'],
    zipRoot,
    modelDest,
    tokenDir,
]
for d in dirs:
    pathlib.Path(d).mkdir(parents=True, exist_ok=True)

In [2]:
from snowflake.snowpark.session import Session
import snowflake.connector
import snowflake.snowpark.functions as F

from transformers import T5Tokenizer,T5ForConditionalGeneration, T5Config
import torch

import json

Create a connection to Snowflake. Snowpark supports the following authentication methods:
* Username and password
* externalbrowser (Okta, ADFS, or any other SAML 2.0-compliant identity provider (IdP))
* oauth
* Key pair

This example is using a JSON file with the following structure
```
{
    "account":"MY SNOWFLAKE ACCOUNT",
    "user": "MY USER",
    "password":"MY PASSWORD",
    "role":"MY ROLE",
    "warehouse":"MY WH",
    "database":"MY DB",
    "schema":"MY SCHEMA"
}
```

In [3]:
# Read the Snowflake connection settings from the JSON config file.
with open('../config.json') as f:
    data = json.load(f)

USERNAME = data['user']
PASSWORD = data['password']
SF_ACCOUNT = data['account']
SF_ROLE = data['role']
SF_WH = data['warehouse'] # std_warehouse == STD_WH, max_warehouse == MAX_CON=1
SF_DB = data['database']
SF_SCHEMA = data['schema']

# One dictionary feeds both the Snowpark session and the plain connector below.
connection_parameters = dict(
    account=SF_ACCOUNT,  # everything between https:// and snowflakecomputing.com
    user=USERNAME,       # shown by `desc user <username>;` or under Profile | Username in the UI
    password=PASSWORD,
    role=SF_ROLE,
    warehouse=SF_WH,
    database=SF_DB,
    schema=SF_SCHEMA,
)

# Snowpark session, used for the DataFrame / session.sql calls in this notebook.
session = Session.builder.configs(connection_parameters).create()

# Separate Python-connector connection and cursor, used for the PUT and DDL statements.
con = snowflake.connector.connect(**connection_parameters)
cur = con.cursor()

In [4]:

# Sanity-check the Snowpark session: show the active role and the Snowflake version.
#sqlStatment = 'select current_account(), current_role(), current_user(), current_database(), current_schema(), current_warehouse(), current_version()'
sqlStatment = 'select  current_role(),  current_version()'
session.sql(sqlStatment).show()
# Run SHOW WAREHOUSES for the configured warehouse. The rows returned by collect() are
# discarded; the query is executed so that the next statement can read its result set.
sqlStatment = f"show warehouses like '{SF_WH}'"
session.sql(sqlStatment).collect()
# Read the "type" column from the previous query's result via result_scan(last_query_id()).
# Order matters: this must run directly after the SHOW WAREHOUSES statement above.
sqlStatment = 'SELECT "type" as warehouseType FROM table(result_scan(last_query_id()))'
session.sql(sqlStatment).show()

------------------------------------------
|"CURRENT_ROLE()"  |"CURRENT_VERSION()"  |
------------------------------------------
|ACCOUNTADMIN      |7.31.0               |
------------------------------------------

----------------------
|"WAREHOUSETYPE"     |
----------------------
|SNOWPARK-OPTIMIZED  |
----------------------



## Confirm expected download sizes (e.g. pytorch_model.bin)
  * flan-t5-large == 3.1 GB
  * size could be limited within Sandbox and you may run out of room in /tmp
  

In [5]:
# Download (or load from cache) the tokenizer and the model weights for modelID.
# Both calls pass cache_dir=fsRoot explicitly.
# NOTE(review): an explicit cache_dir presumably takes precedence over the
# TRANSFORMERS_CACHE / HF_HOME environment variables set earlier -- confirm.
tokenizer = T5Tokenizer.from_pretrained(modelID, cache_dir=fsRoot)
model = T5ForConditionalGeneration.from_pretrained(modelID, cache_dir=fsRoot)

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=True`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Downloading (…)lve/main/config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

## Serialize model and tokenizers
  
  * Use torch to save model as dictionary
  * Save the model config.json
  * tokenizer is saved to a directory then zipped


In [6]:
# Serialize the model for upload:
#   * pydict.pt   -- the model's state dict, written with torch.save
#   * config.json -- the model configuration
#   * tokenizer files, written to tokenDir (zipped in a later cell)
# Set torch_dir to True to additionally write the full save_pretrained() layout.
torch_dir = False

torch.save(model.state_dict(), modelDest + '/pydict.pt')

# Use the same cache_dir as the model download so the config file already fetched
# for modelID is reused, instead of being fetched again into a different cache.
config = T5Config.from_pretrained(modelID, cache_dir=fsRoot)
config.to_json_file(modelDest + '/config.json')
print ('Saving Dictionary Model to:', modelDest)

if torch_dir:
    model.save_pretrained(modelDest)
    print ('Saving  Model to:', modelDest)

tokenizer.save_pretrained(tokenDir)
print ('Saving Tokenizer to:', tokenDir)

Downloading (…)lve/main/config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Saving Dictionary Model to: /tmp/llm/root/saved
Saving Tokenizer to: /tmp/llm/root/token


In [7]:
import zipfile

def mkZip(inDir, zipName, overwrite=False):
    """Zip the contents of a directory and report the archive's name and size.

    Args:
        inDir: Directory whose files (including sub-directories) are archived.
        zipName: Archive path without the '.zip' extension.
        overwrite: When False and the archive already exists, it is left as is.

    Returns:
        A tuple (zip_name, size_in_bytes), where zip_name is zipName + '.zip'.

    Archive member names are relative to the parent of inDir, so every entry
    starts with the directory's own name (e.g. 'token/spiece.model') and the
    archive extracts into a single top-level folder.
    """
    zip_name = zipName + '.zip'

    if overwrite or not os.path.exists(zip_name):
        # Member names were previously computed relative to the zip file itself,
        # which produced '../'-prefixed entries; anchor them at inDir's parent.
        base_dir = os.path.dirname(os.path.abspath(inDir))
        with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zip_ref:
            for folder_name, _subfolders, filenames in os.walk(inDir):
                for filename in filenames:
                    file_path = os.path.join(folder_name, filename)
                    zip_ref.write(file_path, arcname=os.path.relpath(file_path, base_dir))
        # The with-statement closes the archive; no explicit close() is needed.

    return (zip_name, os.stat(zip_name).st_size)

In [8]:
# Make the zip root the working directory so the following cells can refer to
# the tokenizer folder and the archive by relative name, then report where we are.
os.chdir(zipRoot)
retval = os.getcwd()
print("Directory changed successfully %s" % retval)

Directory changed successfully /private/tmp/llm/root


## Create the tokenizer ZIP
  * tokenizer is small
  * tokenizer files can vary (may need 'n' with 2 <= n <=6 ?)

In [9]:
# Zip the tokenizer folder (relative to the current working directory) and
# remember the archive's absolute path for the upload step.
inputDir, zipName = 'token', 'ztoken'
zipFile = f'{zipName}.zip'
zipPath = '/'.join((zipRoot, zipFile))

zip_name, zipSize = mkZip(inputDir, zipName, overwrite=True)
print(f'{zip_name} : {zipSize / (1024*1024)} MB')
print(inputDir, zipPath)

ztoken.zip : 0.3941621780395508 MB
token /tmp/llm/root/ztoken.zip


## Copy the tokenizer ZIP file to a stage
  * tokenizer is small
  * tokenizer files can vary (may need 'n' with 2 <= n <=6 ?)
  * zip file is also useful for file synchronization

In [10]:
# Create (or recreate) the stage that will hold the tokenizer ZIP and the model files.
# NOTE(review): CREATE OR REPLACE presumably discards files already held by an existing
# stage of this name -- confirm before re-running against a populated stage.
session.sql('CREATE OR REPLACE STAGE TMP_LLM_3GB_STAGE').collect()

[Row(status='Stage area TMP_LLM_3GB_STAGE successfully created.')]

In [11]:
# Upload the tokenizer archive to the stage as-is (no extra compression),
# replacing any file of the same name already there.
cmd = f"PUT file://{zipPath} @TMP_LLM_3GB_STAGE AUTO_COMPRESS=FALSE OVERWRITE=TRUE;"
print(cmd)
cur.execute(cmd)

PUT file:///tmp/llm/root/ztoken.zip @TMP_LLM_3GB_STAGE AUTO_COMPRESS=FALSE OVERWRITE=TRUE;


<snowflake.connector.cursor.SnowflakeCursor at 0x7fedf28ddf60>

In [12]:
import glob

# Collect every file written to the model output directory; these are uploaded next.
print(f'Loading files from: {modelDest}')
uploadFiles = glob.glob(f'{modelDest}/*')
uploadFiles

Loading files from: /tmp/llm/root/saved


['/tmp/llm/root/saved/config.json', '/tmp/llm/root/saved/pydict.pt']

## Upload the Large Model files (dictionary) and config file
  * Note - the assumption the weights are in a single file is not correct for larger LLMs
  * e.g. a larger LLMs weights may be split up into multiple files

In [13]:
# Upload each serialized model file to the stage and report its local size.
# The loop variable is deliberately NOT called zipPath: that name holds the tokenizer
# archive path, and reusing it here would make a later re-run of the tokenizer PUT
# cell upload the last model file instead.
for uploadPath in uploadFiles:
    cmd = "PUT file://" + uploadPath + " @TMP_LLM_3GB_STAGE AUTO_COMPRESS=FALSE OVERWRITE=TRUE;"
    print (cmd)
    cur.execute(cmd)
    file_stats = os.stat(uploadPath)
    print (f'{uploadPath} : {file_stats.st_size / (1024*1024)} MB')

PUT file:///tmp/llm/root/saved/config.json @TMP_LLM_3GB_STAGE AUTO_COMPRESS=FALSE OVERWRITE=TRUE;
/tmp/llm/root/saved/config.json : 0.0007104873657226562 MB
PUT file:///tmp/llm/root/saved/pydict.pt @TMP_LLM_3GB_STAGE AUTO_COMPRESS=FALSE OVERWRITE=TRUE;
/tmp/llm/root/saved/pydict.pt : 2987.7108659744263 MB


## Confirm files within stage

In [14]:
# List the files currently in the stage to confirm that the uploads are present.
session.sql("ls @TMP_LLM_3GB_STAGE").show()

--------------------------------------------------------------------------------------------------------------------
|"name"                         |"size"      |"md5"                                 |"last_modified"               |
--------------------------------------------------------------------------------------------------------------------
|tmp_llm_3gb_stage/config.json  |752         |de5837bfe22958ed40a3ce95e53cd0fe      |Mon, 4 Sep 2023 17:54:14 GMT  |
|tmp_llm_3gb_stage/pydict.pt    |3132841920  |886a400b58e78ccbff3ac6a4ddc30887-374  |Mon, 4 Sep 2023 17:54:45 GMT  |
|tmp_llm_3gb_stage/ztoken.zip   |413312      |fba625321f736711054d648817e9fd0f      |Mon, 4 Sep 2023 17:54:14 GMT  |
--------------------------------------------------------------------------------------------------------------------



In [15]:
# DDL that creates (or replaces) the Python UDF zip_large_llm_infer(str string) -> STRING.
#   * HANDLER is large_llm_inference; IMPORTS lists the three staged files
#     (pydict.pt, config.json, ztoken.zip).
#   * Module-level code in the UDF body sets the Hugging Face offline flags, extracts
#     ztoken.zip into /tmp while holding a file lock (only when /tmp/token does not
#     exist yet), loads the tokenizer from /tmp/token, and builds the model from
#     config.json plus the state dict in pydict.pt.
#   * The handler tokenizes the input, generates with max_length=50, decodes the first
#     sequence, removes the pad and EOS token strings, then strips and upper-cases it.
#     Only those two token strings are removed; any other special token is left in.
# NOTE(review): the in-body comment says the ZIP is kept "up to date", but the code only
# extracts when /tmp/token is missing -- confirm which behavior is intended.
# Everything between the triple quotes is sent to Snowflake verbatim, so any edit
# inside the string changes the deployed function.
sql_statement = '''CREATE OR REPLACE FUNCTION zip_large_llm_infer(str string)
RETURNS STRING
LANGUAGE PYTHON
RUNTIME_VERSION = 3.8
HANDLER = 'large_llm_inference'
PACKAGES = ('transformers', 'pytorch', 'sentencepiece')
IMPORTS = ('@TMP_LLM_3GB_STAGE/pydict.pt', '@TMP_LLM_3GB_STAGE/config.json', '@TMP_LLM_3GB_STAGE/ztoken.zip')
AS
$$
import fcntl
import os
import sys
import re
import threading
import zipfile
from transformers import T5Tokenizer,T5ForConditionalGeneration, T5Config
import torch

os.environ["HF_DATASETS_OFFLINE"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "1" 

# File lock class for synchronizing write access to /tmp.
class FileLock:
  def __enter__(self):
    self._lock = threading.Lock()
    self._lock.acquire()
    self._fd = open('/tmp/lockfile.LOCK', 'w+')
    fcntl.lockf(self._fd, fcntl.LOCK_EX)

  def __exit__(self, type, value, traceback):
    self._fd.close()
    self._lock.release()

# Get the location of the import directory. Snowflake sets the import
# directory location so code can retrieve the location via sys._xoptions.
IMPORT_DIRECTORY_NAME = "snowflake_import_directory"
import_dir = sys._xoptions[IMPORT_DIRECTORY_NAME]

# Get the path to the ZIP file and set the location to extract to.
model_file_path = import_dir + "pydict.pt"
model_json_file_path = import_dir + "config.json"
token_zip_file_path = import_dir + "ztoken.zip"
extracted = '/tmp'
modelSrc = extracted + '/token'

# make sure the ZIP file is up to date (e.g. OVERWRITE old copy)
with FileLock():
  if not os.path.isdir(modelSrc):
    with zipfile.ZipFile(token_zip_file_path, 'r') as myzip:
      myzip.extractall(extracted)

# Load the model from the extracted file.
tokenizer = T5Tokenizer.from_pretrained(modelSrc)

model = T5ForConditionalGeneration.from_pretrained(pretrained_model_name_or_path=None, \
  config=model_json_file_path, \
    state_dict =  torch.load(model_file_path))

def large_llm_inference(text):
  input_ids = tokenizer(text, return_tensors="pt").input_ids
  outputs = model.generate(input_ids ,max_length=50)
  response = tokenizer.decode(outputs[0])
  response = response.replace(tokenizer.pad_token, "").replace(tokenizer.eos_token, "")
  return (response.strip().upper())
  
$$;
'''
# Register the function through the plain connector cursor.
cur.execute(sql_statement)

<snowflake.connector.cursor.SnowflakeCursor at 0x7fedf28ddf60>

In [16]:
# Smoke-test the UDF with a single hard-coded prompt, showing input and result side by side.
sqlStatment = (
    "select 'translate English to French: What time is it??' as input, "
    "zip_large_llm_infer( 'translate English to French: What time is it??' ) as llm_result"
)
session.sql(sqlStatment).show()

-------------------------------------------------------------------------------
|"INPUT"                                         |"LLM_RESULT"                |
-------------------------------------------------------------------------------
|translate English to French: What time is it??  |<UNK> QUELLE HEURE EST-CE?  |
-------------------------------------------------------------------------------



## Optional -- assumes you have a table of prompts

In [17]:
# Reference the prompt table and report how many rows it holds.
prompt_df = session.table("FLAN_PROMPT")
n_prompts = prompt_df.count()
print(f"Nbr of prompts: {n_prompts:,}")
#prompt_df.show(max_width=150)

Nbr of prompts: 14


In [18]:
# Apply the UDF to every prompt and display each prompt next to its generated text.
prompt_col = F.col("PROMPT")
(
    prompt_df
    .select(prompt_col)
    .select(prompt_col, F.call_function("zip_large_llm_infer", prompt_col))
    .show(15, max_width=150)
)

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"PROMPT"                                                                                                                                                |"ZIP_LARGE_LLM_INFER(""PROMPT"")"                                                          |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|translate English to French: What time is it??                                                                                                          |<UNK> QUELLE HEURE EST-CE?                                                                 |
|translate E

# Done - ZZZZ