Article: https://www.snowflake.com/blog/container-services-llama2-snowpark-ml/

Compute Pool: skhara_compute_gpu7

In [None]:
!pip install transformers

In [80]:
from snowflake.snowpark.session import Session
import snowflake.snowpark.types as T
import snowflake.snowpark.functions as F
from snowflake.snowpark.functions import col

import snowflake.ml.modeling.preprocessing as snowml
from snowflake.ml.modeling.xgboost import XGBClassifier
from snowflake.ml.modeling.preprocessing import KBinsDiscretizer, OrdinalEncoder, OneHotEncoder
from snowflake.ml.modeling.impute import SimpleImputer

import json

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [111]:
connection_parameters = json.load(open('creds.json'))
session = Session.builder.configs(connection_parameters).create()

# Load LLAMA Model

In [3]:
HF_AUTH_TOKEN = "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" #Your token from Hugging Face

In [4]:
from transformers import pipeline
from snowflake.ml.model.models import huggingface_pipeline

llama_model = huggingface_pipeline.HuggingFacePipelineModel(task="text-generation",
                                                            model="meta-llama/Llama-2-7b-chat-hf",
                                                            token=HF_AUTH_TOKEN,
                                                            return_full_text=False,
                                                            max_new_tokens=100) 

# Register the model

In [5]:
from snowflake.ml.registry import model_registry

In [6]:
registry_name = 'EASY_LLAMA_7B'
schema_name = 'LLM_REGISTRY'

model_registry.create_model_registry(session= session,
                                     database_name= registry_name,
                                     schema_name= schema_name)

registry = model_registry.ModelRegistry(session= session,
                                        database_name= registry_name,
                                        schema_name= schema_name)

create_model_registry() is in private preview since 0.2.0. Do not use it in production. 


In [7]:
MODEL_NAME = "LLAMA2_MODEL_7b_CHAT"
MODEL_VERSION = "3"

llama_model_ref= registry.log_model(
    model_name=MODEL_NAME,
    model_version=MODEL_VERSION,
    model=llama_model
)

llama_model_ref



# Deploy Model

In [8]:
from snowflake.ml.model import deploy_platforms

llama_model_ref.deploy(
  deployment_name="llama_predict",
  platform= deploy_platforms.TargetPlatform.SNOWPARK_CONTAINER_SERVICES,   
  options={
            "compute_pool": "SKHARA_COMPUTE_GPU3",
            "num_gpus": 1
    },
)



{'name': 'EASY_LLAMA_7B.LLM_REGISTRY.llama_predict',
 'platform': <TargetPlatform.SNOWPARK_CONTAINER_SERVICES: 'SNOWPARK_CONTAINER_SERVICES'>,
 'target_method': '__call__',
 'signature': ModelSignature(
                     inputs=[
                         FeatureSpec(dtype=DataType.STRING, name='inputs')
                     ],
                     outputs=[
                         FeatureSpec(dtype=DataType.STRING, name='outputs')
                     ]
                 ),
 'options': {'compute_pool': 'SKHARA_COMPUTE_GPU3', 'num_gpus': 1},
 'details': {'image_name': 'sfsenorthamerica-fcto-spc.registry.snowflakecomputing.com/easy_llama_7b/llm_registry/snowml_repo/116da812e88f2751324c6a16eb00de3726ed06a3:latest',
  'service_spec': "spec:\n  container:\n  - env:\n      MODEL_ZIP_STAGE_PATH: EASY_LLAMA_7B.LLM_REGISTRY.snowml_model_7b8a20b86d4611ee9fe20a72b796458c/7b8a20b86d4611ee9fe20a72b796458c.zip\n      NUM_WORKERS: 1\n      SNOWML_USE_GPU: true\n      TARGET_METHOD: __call__\n     

# Test-driving the deployment

### Load Data

In [9]:
import pandas as pd
news_dataset = pd.read_json("News_Category_Dataset_v3.json", lines=True).convert_dtypes()

NEWS_DATA_TABLE_NAME = "NEWS_DATASET"
news_dataset_sp_df = session.create_dataframe(news_dataset)
news_dataset_sp_df.write.mode("overwrite").save_as_table(NEWS_DATA_TABLE_NAME)

### Input: Prompt Engineering

In [105]:
sdf_data = session.table('NEWS_DATASET').limit(5)

In [106]:
df = sdf_data.to_pandas()

In [107]:
def add_prompt(question):
    prompt = f'''[INST] <>
    Your output will be parsed by a computer program as a JSON object. Please respond ONLY with valid json that conforms to this JSON schema: {{"properties": {{"category": {{"type": "string","description": "The category that the news should belong to."}},"keywords": {{"type": "array":"description": "The keywords that are mentioned in the news.","items": [{{"type": "string"}}]}},"importance": {{"type": "number","description": "A integer from 1 to 10 to show if the news is important. The higher the number, the more important the news is."}}}},"required": ["properties","keywords","importance"]}}

    As an example-
    Input: "Residents ordered to evacuate amid threat of growing wildfire in Washington state, medical facilities sheltering in place"
    Output Json: {{"category": "Natural Disasters","keywords": ["evacuate", "wildfire", "Washington state", "medical facilities"],"importance": 8}}
    <>
    
    Actual Input: {question}
    [/INST]'''
    return prompt

In [108]:
df['inputs'] = df['short_description'].apply(add_prompt)
print(df['inputs'].iloc[0])

[INST] <>
    Your output will be parsed by a computer program as a JSON object. Please respond ONLY with valid json that conforms to this JSON schema: {"properties": {"category": {"type": "string","description": "The category that the news should belong to."},"keywords": {"type": "array":"description": "The keywords that are mentioned in the news.","items": [{"type": "string"}]},"importance": {"type": "number","description": "A integer from 1 to 10 to show if the news is important. The higher the number, the more important the news is."}},"required": ["properties","keywords","importance"]}

    As an example-
    Input: "Residents ordered to evacuate amid threat of growing wildfire in Washington state, medical facilities sheltering in place"
    Output Json: {"category": "Natural Disasters","keywords": ["evacuate", "wildfire", "Washington state", "medical facilities"],"importance": 8}
    <>
    
    Actual Input: Health experts said it is too early to predict whether demand would mat

In [77]:
res = llama_model_ref.predict(
    deployment_name= 'llama_predict',
    data= df[['inputs']]
)

### Output: Clean Output

In [109]:
import json
import re

def format_output(output_string):
    parsed_list = json.loads(output_string)
    generated_text = parsed_list[0]['generated_text']
    generated_text
    
    start_idx = generated_text.find('{')
    end_idx = generated_text.rfind('}') + 1  # +1 to include the closing brace
    
    # Extract the JSON string and parse it into a dictionary
    json_str = generated_text[start_idx:end_idx]
    json_dict = json.loads(json_str)
    
    # Now json_dict contains the formatted output
    return json_dict

In [79]:
for i in range(len(df)):
    print('\n\n **** ****')
    print(df['short_description'].iloc[i])
    print(format_output(res['outputs'].iloc[i]))



 **** ****
Health experts said it is too early to predict whether demand would match up with the 171 million doses of the new boosters the U.S. ordered for the fall.
{'category': 'Health', 'keywords': ['demand', 'boosters', 'fall', 'U.S.'], 'importance': 6}


 **** ****
He was subdued by passengers and crew when he fled to the back of the aircraft after the confrontation, according to the U.S. attorney's office in Los Angeles.
{'category': 'Crime', 'keywords': ['flight', 'passengers', 'crew', 'confrontation', "U.S. attorney's office"], 'importance': 7}


 **** ****
"Until you have a dog you don't understand what could be eaten."
{'category': 'Pets', 'keywords': ['dog', 'food'], 'importance': 5}


 **** ****
"Accidentally put grown-up toothpaste on my toddler’s toothbrush and he screamed like I was cleaning his teeth with a Carolina Reaper dipped in Tabasco sauce."
{'category': 'Humor', 'keywords': ['toothpaste', 'toddler', 'screamed', 'Carolina Reaper', 'Tabasco sauce'], 'importance'

# Get Deployment Again

In [113]:
from snowflake.ml.registry import model_registry
registry_name = 'EASY_LLAMA_7B'
schema_name = 'LLM_REGISTRY'

# model_registry.create_model_registry(session= session,
#                                      database_name= registry_name,
#                                      schema_name= schema_name)

registry = model_registry.ModelRegistry(session= session,
                                        database_name= registry_name,
                                        schema_name= schema_name)

In [104]:
model_list = registry.list_models()
model_list.to_pandas()

Unnamed: 0,CREATION_CONTEXT,CREATION_ENVIRONMENT_SPEC,CREATION_ROLE,CREATION_TIME,ID,INPUT_SPEC,NAME,OUTPUT_SPEC,RUNTIME_ENVIRONMENT_SPEC,TYPE,URI,VERSION,ARTIFACT_IDS,DESCRIPTION,METRICS,TAGS,REGISTRATION_TIMESTAMP
0,,"{\n ""python"": ""3.9.17""\n}","""SPC_USER_ROLE""",2023-10-17 14:23:44.456000-07:00,710e39fc6d3311eea1ce0a72b796458c,,LLAMA2_MODEL_7b_CHAT,,,huggingface_pipeline,sfc://EASY_LLAMA_7B.LLM_REGISTRY.SNOWML_MODEL_...,1,,,,,2023-10-17 14:23:46.020000-07:00
1,,"{\n ""python"": ""3.9.17""\n}","""SPC_USER_ROLE""",2023-10-17 16:37:23.264000-07:00,1dd707926d4611eea1ce0a72b796458c,,LLAMA2_MODEL_7b_CHAT,,,huggingface_pipeline,sfc://EASY_LLAMA_7B.LLM_REGISTRY.SNOWML_MODEL_...,2,,,,,2023-10-17 16:37:24.567000-07:00
2,,"{\n ""python"": ""3.9.17""\n}","""SPC_USER_ROLE""",2023-10-17 16:40:08.009000-07:00,7b8a20b86d4611ee9fe20a72b796458c,,LLAMA2_MODEL_7b_CHAT,,,huggingface_pipeline,sfc://EASY_LLAMA_7B.LLM_REGISTRY.SNOWML_MODEL_...,3,[],,,,2023-10-17 16:40:09.367000-07:00


In [102]:
model_name = 'LLAMA2_MODEL_7b_CHAT'

In [103]:
model = model_registry.ModelReference(registry=registry, model_name=model_name, model_version='3')
print("Registered new model:", model)

Registered new model: <snowflake.ml.registry.model_registry.ModelReference object at 0x30890fa90>


In [115]:
res = model.predict(
    deployment_name= 'llama_predict',
    data= df[['inputs']]
)

In [116]:
for i in range(len(df)):
    print('\n\n **** ****')
    print(df['short_description'].iloc[i])
    print(format_output(res['outputs'].iloc[i]))



 **** ****
Health experts said it is too early to predict whether demand would match up with the 171 million doses of the new boosters the U.S. ordered for the fall.
{'category': 'Health', 'keywords': ['boosters', 'demand', 'fall', 'U.S.'], 'importance': 6}


 **** ****
He was subdued by passengers and crew when he fled to the back of the aircraft after the confrontation, according to the U.S. attorney's office in Los Angeles.
{'category': 'Society', 'keywords': ['confrontation', 'flight', 'passengers', 'crew', "U.S. attorney's office"], 'importance': 7}


 **** ****
"Until you have a dog you don't understand what could be eaten."
{'category': 'Quotes', 'keywords': ['dog', 'eaten'], 'importance': 5}


 **** ****
"Accidentally put grown-up toothpaste on my toddler’s toothbrush and he screamed like I was cleaning his teeth with a Carolina Reaper dipped in Tabasco sauce."
{'category': 'Society and Culture', 'keywords': ['toothpaste', 'toothbrush', 'toddler', 'scream'], 'importance': 5}
