# iVSM Demo

In [None]:
import os
import time
import pandas as pd

from sklearn2pmml.pipeline import PMMLPipeline
from sklearn2pmml import sklearn2pmml
from sklearn.ensemble import RandomForestClassifier

from teradataml import create_context
from teradataml.context.context import get_connection
from teradataml.dataframe.copy_to import copy_to_sql

### Set up the connection

In [None]:
# Open the teradataml context and keep three handles around for later cells:
# the engine returned by create_context, the teradataml connection, and a raw
# DB-API cursor for statements executed directly.
# NOTE(review): host and credentials are hard-coded demo values.
engine = create_context(
    host="host.docker.internal",
    username="ivsm_user",
    password="ivsm_user",
)
conn = get_connection()
raw_connection = engine.raw_connection()
cursor = raw_connection.cursor()

### Read and upload dataset to Vantage

In [None]:
# Download the iris CSV and rename its columns to the names used by the
# rest of this notebook (four measurements plus the species label).
iris_csv_url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
iris_column_names = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
    "species",
]

iris_df = pd.read_csv(iris_csv_url)
iris_df.columns = iris_column_names

# Last expression of the cell: shows the first rows as the cell output.
iris_df.head()

In [None]:
from teradataml.dataframe.copy_to import copy_to_sql
from teradataml.dataframe.dataframe import DataFrame

# Build a teradataml DataFrame over the "iris_train" table.
# NOTE(review): in this file the table is uploaded by the copy_to_sql calls
# that come after this statement, and `df` is not used in the visible cells.
# Presumably this fails on a database where iris_train does not exist yet;
# confirm, and consider running it after the upload.
df = DataFrame("iris_train")

In [None]:
# Upload the first 120 rows as the training table and the remaining rows as
# the table to score. Both uploads store the pandas index in an "idx" column
# and replace the target table if it already exists.
upload_options = dict(index=True, index_label="idx", if_exists="replace")

copy_to_sql(iris_df.iloc[:120], table_name="iris_train", **upload_options)
copy_to_sql(iris_df.iloc[120:], table_name="iris_to_score", **upload_options)

### Read training data from the database and prepare datasets

In [None]:
import pandas as pd
# Pull the training table back into pandas, then split it into the feature
# frame X (the four measurement columns) and the single-column label frame y.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
train_sql = "select * from ivsm_user.iris_train"

train_pd = pd.read_sql(train_sql, conn)

X = train_pd[feature_columns]
y = train_pd[['species']]

In [None]:
# Preview the first rows of the training data read back from the database.
train_pd.head()

### Train the model

In [None]:
# Wrap a default-configured random forest in a PMMLPipeline (single step named
# "classifier") and fit it on the features and the flattened label column.
random_forest = RandomForestClassifier()
pipeline = PMMLPipeline([("classifier", random_forest)])

# y is a one-column DataFrame; ravel() turns its values into a 1-D array.
labels = y.values.ravel()
pipeline.fit(X, labels)

### Export model to PMML

In [None]:
# Serialize the fitted pipeline to the file "model.pmml".
# NOTE(review): with_repr=True is forwarded to sklearn2pmml; its exact effect
# is not visible in this file, so check the sklearn2pmml documentation.
sklearn2pmml(pipeline, "model.pmml", with_repr = True)

### Load model to DB

In [None]:
# Remove any earlier row for this model version so that re-running the cell
# does not insert a duplicate.
cursor.execute("delete from ivsm_user.aoa_ivsm_models where model_version = 'iris_model'")

# Read the PMML document produced by the export step, which writes
# "model.pmml". The name "iris_model.pmml" is never created by this notebook.
# The context manager closes the file handle once the bytes are read.
with open("model.pmml", "rb") as pmml_file:
    model_bytes = pmml_file.read()

# Store the model bytes; values are bound as parameters, not spliced into SQL.
cursor.execute(
    "insert into ivsm_user.aoa_ivsm_models (model_version, model_id, project_id, model) values(?,?,?,?)",
    ("iris_model", "iris_model", "project", model_bytes),
)

### Score the data

In [None]:
# Display the current contents of the model table ivsm_user.aoa_ivsm_models.
pd.read_sql("select * from ivsm_user.aoa_ivsm_models", conn)

In [None]:
# Display the contents of ivsm_user.aoa_model_61ce3b37, the object the scoring
# query in this file reads its model from.
# NOTE(review): nothing in the visible cells creates this object; presumably it
# already exists in the database. Confirm.
pd.read_sql("select * from ivsm_user.aoa_model_61ce3b37", conn)

In [None]:
# Empty the results table so repeated runs do not accumulate rows.
cursor.execute("delete from ivsm_user.iris_scoring_results")

# Score ivsm_user.iris_to_score with ivsm.IVSM_SCORE, taking the model from
# ivsm_user.aoa_model_61ce3b37 (passed as DIMENSION) and preserving the "idx"
# column; the output is inserted into ivsm_user.iris_scoring_results.
# NOTE(review): the ModelID used here differs from the 'iris_model' id that the
# load cell inserts into aoa_ivsm_models. Confirm which model is intended.
query="""
insert into ivsm_user.iris_scoring_results
select * from ivsm.IVSM_SCORE(
    on ivsm_user.iris_to_score
    on ivsm_user.aoa_model_61ce3b37 DIMENSION
    using
        ModelID('61ce3b37-90d0-45e1-aa93-b4b0995ac52f')
        ColumnsToPreserve('idx')
        ModelType('PMML')
        ModelSpecificSettings('PMML_OUTPUT_TYPE=ALL')
) sc;

"""

# Use perf_counter() for the elapsed time: it is monotonic, whereas
# time.time() follows the wall clock and can jump if the clock is adjusted.
ts = time.perf_counter()
cursor.execute(query)
elapsed_seconds = time.perf_counter() - ts

print(f"Time, spent for scoring (seconds): {elapsed_seconds:.2f}")

In [None]:
# Sample scoring results: raise the displayed column width limit to 250
# characters so long cell values are cut off later, then show ten rows of
# the results table.
pd.set_option("display.max_colwidth", 250)

sample_sql = "select top 10 * from ivsm_user.iris_scoring_results"
pd.read_sql(sample_sql, conn)

In [None]:
# Aggregate the scoring output and show JSON access from SQL.
# The query casts score_result to JSON, extracts the value at
# '$.target_fields.y' (presumably the predicted label; confirm), exposes it
# under the alias "species", and counts the rows for each distinct value.


query="""
select
cast(score_result as json).JSONExtractValue('$.target_fields.y') as "species",
count(*) as cnt
from 
ivsm_user.iris_scoring_results
group by 1
"""

# Last expression of the cell: run the aggregation and display the result.
pd.read_sql(query, conn)