### Setup Connection to Vantage

In [None]:
from teradataml.dataframe.dataframe import DataFrame
from tdextensions.distributed import DistDataFrame, DistMode
from teradataml import create_context
import pandas as pd
import numpy as np

import os

# Let pandas print up to 250 characters per column value before truncating.
pd.options.display.max_colwidth = 250

# Open the Vantage connection. Host and credentials can be overridden through
# environment variables so they do not have to be edited (or committed) in the
# notebook; the defaults are the original local demo values, so behaviour is
# unchanged when the variables are not set.
engine = create_context(
    host=os.environ.get("VANTAGE_HOST", "host.docker.internal"),
    username=os.environ.get("VANTAGE_USERNAME", "ivsm_user"),
    password=os.environ.get("VANTAGE_PASSWORD", "ivsm_user"),
)

In [None]:
# Build a teradataml DataFrame from the name "iris_train" (presumably an
# existing table or view in the connected database) and preview it; as the
# last expression of the cell, the result of head() is what gets displayed.
df = DataFrame("iris_train")
df.head()

### Perform a simple row-level transformation

In [None]:
def my_fun(row):
    """Derive one value for a single input row.

    Args:
        row: Object exposing ``idx``, ``sepal_length`` and ``sepal_width``
            as attributes.

    Returns:
        A one-dimensional ``numpy`` array holding ``row.idx`` followed by the
        product of ``row.sepal_length`` and ``row.sepal_width``. Because both
        values share one array, numpy promotes them to a common dtype.
    """
    length_times_width = row.sepal_length * row.sepal_width
    return np.array([row.idx, length_times_width])

# Name and declared type of each value that my_fun emits, in order.
# NOTE(review): my_fun multiplies sepal_length by sepal_width; if those columns
# hold fractional values, "INTEGER" may truncate or reject the derived column
# (and numpy will have promoted idx to float as well) - confirm intended types.
derived_schema = [["idx", "INTEGER"], ["my_derived_col", "INTEGER"]]

# Bind a distributed frame to "iris_train" and apply my_fun to every row.
df = DistDataFrame("iris_train", dist_mode=DistMode.STO, sto_id="my_dumb_map")
df = df.map(lambda record: my_fun(record), returns=derived_schema)

# Pull the result back as a pandas DataFrame and show its first rows.
df.to_pandas().head()

### Train a model per data partition

In [None]:
from sklearn.ensemble import RandomForestClassifier
import base64
import dill

def train(partition):
    """Fit a random forest on one partition and return it in serialised form.

    Args:
        partition: Tabular data (indexed like a pandas DataFrame) containing
            the four measurement columns and a ``species`` column.

    Returns:
        A ``numpy`` array with a single row of three values: the ``species``
        value of the partition's first record, the literal ``"my_model_id"``,
        and the fitted classifier serialised with ``dill`` and base64-encoded.
    """
    feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    features = partition[feature_columns]
    # Flatten the single-column label frame into the 1-D vector fit() expects.
    labels = partition[['species']].values.ravel()

    model = RandomForestClassifier()
    model.fit(features, labels)

    # The first record's species doubles as the identifier of this partition.
    partition_key = partition.species.iloc[0]
    encoded_model = base64.b64encode(dill.dumps(model))
    return np.array([[partition_key, "my_model_id", encoded_model]])

# Name and declared type of the three values train() returns per partition;
# the serialised model goes into the CLOB column.
model_schema = [
    ["partition_id", "VARCHAR(255)"],
    ["model_id", "VARCHAR(255)"],
    ["model_artefact", "CLOB"],
]

# Bind a distributed frame to "iris_train" and run train() once per partition.
# NOTE(review): the partition column is also the label train() fits on, so each
# model presumably sees a single class - confirm this is intended for the demo.
df = DistDataFrame("iris_train", dist_mode=DistMode.STO, sto_id="my_model_train")
df = df.map_partition(
    lambda part: train(part),
    partition_by="species",
    returns=model_schema,
)

# Pull the result back as a pandas DataFrame and show its first rows.
df.to_pandas().head()