# Deploy a Hyper-Segmented Model with TDStone2 in less than a minute
# Using a scikit-learn Pipeline

## 1 - Connect to Vantage

In [1]:
# teradataml provides the Vantage connection and DataFrame API; json reads the connection file.
import teradataml as tdml
import json
import warnings
# NOTE: no category/module filter is given, so every warning is silenced for the whole kernel session.
warnings.filterwarnings('ignore')
# Last expression of the cell: shows the installed teradataml version.
tdml.__version__

'20.00.00.03'

In [2]:
# Connection parameters are read from a JSON file kept outside the notebook.
# Only one environment is needed per run. The previous version also opened
# '../connections/me.json', '../connections/csae.json' and '../connections/vantage24.json'
# into an unused variable, which made the notebook fail whenever one of them was missing.
# To target another environment, point CONNECTION_FILE at the corresponding file.
CONNECTION_FILE = '../connections/csae2.json'

with open(CONNECTION_FILE, 'r') as f:
    # Param is unpacked into create_context below; its 'database' and 'user'
    # entries are also read by later cells.
    Param = json.load(f)

# Open the Vantage session; the value returned by create_context is the cell output.
tdml.create_context(**Param)

Engine(teradatasql://:***@sto-xo0saiv0ot1sxmj2.env.clearscape.teradata.com?DATABASE=demo_user&user=DEMO_USER)

## 2 - The Hyper-Segmented Dataset

In [3]:
# teradataml DataFrame on table `dataset_00` of the configured database.
dataset_00_ref = tdml.in_schema(Param['database'], 'dataset_00')
dataset = tdml.DataFrame(dataset_00_ref)
dataset

Partition_ID,ID,X1,X2,X3,X4,X5,X6,X7,X8,X9,flag,Y1,Y2,FOLD
4,29648,0.2828249521820943,-1.0703957567450235,-1.7195871460461143,0.0986731411509142,2.629286113142945,-1.13725681301724,0.258325751306629,0.6074351282431332,-0.5536088422409077,0,0.4611241273813071,1,test
4,29651,0.6684390731457177,-0.1177596410437464,0.5424975414395571,-1.6390967569621533,-0.3698766978640391,0.1905499604186497,-1.0819066069820205,-1.3112922389467692,-1.0081519902189835,1,-1.0849818092385104,0,test
4,29652,1.0278822444662743,0.0513824429945531,0.5670716341186771,0.1060029501453061,1.1756205739131305,0.7480286296749381,-0.5816091052096549,-0.9525826081949808,-0.9388201991236989,0,-1.097017086845877,0,test
4,29653,-0.2548656740144003,0.702755753627121,0.3589478098762991,0.1399641718877776,0.0724246758635471,0.2472791889277092,-0.6332113216549093,-0.1527714913310329,0.379743155138039,0,0.0045752221415969,1,test
4,29656,-0.986502621331066,-0.28481950474934,-0.0675830037762455,0.5471628321129705,-1.590327288924158,0.5223728136949378,-1.1821746627947485,0.3482962183213171,-0.9542402918504372,1,-0.2409118030086716,0,test
4,29657,-0.4367522868124594,-0.0446835885759712,-0.6137165229199535,0.3863132734859432,-0.2992324836329113,-0.4327861715099723,-0.0695347665942383,-1.3241766653033715,0.7027164024669853,1,0.1579968819382851,1,test
4,29655,0.7284711693816118,-0.4941016488128337,0.6065375390243897,-0.211035301676198,1.2191370921647808,-0.2148407179375465,-0.2821973288760446,-1.9664477193262777,-1.1690710430987223,0,-1.067045055038608,0,test
4,29649,0.6790031066881759,0.4500589671905496,0.5955921116093522,0.8887681753414973,1.0453881036438215,-0.1695029454459193,-0.3399336173250339,-0.1515121398525592,-1.322191005423316,1,-0.5104725405020154,0,test
4,29647,1.254546828504672,0.7414044713069771,0.817800474289231,0.5526914119773194,0.4892795954876792,1.7922746804936651,-0.5642548636971005,1.596741726630448,-0.8901255724688466,0,2.990229851073103,1,test
4,29644,0.3274161320177766,-0.5985959689559489,2.255029156227695,-0.1999002764174987,0.994577024635114,-0.6840761116404813,1.2721291568713036,0.8893120069031607,0.9173607923291388,0,1.2601971093468245,1,test


In [4]:
# (number of rows, number of columns) of the dataset
dataset.shape

(40000, 15)

In [5]:
# Count the IDs in every (Partition_ID, FOLD) group to see how the rows are split
# between folds within each partition.
id_by_segment = dataset[['Partition_ID', 'FOLD', 'ID']]
summary = id_by_segment.groupby(['Partition_ID', 'FOLD']).count()
summary.sort(['Partition_ID', 'FOLD'])

Partition_ID,FOLD,count_ID
1,test,8000
1,train,2000
4,test,8000
4,train,2000
6,test,8000
6,train,2000
9,test,8000
9,train,2000


In [6]:
# (rows, columns) of the summary: one row per (Partition_ID, FOLD) group
summary.shape

(8, 3)

## 3 - Hyper-segmented model deployment

### 3.1 - Engineering of the scikit-learn classifier pipeline

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

# Pipeline definition as (name, estimator) pairs: feature scaling followed by a random forest.
# NOTE(review): no random_state is set on the forest, so fitted models may differ between runs.
scaler_step = ('scaler', StandardScaler())
classifier_step = (
    'classifier',
    RandomForestClassifier(max_depth=5, n_estimators=95),
)
steps_classifier = [scaler_step, classifier_step]

### 3.2 - Deployment of the scikit-learn pipeline

In [8]:
from tdstone2.tdstone import TDStone
from tdstone2.tdshypermodel import HyperModel

# TDStone handle configured from the connection parameters; it is passed to HyperModel
# and used for the sto.list_* calls later in the notebook.
sto = TDStone(
    schema_name=Param['database'],
    SEARCHUIFDBPATH=Param['user'],
)

In [9]:
# Settings handed to the HyperModel: the target column, the columns declared as
# categorical, and the input columns (X1..X9 followed by `flag`).
feature_columns = [f'X{k}' for k in range(1, 10)] + ['flag']
model_parameters = dict(
    target='Y2',
    column_categorical=['flag', 'Y2'],
    column_names_X=feature_columns,
)

In [10]:
%%time
# Build the hyper-segmented model from the pipeline steps and parameters defined above.
# Per the log printed by this cell, construction registers a training mapper and a scoring
# mapper, creates their on-clause and STO views, and registers the hyper model itself.
model = HyperModel(tdstone            = sto,
                   metadata           = {'project': 'test'},
                   skl_pipeline_steps = steps_classifier,
                   model_parameters   = model_parameters,
                   dataset            = tdml.in_schema(Param['database'],'dataset_00'),
                   id_row             = 'ID',            # row identifier column of the dataset
                   id_partition       = 'Partition_ID',  # partition column of the dataset
                   id_fold            = 'FOLD',          # column holding the 'train' / 'test' label
                   fold_training      = 'train')         # presumably the FOLD value used for fitting — TODO confirm

registration of mapper with id = 05842693-ddf1-4589-8d7b-a7ea4e952b95
creation of dedicated mapper table : demo_user.TDS_MAPPER_05842693_ddf1_4589_8d7b_a7ea4e952b95
creation of the on clause view demo_user.TDS_ON_CLAUSE_TRAINING_05842693_ddf1_4589_8d7b_a7ea4e952b95
creation of the sto view demo_user.TDS_STO_TRAINING_05842693_ddf1_4589_8d7b_a7ea4e952b95
registration of mapper with id = a93604aa-2204-45a6-b4f5-1edab0414da1
creation of dedicated mapper table : demo_user.TDS_MAPPER_a93604aa_2204_45a6_b4f5_1edab0414da1
creation of the on clause view demo_user.TDS_ON_CLAUSE_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1
creation of the volatile table on on clause view demo_user.TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1
creation of the sto view demo_user.TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1
register hyper model with id : 654b028b-741e-4a46-8e5d-d966ac8a07b1
hyper model : 654b028b-741e-4a46-8e5d-d966ac8a07b1
CPU times: total: 141 ms
Wall time: 28.1 s


In [11]:
# Show the registered hyper models with their model and mapper ids.
sto.list_hyper_models()

CREATION_DATE,ID,ID_MODEL,ID_MAPPER_TRAINING,ID_MAPPER_SCORING,METADATA
2025-01-08 05:26:48.160000-05:,654b028b-741e-4a46-8e5d-d966ac8a07b1,1feb2a86-622c-4c56-8e94-4c367d1a232e,05842693-ddf1-4589-8d7b-a7ea4e952b95,a93604aa-2204-45a6-b4f5-1edab0414da1,"{""user"": ""dm250067"", ""project"": ""test""}"


In [12]:
# Training-mapper id of the most recently created hyper model.
latest_training_mappers = (
    sto.list_hyper_models()[['CREATION_DATE', 'ID_MAPPER_TRAINING']]
    .sort('CREATION_DATE', ascending=False)
    .to_pandas()
)
id_mapper = latest_training_mappers['ID_MAPPER_TRAINING'].values[0]
id_mapper

'05842693-ddf1-4589-8d7b-a7ea4e952b95'

## 4 - Execution of the deployed hypermodel

### 4.1 - Model Training

In [13]:
%%time
# Train the hyper model; the log printed by this cell names the trained-models table
# and the views exposing the pickle / ONNX models.
model.train()

insert trained models in demo_user.TDS_TRAINED_MODELS_05842693_ddf1_4589_8d7b_a7ea4e952b95
- access pickle models in demo_user.V_TDS_TRAINED_MODELS_05842693_ddf1_4589_8d7b_a7ea4e952b95_PICKLE
- access  onnx  models in demo_user.V_TDS_TRAINED_MODELS_05842693_ddf1_4589_8d7b_a7ea4e952b95_ONNX
- onnx byom catalog    in demo_user.V_TDS_TRAINED_MODELS_05842693_ddf1_4589_8d7b_a7ea4e952b95_BYOM_CATALOG
CPU times: total: 31.2 ms
Wall time: 9.15 s


# Inspect the raw trained-models table of the training mapper.
# Two defects fixed here:
#   * the table name was built from ID_MAPPER_TRAINING, which is only defined much later in
#     the notebook, so the cell raised NameError on a fresh run; `id_mapper` (the training
#     mapper id computed earlier) holds the same value;
#   * from_query received the literal string "query" instead of the SQL text in `query`.
trained_models_table = 'TDS_TRAINED_MODELS_' + id_mapper.replace('-', '_')
query = f"SELECT * FROM {Param['database']}.{trained_models_table}"
print(query)
tdml.DataFrame.from_query(query)

In [14]:
# Count the trained-model rows for each TD_TIMECODE value.
model.get_trained_models().groupby('TD_TIMECODE').count()

demo_user TDS_MODEL_REPOSITORY


TD_TIMECODE,count_ID_PROCESS,count_Partition_ID,count_ID_MODEL,count_ID_TRAINED_MODEL,count_MODEL_TYPE,count_STATUS
9999-01-01 00:00:00.000000-04:,4,4,4,4,4,4


### 4.2 - Model Scoring

In [15]:
%%time
# Score with the trained models; the log printed by this cell names the scores table.
model.score()

creation of the on clause view demo_user.TDS_ON_CLAUSE_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1
creation of the volatile table on on clause view demo_user.TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1
insert scores in demo_user.TDS_SCORES_a93604aa_2204_45a6_b4f5_1edab0414da1
CPU times: total: 31.2 ms
Wall time: 20.7 s


# NOTE(review): this cell has no execution count and repeats, verbatim, the statement
# of the executed cell In [17] below — candidate for removal.
model.get_model_predictions().groupby('TD_TIMECODE').count()

In [16]:
# Predictions in the default layout: one row per scored ID, with the prediction and
# class probabilities as columns (see the output of this cell).
model.get_model_predictions()

demo_user TDS_SCORES_a93604aa_2204_45a6_b4f5_1edab0414da1


TD_TIMECODE,ID_PROCESS,ID_TRAINED_MODEL,Partition_ID,ID,Y2_proba_0,Y2_prediction,Y2_proba_1
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,fc56bacc-46fc-4d5d-9e01-185507aff61c,4,28920,0.506687021404579,0.0,0.4933129785954211
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,d52d31c3-861f-4531-9962-a454537ce4b8,1,8482,0.5692973860684621,0.0,0.4307026139315378
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,fc56bacc-46fc-4d5d-9e01-185507aff61c,4,25937,0.4163261238729427,1.0,0.5836738761270576
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,57e118d6-b9b8-4d65-97a5-58c2ef185a94,9,18632,0.518110961183082,0.0,0.4818890388169181
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,57e118d6-b9b8-4d65-97a5-58c2ef185a94,9,18126,0.5057575063087655,0.0,0.4942424936912345
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,57e118d6-b9b8-4d65-97a5-58c2ef185a94,9,12779,0.4496898608965424,1.0,0.5503101391034577
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,d52d31c3-861f-4531-9962-a454537ce4b8,1,8002,0.4450077524581334,1.0,0.5549922475418664
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,57e118d6-b9b8-4d65-97a5-58c2ef185a94,9,18885,0.5137767972279962,0.0,0.4862232027720039
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,fc56bacc-46fc-4d5d-9e01-185507aff61c,4,27517,0.5136606167115186,0.0,0.4863393832884814
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,fc56bacc-46fc-4d5d-9e01-185507aff61c,4,26845,0.5184381906873906,0.0,0.4815618093126094


In [17]:
# Count the prediction rows for each TD_TIMECODE value.
model.get_model_predictions().groupby('TD_TIMECODE').count()

demo_user TDS_SCORES_a93604aa_2204_45a6_b4f5_1edab0414da1


TD_TIMECODE,count_ID_PROCESS,count_ID_TRAINED_MODEL,count_Partition_ID,count_ID,count_Y2_proba_0,count_Y2_prediction,count_Y2_proba_1
9999-01-01 00:00:00.000000-04:,40000,40000,40000,40000,40000,40000,40000


In [18]:
# With denormalized_view=False the output is in long form: one row per
# (ID, FEATURE_NAME) with a FEATURE_VALUE and a STATUS column (see the output of this cell).
model.get_model_predictions(denormalized_view=False)

demo_user TDS_SCORES_a93604aa_2204_45a6_b4f5_1edab0414da1


TD_TIMECODE,ID_PROCESS,Partition_ID,ID,FEATURE_NAME,FEATURE_VALUE,FEATURE_TYPE,STATUS,ID_TRAINED_MODEL
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,6,32739,Y2_prediction,1.0,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""3d227dc3-16bf-47ab-a689-ce36e4704da5"", ""batch_size"": 9999}",a67f5aa1-95c3-4447-a430-2164389f26e6
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,9,12219,Y2_prediction,0.0,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""8833c748-5d4f-45f0-9786-08c678cf2881"", ""batch_size"": 9999}",57e118d6-b9b8-4d65-97a5-58c2ef185a94
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,9,12219,Y2_proba_0,0.589502095573109,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""8833c748-5d4f-45f0-9786-08c678cf2881"", ""batch_size"": 9999}",57e118d6-b9b8-4d65-97a5-58c2ef185a94
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,1,7957,Y2_proba_0,0.441110175402526,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""a52ea8ed-d290-4c13-a74a-46661d9fe7c4"", ""batch_size"": 9999}",d52d31c3-861f-4531-9962-a454537ce4b8
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,1,7957,Y2_prediction,1.0,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""a52ea8ed-d290-4c13-a74a-46661d9fe7c4"", ""batch_size"": 9999}",d52d31c3-861f-4531-9962-a454537ce4b8
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,4,29769,Y2_proba_1,0.5069693058971968,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""2920aee2-2b53-4098-8a8f-b1dca7d4ca64"", ""batch_size"": 9999}",fc56bacc-46fc-4d5d-9e01-185507aff61c
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,1,7957,Y2_proba_1,0.558889824597474,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""a52ea8ed-d290-4c13-a74a-46661d9fe7c4"", ""batch_size"": 9999}",d52d31c3-861f-4531-9962-a454537ce4b8
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,9,12219,Y2_proba_1,0.410497904426891,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""8833c748-5d4f-45f0-9786-08c678cf2881"", ""batch_size"": 9999}",57e118d6-b9b8-4d65-97a5-58c2ef185a94
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,6,32739,Y2_proba_0,0.4183413105151959,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""3d227dc3-16bf-47ab-a689-ce36e4704da5"", ""batch_size"": 9999}",a67f5aa1-95c3-4447-a430-2164389f26e6
9999-01-01 00:00:00.000000-04:,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,6,32739,Y2_proba_1,0.581658689484804,,"{""error"": ""successful"", ""batch"": 2, ""job"": ""3d227dc3-16bf-47ab-a689-ce36e4704da5"", ""batch_size"": 9999}",a67f5aa1-95c3-4447-a430-2164389f26e6


## 5 - A bit of administration: Model Lineage

### 5.1 - Access to the list of deployed codes

In [19]:
# List the deployed codes (ID, code type, metadata).
sto.list_codes()

ID,CODE_TYPE,METADATA
c88da75f-4c35-4fb4-8b2b-52105610adf7,python class,"{""user"": ""dm250067"", ""code_type"": ""python class"", ""project"": ""test"", ""script_path"": ""from memory""}"


### 5.2 - List of deployed models (code + parameters)

In [20]:
# List the deployed models: each row links a code id to its arguments.
sto.list_models()

ID,ID_CODE,ARGUMENTS,METADATA
1feb2a86-622c-4c56-8e94-4c367d1a232e,c88da75f-4c35-4fb4-8b2b-52105610adf7,"{""sto_parameters"": {""columnnames"": [""Partition_ID"", ""ID"", ""X1"", ""X2"", ""X3"", ""X4"", ""X5"", ""X6"", ""X7"", ""X8"", ""X9"", ""flag"", ""Y1"", ""Y2"", ""FOLD""], ""float_columnames"": [""X1"", ""X2"", ""X3"", ""X4"", ""X5"", ""X6"", ""X7"", ""X8"", ""X9"", ""Y1""], ""integer_columnames"": [""Partition_ID"", ""ID"", ""flag"", ""Y2""], ""category_columns"": [], ""output_format"": [""pickle""]}, ""model_parameters"": {""target"": ""Y2"", ""column_categorical"": [""flag"", ""Y2""], ""column_names_X"": [""X1"", ""X2"", ""X3"", ""X4"", ""X5"", ""X6"", ""X7"", ""X8"", ""X9"", ""flag""], ""arguments"": {""classifier__max_depth"": 5, ""classifier__n_estimators"": 95}}}","{""user"": ""dm250067"", ""project"": ""test""}"


### 5.3 - List of available mappers (mapping between partitions and models or trained models)

In [21]:
# List the registered mappers (training and scoring) with their tables and views.
sto.list_mappers()

ID,MAPPER_TYPE,TABLE_NAME,CODE_REPOSITORY,MODEL_REPOSITORY,TRAINED_MODEL_REPOSITORY,FEATURE_REPOSITORY,SCORES_REPOSITORY,DATASET_OBJECT,COL_ID_ROW,COL_ID_PARTITION,COL_FOLD,ON_CLAUSE_VIEW,STO_VIEW,METADATA
05842693-ddf1-4589-8d7b-a7ea4e952b95,training,TDS_MAPPER_05842693_ddf1_4589_8d7b_a7ea4e952b95,TDS_CODE_REPOSITORY,TDS_MODEL_REPOSITORY,TDS_TRAINED_MODELS_05842693_ddf1_4589_8d7b_a7ea4e952b95,,,"""demo_user"".""dataset_00""",ID,Partition_ID,FOLD,TDS_ON_CLAUSE_TRAINING_05842693_ddf1_4589_8d7b_a7ea4e952b95,TDS_STO_TRAINING_05842693_ddf1_4589_8d7b_a7ea4e952b95,"{""user"": ""dm250067""}"
a93604aa-2204-45a6-b4f5-1edab0414da1,scoring,TDS_MAPPER_a93604aa_2204_45a6_b4f5_1edab0414da1,TDS_CODE_REPOSITORY,TDS_MODEL_REPOSITORY,TDS_TRAINED_MODELS_05842693_ddf1_4589_8d7b_a7ea4e952b95,,TDS_SCORES_a93604aa_2204_45a6_b4f5_1edab0414da1,"""demo_user"".""dataset_00""",ID,Partition_ID,FOLD,TDS_ON_CLAUSE_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,TDS_STO_SCORING_a93604aa_2204_45a6_b4f5_1edab0414da1,"{""user"": ""dm250067""}"


### 5.4 - List of hyper models (mapping between models and mappers)

In [22]:
# List the hyper models: each row ties a model id to its training and scoring mapper ids.
sto.list_hyper_models()

CREATION_DATE,ID,ID_MODEL,ID_MAPPER_TRAINING,ID_MAPPER_SCORING,METADATA
2025-01-08 05:26:48.160000-05:,654b028b-741e-4a46-8e5d-d966ac8a07b1,1feb2a86-622c-4c56-8e94-4c367d1a232e,05842693-ddf1-4589-8d7b-a7ea4e952b95,a93604aa-2204-45a6-b4f5-1edab0414da1,"{""user"": ""dm250067"", ""project"": ""test""}"


In [23]:
# Training-mapper id of the most recently created hyper model (sorted client-side in pandas).
hyper_models_by_date = (
    sto.list_hyper_models()
    .to_pandas()
    .reset_index()
    .sort_values('CREATION_DATE', ascending=False)
)
ID_MAPPER_TRAINING = hyper_models_by_date['ID_MAPPER_TRAINING'].values[0]
ID_MAPPER_TRAINING

'05842693-ddf1-4589-8d7b-a7ea4e952b95'

In [24]:
# Current content of the training mapper table; table names use '_' where the id has '-'.
training_suffix = ID_MAPPER_TRAINING.replace("-","_")
tdml.DataFrame.from_query(f'CURRENT VALIDTIME SEL * FROM TDS_MAPPER_{training_suffix}')

ID_MODEL,Partition_ID,STATUS,METADATA
1feb2a86-622c-4c56-8e94-4c367d1a232e,4,enabled,
1feb2a86-622c-4c56-8e94-4c367d1a232e,6,enabled,
1feb2a86-622c-4c56-8e94-4c367d1a232e,9,enabled,
1feb2a86-622c-4c56-8e94-4c367d1a232e,1,enabled,


In [25]:
# Scoring-mapper id of the most recently created hyper model (sorted client-side in pandas).
hyper_models_newest_first = (
    sto.list_hyper_models()
    .to_pandas()
    .reset_index()
    .sort_values('CREATION_DATE', ascending=False)
)
ID_MAPPER_SCORING = hyper_models_newest_first['ID_MAPPER_SCORING'].values[0]
ID_MAPPER_SCORING

'a93604aa-2204-45a6-b4f5-1edab0414da1'

In [26]:
# Current content of the scoring mapper table; table names use '_' where the id has '-'.
scoring_suffix = ID_MAPPER_SCORING.replace("-","_")
tdml.DataFrame.from_query(f'CURRENT VALIDTIME SEL * FROM TDS_MAPPER_{scoring_suffix}')

ID_TRAINED_MODEL,Partition_ID,ID,STATUS,METADATA
a67f5aa1-95c3-4447-a430-2164389f26e6,6,,enabled,
d52d31c3-861f-4531-9962-a454537ce4b8,1,,enabled,
fc56bacc-46fc-4d5d-9e01-185507aff61c,4,,enabled,
57e118d6-b9b8-4d65-97a5-58c2ef185a94,9,,enabled,


In [27]:
# Close the Vantage connection opened with create_context at the top of the notebook.
tdml.remove_context()

True