### Import nyoka

In [1]:
from nyoka import model_to_pmml

### Pre- and post-processing scripts

In [2]:
# Placeholder script that the export cells pass to model_to_pmml through their
# 'scripts' lists. It reads r1 and r2 as globals (neither is defined in the visible
# cells, so calling it directly would raise NameError), binds their sum to the
# local name r3, and implicitly returns None.
# NOTE(review): presumably nyoka embeds this function's source text in the PMML
# rather than calling it - confirm before changing the body.
def script1():
    r3 = r1+r2
    
# Second placeholder script passed to model_to_pmml. It reads r1, r2, r3 and r4 as
# globals (r3 here is a global lookup, not the local r3 assigned inside script1),
# binds r1+r2+r3-r4 to the local name r6, and implicitly returns None.
# None of r1..r4 is defined in the visible cells, so a direct call would raise NameError.
# NOTE(review): presumably consumed as source text by nyoka, not executed here - confirm.
def script2():
    r6 = r1+r2+r3-r4

### Pipeline object 1 with pre-processing

In [3]:
import pandas as pd
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Imputer
from sklearn_pandas import DataFrameMapper
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Load the iris measurements into a DataFrame and append the class label column.
iris = datasets.load_iris()
irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
irisd['Species'] = iris.target

# Every column except the label is a model input.
features = irisd.columns.drop('Species')
target = 'Species'

# Pipeline 1: column-wise pre-processing followed by logistic regression.
#   - sepal columns are standardised with StandardScaler
#   - petal columns go through Imputer (default settings)
pipeline_obj1 = Pipeline([
    ("mapping", DataFrameMapper([
        (['sepal length (cm)', 'sepal width (cm)'], StandardScaler()),
        (['petal length (cm)', 'petal width (cm)'], Imputer())
    ])),
    # The step label now matches the estimator; it used to be "rfc", a leftover
    # from the random-forest pipeline that made the pipeline repr misleading.
    ("lr", LogisticRegression())
])

# Last expression of the cell: fit returns the pipeline, so its repr is displayed.
pipeline_obj1.fit(irisd[features], irisd[target])

Pipeline(memory=None,
     steps=[('mapping', DataFrameMapper(default=False, df_out=False,
        features=[(['sepal length (cm)', 'sepal width (cm)'], StandardScaler(copy=True, with_mean=True, with_std=True)), (['petal length (cm)', 'petal width (cm)'], Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbo...ty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))])

### Pipeline object 2 with pre-processing

In [4]:
import pandas as pd
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Imputer
from sklearn_pandas import DataFrameMapper
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Reload iris and rebuild the labelled frame (same construction as the Pipeline 1 cell).
iris = datasets.load_iris()
irisd = pd.DataFrame(iris.data, columns=iris.feature_names).assign(Species=iris.target)

# Label column name, and every remaining column as a model input.
target = 'Species'
features = irisd.columns.drop(target)

# Column groups handled by the mapper.
sepal_cols = ['sepal length (cm)', 'sepal width (cm)']
petal_cols = ['petal length (cm)', 'petal width (cm)']

# Pre-processing: scale the sepal columns, run the petal columns through Imputer.
column_mapper = DataFrameMapper([
    (sepal_cols, StandardScaler()),
    (petal_cols, Imputer()),
])

# 100-tree random forest. No random_state is passed, so the fitted trees
# differ from run to run.
forest = RandomForestClassifier(n_estimators=100)

# Pipeline 2: the mapper feeding the random forest.
pipeline_obj2 = Pipeline(steps=[
    ("mapping", column_mapper),
    ("rfc", forest),
])

# Last expression of the cell: fit returns the pipeline, so its repr is displayed.
pipeline_obj2.fit(irisd[features], irisd[target])

Pipeline(memory=None,
     steps=[('mapping', DataFrameMapper(default=False, df_out=False,
        features=[(['sepal length (cm)', 'sepal width (cm)'], StandardScaler(copy=True, with_mean=True, with_std=True)), (['petal length (cm)', 'petal width (cm)'], Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbo...n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False))])

### Keras model

In [7]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD

# Generate dummy data
import numpy as np
# Synthetic data: uniform-random 20-feature rows with random labels in 0..9,
# one-hot encoded over 10 classes. No seed is set, so values differ per run.
x_train = np.random.random((1000, 20))
train_labels = np.random.randint(10, size=(1000, 1))
y_train = keras.utils.to_categorical(train_labels, num_classes=10)

x_test = np.random.random((100, 20))
test_labels = np.random.randint(10, size=(100, 1))
y_test = keras.utils.to_categorical(test_labels, num_classes=10)

# Two 64-unit ReLU hidden layers, each followed by 50% dropout, then a
# 10-way softmax output. Only the first layer declares the input width (20).
model = Sequential([
    Dense(64, activation='relu', input_dim=20),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

# SGD with Nesterov momentum and a small learning-rate decay.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train silently; the returned History object is the cell's displayed value.
model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


<keras.callbacks.History at 0x1d72c9134a8>

### Export to PMML - Case 1

In [6]:
# Case 1: export both scikit-learn pipelines into a single PMML file.
# Each top-level key names one model entry; every entry carries its fitted
# pipeline, the script functions with a purpose tag per script, the feature
# and target names, and a task type.
# NOTE(review): both pipelines were fitted on all four iris columns, yet each
# 'featuresUsed' list names only two of them - confirm this is intentional.
toExportDict={
    'model1':{
        'preProcessingScript':{'scripts':[script1,script2], 'scriptpurpose':['train','score']},
        'pipelineObj':pipeline_obj1,
        'featuresUsed':['sepal length (cm)', 'sepal width (cm)'],
        'targetName':'Species',
        'postProcessingScript':{'scripts':[script1], 'scriptpurpose':['postprocess']},
        'taskType': 'trainAndscore'
    },
    'model2':{
        'preProcessingScript':{'scripts':[script1,script2], 'scriptpurpose':['train','score']},
        'pipelineObj':pipeline_obj2,
        'featuresUsed':['petal length (cm)', 'petal width (cm)'],
        # pipeline_obj2 was fitted on the iris 'Species' column. The previous
        # value 'mpg' names a field that exists nowhere in this notebook.
        'targetName':'Species',
        'postProcessingScript':{'scripts':[script1], 'scriptpurpose':['postprocess']},
        'taskType': 'score'
    }   
}

# Writes test1.pmml; the returned PMML object is the cell's displayed value.
model_to_pmml(toExportDict, pmml_f_name="test1.pmml")

<PMML43Ext.PMML at 0x1d727de2278>

### Export to PMML - Case 2

In [9]:
# Case 2: export a scikit-learn pipeline together with the Keras model.

# Entry for the logistic-regression pipeline.
sklearn_entry = {
    'hyperParameters': '',
    'preProcessingScript': {
        'scripts': [script1, script2],
        'scriptpurpose': ['train', 'score'],
    },
    'pipelineObj': pipeline_obj1,
    'featuresUsed': ['sepal length (cm)', 'sepal width (cm)'],
    'targetName': 'Species',
    'postProcessingScript': {
        'scripts': [script1],
        'scriptpurpose': ['postprocess'],
    },
    'taskType': 'trainAndscore',
}

# Entry for the Keras network: the model object sits in 'pipelineObj', and
# no feature or target names are supplied.
keras_entry = {
    'hyperParameters': '',
    'preProcessingScript': {
        'scripts': [script1, script2],
        'scriptpurpose': ['train', 'score'],
    },
    'pipelineObj': model,
    'featuresUsed': None,
    'targetName': None,
    'postProcessingScript': {
        'scripts': [script1],
        'scriptpurpose': ['postprocess'],
    },
    'taskType': 'score',
}

toExportDict = {'model1': sklearn_entry, 'model2': keras_entry}

# NOTE(review): Case 2 writes test3.pmml while Case 1 writes test1.pmml -
# confirm the file name is intended.
model_to_pmml(toExportDict, pmml_f_name="test3.pmml")

<PMML43Ext.PMML at 0x1d72dd60358>