In [1]:
import os

import numpy as np
import pandas as pd


import matplotlib.pyplot as plt
%matplotlib inline


from sklearn_pandas import DataFrameMapper


from sklearn.datasets import load_iris

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier

from sklearn.pipeline import Pipeline


from sklearn2pmml.decoration import ContinuousDomain
from sklearn2pmml.pipeline import PMMLPipeline

from sklearn2pmml import sklearn2pmml

In [2]:
# Load the iris dataset and wrap features/target in DataFrames so that later
# cells can address them by column name.
iris_data = load_iris()

# Keep the first two whitespace-separated words of each raw feature name and
# join them with "_" (this is what yields columns such as `sepal_length`).
feature_columns = ['_'.join(feature_name.split()[:2])
                   for feature_name in iris_data.feature_names]

X = pd.DataFrame(data=iris_data.data,
                 columns=feature_columns)
y = pd.DataFrame(data=iris_data.target,
                 columns=['species'])

# Hold out 30% of the rows for testing. The split is shuffled, so pin
# random_state: without it every "Restart & Run All" produces a different
# train/test partition and therefore different downstream outputs.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42)

In [3]:
# Put the training features and their target side by side (column-wise) in a
# single frame, then preview the first rows.
train_parts = [X_train, y_train]
iris_train = pd.concat(train_parts, axis='columns')
iris_train.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
101,5.8,2.7,5.1,1.9,2
102,7.1,3.0,5.9,2.1,2
54,6.5,2.8,4.6,1.5,1
27,5.2,3.5,1.5,0.2,0
76,6.8,2.8,4.8,1.4,1


In [4]:
# Put the held-out features and their target side by side (column-wise) in a
# single frame, then preview the first rows.
test_parts = [X_test, y_test]
iris_test = pd.concat(test_parts, axis='columns')
iris_test.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
122,7.7,2.8,6.7,2.0,2
132,6.4,2.8,5.6,2.2,2
143,6.8,3.2,5.9,2.3,2
80,5.5,2.4,3.8,1.1,1
62,6.0,2.2,4.0,1.0,1


In [5]:
# End-to-end pipeline, fitted as one unit so it can be exported as a whole:
#   mapper     - selects the feature columns by name and runs them, as one
#                group, through ContinuousDomain -> SimpleImputer -> StandardScaler
#   pca        - projects the scaled features onto 3 principal components
#   selector   - keeps the 2 best-scoring components (SelectKBest default score)
#   classifier - decision tree trained on the selected components
pipeline = PMMLPipeline([
    ('mapper',
     DataFrameMapper([
         (X_train.columns.values,
          [ContinuousDomain(),
           SimpleImputer(),
           StandardScaler()])])),
    ('pca',
     PCA(n_components=3)),
    ('selector',
     SelectKBest(k=2)),
    ('classifier',
     # Seed the tree so that re-running the notebook rebuilds the same model.
     DecisionTreeClassifier(random_state=42))
])

# Fit on the feature-only frame: this is the same schema that predict() is
# later given (X_test), and it keeps the `species` target column out of the
# data handed to the pipeline as X. The mapper only selects X_train's columns,
# so the transformed training matrix is unchanged.
# The trailing ';' suppresses the fitted-pipeline repr in the cell output.
pipeline.fit(X_train,
             y_train['species']);

In [6]:
# Predict the class label of every held-out row and show the resulting array.
species_pred = pipeline.predict(X_test)
species_pred

array([1, 0, 1, 2, 2, 1, 1, 1, 2, 0, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 2,
       0, 0, 2, 1, 0, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 0, 0, 2, 2, 2, 0, 2,
       2])

In [7]:
# Export the fitted pipeline to a PMML file in the current working directory.
# NOTE(review): with_repr=True presumably embeds the pipeline's repr in the
# generated document - confirm against the sklearn2pmml documentation.
pmml_path = 'iris_pipeline.pmml'
sklearn2pmml(pipeline, pmml_path, with_repr=True)