# scikit-learn → PMML


### Exporter: Gradient Boosting
### Data Set used: Titanic


### **STEPS**: 
- Build the preprocessing pipeline (using DataFrameMapper)
- Export to PMML using the Nyoka exporter

### Pre-processing (using a pipeline) and model building for the Titanic data set

In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Imputer, LabelEncoder, LabelBinarizer
from sklearn_pandas import DataFrameMapper
from sklearn.ensemble import GradientBoostingClassifier

# Load the Titanic training data from the local CSV file.
titanic = pd.read_csv("titanic_train.csv")

# Fill missing 'Embarked' entries with 'S'; other columns are left untouched.
titanic = titanic.fillna({'Embarked': 'S'})

# Columns that are not used as model inputs (the target is among them).
non_feature_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Survived']

target = 'Survived'
# Every remaining column, in its original order, is treated as a feature.
features = titanic.columns.drop(non_feature_columns).tolist()

In [2]:
# Column-wise preprocessing: label-encode the two columns the mapper selects.
# NOTE(review): only 'Sex' and 'Embarked' are listed here although `features`
# holds more columns -- confirm against the sklearn-pandas docs whether the
# remaining columns are meant to be left out of training.
# NOTE(review): the list-style selectors are presumably what triggers the
# `column_or_1d` warning shown in the cell output -- verify before changing.
column_mapper = DataFrameMapper([
    (['Sex'], LabelEncoder()),
    (['Embarked'], LabelEncoder()),
])

# Preprocessing-only pipeline: the mapper followed by median imputation.
# The classifier is not a pipeline step; it is fitted separately below.
pipeline_obj = Pipeline(steps=[
    ("mapping", column_mapper),
    ("imp", Imputer(strategy="median")),
])

# Fit the preprocessing steps and obtain the transformed training data.
tt = pipeline_obj.fit_transform(titanic[features])

# Train a gradient boosting classifier with 10 estimators on the transformed
# data, using the target column as labels.
gbc = GradientBoostingClassifier(n_estimators=10)
gbc.fit(tt, titanic[target])

  y = column_or_1d(y, warn=True)


GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=10,
              n_iter_no_change=None, presort='auto', random_state=None,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

### Export the pipeline and the fitted model to PMML using the Nyoka package

In [3]:
from nyoka import model_to_pmml
# File name handed to the exporter through its `pmml_f_name` argument.
pmml_file_name = "gb_pmml.pmml"

In [4]:
# Export specification for a single entry: the preprocessing pipeline, the
# fitted classifier, the feature list and the target name; the script and
# hyperparameter slots are left as None.
model1_spec = dict(
    hyperparameters=None,
    preProcessingScript=None,
    pipelineObj=pipeline_obj,
    modelObj=gbc,
    featuresUsed=features,
    targetName=target,
    postProcessingScript=None,
    taskType='trainAndscore',
)

# The exporter receives a mapping of entry name -> specification.
toExportDict = {'model1': model1_spec}

# Run the Nyoka exporter with the chosen file name; its return value is kept
# in `pmml`.
pmml = model_to_pmml(toExportDict, pmml_f_name=pmml_file_name)