# LGBM → PMML 

### Exporter: LGBMRegressor

### Data set used: Auto MPG (`auto-mpg.csv`)


### **STEPS**: 
- Build a scikit-learn Pipeline that combines pre-processing with the LGBMRegressor model
- Export the fitted Pipeline to PMML using the Nyoka exporter

### Pre-processing, Model building (using pipeline) for Auto data set

In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import CountVectorizer
from lightgbm import LGBMRegressor,LGBMClassifier
from sklearn_pandas import DataFrameMapper
from sklearn.model_selection import train_test_split

# Load the Auto MPG data. 'mpg' is the regression target; every other column
# is kept as an input column.
target_name = 'mpg'
auto = pd.read_csv('auto-mpg.csv')
X = auto.drop([target_name], axis=1)
y = auto[target_name]

# Compare with != instead of `not in ('mpg')`: ('mpg') is a plain string, not
# a tuple, so `in` would perform a substring test and silently drop any
# column whose name is a substring of 'mpg' (e.g. 'm', 'p', 'g', 'mp', 'pg').
feature_names = [name for name in auto.columns if name != target_name]

# Fixed random_state keeps the train/test split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=101)

# Pre-processing and model live in a single Pipeline object.
# 'car name' is selected with a bare string and 'displacement' with a list;
# in sklearn_pandas a string selector hands the transformer a 1-D column
# (what CountVectorizer expects) and a list selector hands it a 2-D array
# (what StandardScaler expects).
pipeline_obj = Pipeline([
    ('mapper', DataFrameMapper([
        ('car name', CountVectorizer()),
        (['displacement'], [StandardScaler()]),
    ])),
    ('lgbmr', LGBMRegressor()),
])
pipeline_obj.fit(x_train, y_train)

Pipeline(memory=None,
         steps=[('mapper',
                 DataFrameMapper(default=False, df_out=False,
                                 features=[('car name',
                                            CountVectorizer(analyzer='word',
                                                            binary=False,
                                                            decode_error='strict',
                                                            dtype=<class 'numpy.int64'>,
                                                            encoding='utf-8',
                                                            input='content',
                                                            lowercase=True,
                                                            max_df=1.0,
                                                            max_features=None,
                                                            min_df=1,
                                                          

In [2]:
# Spot-check the fitted pipeline: predict on the held-out split and show the
# first predicted value.
pipeline_obj.predict(x_test)[0]

23.514978294580377

### Export the Pipeline object into PMML using the Nyoka package

In [3]:
from nyoka import lgb_to_pmml
# Export the fitted pipeline to a PMML file. Arguments, in order: the
# pipeline, the input column names, the target column name, and the output
# file path.
lgb_to_pmml(pipeline_obj,feature_names,target_name,"lgbmr_pmml_preprocess.pmml")

In [4]:
# Save the held-out features to CSV (presumably for scoring the exported PMML
# outside this notebook — confirm). The DataFrame index is written as the
# first column because index=False is not passed.
x_test.to_csv("test.csv")

In [5]:
from nyoka.reconstruct import pmml_to_pipeline



In [6]:
from nyoka import PMML44 as pml
# Parse the exported PMML file back into nyoka's PMML44 object model.
# NOTE(review): the second positional argument (True) looks like the parser's
# "silence" flag — confirm. `pmml` is not used again in the cells shown here.
pmml = pml.parse("lgbmr_pmml_preprocess.pmml",True)

In [7]:
# Rebuild a pipeline object from the exported PMML file (going by the
# function name, a scikit-learn pipeline).
pp=pmml_to_pipeline.generate_skl_pipeline("lgbmr_pmml_preprocess.pmml")

In [8]:
# Score the held-out split with the pipeline reconstructed from PMML.
# NOTE(review): the first value in the recorded output (22.05589583) differs
# from the original pipeline's first prediction (23.514978294580377), so the
# PMML round trip does not reproduce the original predictions here —
# investigate before relying on the reconstructed pipeline.
pp.predict(x_test)

array([22.05589583, 16.01039357, 31.06273444, 13.49501308, 13.49501308,
       22.0743162 , 27.24776216, 24.64091376, 27.24776216, 24.22439045,
       20.14067172, 24.22439045, 27.24776216, 31.06273444, 13.08395063,
       13.32363937, 16.01039357, 16.01039357, 27.24776216, 28.4925652 ,
       13.49501308, 17.99139482, 28.4925652 , 13.32363937, 13.32363937,
       32.08713101, 24.22439045, 25.14047562, 14.09732717, 13.49501308,
       17.99139482, 31.06273444, 24.22439045, 27.24776216, 31.06273444,
       13.08395063, 29.08732713, 25.14047562, 13.32363937, 24.22439045,
       28.4925652 , 14.09732717, 23.52078561, 14.09732717, 16.35795301,
       16.01039357, 27.24776216, 23.65049444, 16.35795301, 31.06273444,
       16.01039357, 16.01039357, 16.95889481, 24.22439045, 16.35795301,
       24.22439045, 31.06273444, 32.08713101, 16.01039357, 31.06273444,
       23.52078561, 31.06273444, 32.08713101, 16.01039357, 18.0191349 ,
       16.95889481, 31.06273444, 31.06273444, 13.32363937, 13.49

In [9]:
# NOTE: str.split() treats its argument as a literal separator, not a regular
# expression, so nothing is split here and the whole string comes back as a
# single element. Splitting on whitespace would need a bare .split() or
# re.split(r'\s+', ...).
"car name".split(r'\\s+')

['car name']