# scikit-learn → PMML

## Exporter: LinearSVC

### Data set used: auto-mpg

### Steps:    

##### - Build a scikit-learn Pipeline that combines pre-processing (feature scaling and a TF-IDF vectorizer) with a LinearSVC model
##### - Export the fitted Pipeline to PMML using the Nyoka exporter

### Model building (using a Pipeline) for the auto-mpg data set

In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn_pandas import DataFrameMapper
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.svm import LinearSVC


# Load the auto-mpg data; the CSV is read from the current working directory.
df = pd.read_csv('auto-mpg.csv')

# 'cylinders' is the label the classifier predicts. It is removed from the
# inputs together with 'model year' and 'origin', which are not used as
# features here.
target_name = "cylinders"
y = df[target_name]
excluded_columns = [target_name, 'model year', 'origin']
X = df.drop(excluded_columns, axis=1)
feature_names = X.columns

# Per-column pre-processing applied by the DataFrameMapper:
#   - min-max scaling for mpg / displacement / horsepower
#   - standardisation for weight / acceleration
#   - TF-IDF vectorisation of the free-text 'car name' column
# NOTE(review): 'car name' is selected with a bare string instead of a list,
# unlike the numeric groups -- presumably so the vectorizer receives a 1-D
# sequence of strings; confirm against the sklearn-pandas documentation.
column_transforms = [
    (['mpg', 'displacement', 'horsepower'], [MinMaxScaler()]),
    (['weight', 'acceleration'], [StandardScaler()]),
    ('car name', TfidfVectorizer()),
]

# Chain the column mapper with a LinearSVC left at its default settings.
pipeline_steps = [
    ('mapper', DataFrameMapper(column_transforms)),
    ('model', LinearSVC()),
]
pipeline_obj = Pipeline(pipeline_steps)

# Kept as the final expression so the notebook echoes the fitted pipeline.
pipeline_obj.fit(X, y)

Pipeline(memory=None,
     steps=[('mapper', DataFrameMapper(default=False, df_out=False,
        features=[(['mpg', 'displacement', 'horsepower'], [MinMaxScaler(copy=True, feature_range=(0, 1))]), (['weight', 'acceleration'], [StandardScaler(copy=True, with_mean=True, with_std=True)]), ('car name', TfidfVectorizer(analyzer='...ax_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0))])

### Export the Pipeline object into PMML using the Nyoka package

In [2]:
from nyoka import skl_to_pmml

# Serialise the fitted pipeline (pre-processing steps and LinearSVC model)
# to a PMML file, passing the input column names and the target name so the
# exporter can label the fields.
pmml_output_file = "lsvc_tfidf_pmml.pmml"
skl_to_pmml(pipeline_obj, feature_names, target_name, pmml_output_file)