# scikit-learn → PMML

## Exporter: LinearSVC

### Dataset used: auto-mpg

### Steps:    

##### - Build a pre-processing Pipeline (scalers and tf-idf vectorizer) and train a sklearn LinearSVC on its output
##### - Build PMML using Nyoka exporter

### Model building (using a pipeline) for the auto-mpg dataset

In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn_pandas import DataFrameMapper
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import LinearSVC


# Load the auto-mpg data; the relative path means the CSV must sit in the
# current working directory.
df = pd.read_csv('auto-mpg.csv')

# 'cylinders' is the class label. 'model year' and 'origin' are also kept
# out of the feature set.
target_name = "cylinders"
y = df[target_name]
X = df.drop([target_name, 'model year', 'origin'], axis=1)
feature_names = X.columns

# Per-column pre-processing: min-max scaling, standard scaling, and tf-idf
# on the 'car name' column.
column_transforms = [
    (['mpg', 'displacement', 'horsepower'], [MinMaxScaler()]),
    (['weight', 'acceleration'], [StandardScaler()]),
    ('car name', TfidfVectorizer()),
]

# The pipeline deliberately holds only the pre-processing step; the
# classifier is fitted separately below on the transformed features.
pipeline_obj = Pipeline([('mapper', DataFrameMapper(column_transforms))])

xx = pipeline_obj.fit_transform(X)

lsvc = LinearSVC()
lsvc.fit(xx, y)

LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

### Export the Pipeline object and the trained model into PMML using the Nyoka package

In [2]:
from nyoka import model_to_pmml
# File name for the generated PMML document; handed to model_to_pmml below
# via its pmml_f_name argument.
pmml_file_name = "lsvc_tfidf_pmml.pmml"

In [3]:
# Description of the single model entry handed to the exporter: the fitted
# pre-processing pipeline, the separately trained classifier, and the
# feature/target names. No hyperparameters or pre/post-processing scripts
# are supplied.
model_entry = {
    "hyperparameters": None,
    "preProcessingScript": None,
    "pipelineObj": pipeline_obj,
    "modelObj": lsvc,
    "featuresUsed": feature_names,
    "targetName": target_name,
    "postProcessingScript": None,
    "taskType": "trainAndscore",
}

# The exporter takes a mapping of model key -> entry; only one model here.
toExportDict = {"model1": model_entry}

pmml = model_to_pmml(toExportDict, pmml_f_name=pmml_file_name)