In [28]:
import pandas as pd
import numpy as np

from sklearn.svm import LinearSVC
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_classification

Start with a toy dataset

In [20]:
# Toy problem: a synthetic 4-feature classification set (seeded so the numbers
# are repeatable), scored with a "standardize, then linear SVM" pipeline.
X, y = make_classification(n_features=4, random_state=0)
model = make_pipeline(StandardScaler(), LinearSVC(random_state=0))

# No `cv` argument is given, so scikit-learn's default splitting is used;
# one accuracy score is produced per fold.
cv_scores = cross_val_score(model, X, y)
print(cv_scores)
print(cv_scores.mean())

[0.9  0.8  1.   0.95 0.85]
0.9


Use the heart dataset for classification

In [30]:
# Read the heart-disease table from the working directory and display its
# column labels so the feature groups below can be chosen from them.
full_data = pd.read_csv(filepath_or_buffer='heart.csv')
full_data.columns

Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

In [31]:
# Predictor columns, grouped by the preprocessing each group receives in the
# modelling cell: the first list is standardized, the second is one-hot encoded.
numerical_features = [
    'Age',
    'RestingBP',
    'Cholesterol',
    'FastingBS',
    'MaxHR',
    'Oldpeak',
]
categorical_features = [
    'Sex',
    'ChestPainType',
    'RestingECG',
    'ExerciseAngina',
    'ST_Slope',
]

In [65]:
# Heart-disease classifier: split the table into predictors and target,
# preprocess numeric and categorical columns separately, then fit and
# cross-validate a linear SVM on the result.
full_data = pd.read_csv('heart.csv')
X = full_data.drop('HeartDisease', axis=1)
y = full_data['HeartDisease']

numeric_transformer = make_pipeline(StandardScaler())
# handle_unknown='ignore': with the default ('error'), a category that happens
# to be absent from a training fold makes transform() raise on the validation
# fold, and that fold's score is lost. Ignoring it encodes the unseen category
# as all zeros instead; when every category is seen, the encoding is unchanged.
categorical_transformer = make_pipeline(OneHotEncoder(handle_unknown='ignore'))
preprocessor = ColumnTransformer(transformers=[('num', numeric_transformer, numerical_features),
                                               ('cat', categorical_transformer, categorical_features)])
model = make_pipeline(preprocessor,
                      LinearSVC(random_state=0, tol=0.0001, C=0.2, max_iter=2000))

# Fit on every row so that `model` itself holds learned parameters
# (cross_val_score below works on clones and leaves `model` untouched).
model.fit(X, y)

# Out-of-sample estimate: one accuracy score per fold, then their average.
cv_scores = cross_val_score(estimator=model,
                            X=X,
                            y=y)
print(cv_scores)
print(np.mean(cv_scores))

[0.85869565 0.83695652 0.8423913  0.8579235  0.75409836]
0.8300130672368734


In [66]:
# Weights learned by the final LinearSVC step of the fitted pipeline
# (one value per column coming out of the preprocessing step).
model.named_steps.linearsvc.coef_

array([[ 0.05229119,  0.01923378, -0.14946039,  0.15294729, -0.03611324,
         0.13054428, -0.24988617,  0.19801177,  0.36906474, -0.18575549,
        -0.1567956 , -0.07838804,  0.02185109, -0.02535151, -0.04837399,
        -0.17782125,  0.12594685, -0.03870253,  0.39902003, -0.4121919 ]])

In [67]:
# Constant (bias) term learned by the final LinearSVC step of the fitted pipeline.
model.named_steps.linearsvc.intercept_

array([-0.0518744])