In [1]:
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Synthetic classification dataset: 1000 samples with 20 features, of which
# only 2 are informative and none are redundant. The fixed seed makes the
# generated data reproducible.
x, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=0,
)

In [3]:
# Confirm the generated feature matrix has the requested (n_samples, n_features) shape.
x.shape

(1000, 20)

In [4]:
# Load the breast-cancer dataset and split it into training and test parts
# (seeded so the split is reproducible).
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=0,
)

# Compute the per-feature minimum and maximum on the training data only.
scaler = MinMaxScaler().fit(X_train)



In [5]:
# Rescale the training features with the fitted scaler, then learn an SVM on
# the scaled training data.
X_train_scaled = scaler.transform(X_train)
svm = SVC().fit(X_train_scaled, y_train)

# Apply the same training-derived scaling to the test features and report the
# SVM's accuracy on them.
X_test_scaled = scaler.transform(X_test)
print(" Test score: {:.2f}".format(svm.score(X_test_scaled, y_test)))



 Test score: 0.97


# Use Pipeline to link all the steps into one single object

In [6]:
from sklearn.pipeline import Pipeline

# Chain the scaler and the classifier into one estimator, so that a single
# fit/score call performs both steps.
# The step names carry no surrounding whitespace: they are the keys of
# pipe.named_steps and the prefixes of nested parameters (e.g. "svm__C"),
# so a stray leading space would have to be typed in every such reference.
pipe = Pipeline([("scaler", MinMaxScaler()), ("svm", SVC())])

In [7]:
# Fit the whole pipeline on the unscaled training data; scaling now happens inside the pipeline.
pipe.fit( X_train, y_train)

In [8]:
# Accuracy of the fitted pipeline on the unscaled test data.
print(" Test score: {:.2f}".format(pipe.score(X_test, y_test)))

 Test score: 0.97


In [9]:
# Predicted class labels for the test split, kept for the classification report below.
y_pred = pipe.predict(X_test)

In [10]:
from sklearn import metrics

# Per-class precision, recall and F1 of the pipeline's predictions against the true test labels.
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.98      0.94      0.96        53
           1       0.97      0.99      0.98        90

    accuracy                           0.97       143
   macro avg       0.97      0.97      0.97       143
weighted avg       0.97      0.97      0.97       143



# Use `make_pipeline` for a simpler pipeline definition

In [11]:

from sklearn.pipeline import make_pipeline



In [12]:
# make_pipeline builds the same kind of pipeline without explicit step names;
# each step is named automatically after its estimator class (lower-cased).
pipe = make_pipeline(MinMaxScaler(), SVC())
# "\n" (not "\ n") so that the step list starts on its own line instead of
# printing a literal backslash.
print(" Pipeline steps:\n{}".format(pipe.steps))

 Pipeline steps:\ n[('minmaxscaler', MinMaxScaler()), ('svc', SVC())]


In [13]:
# Fit the pipeline built with make_pipeline on the training data.
pipe.fit( X_train, y_train)

In [14]:
# Accuracy of the make_pipeline-built model on the test data.
print(" Test score: {:.2f}".format(pipe.score(X_test, y_test)))

 Test score: 0.97


In [15]:
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Three-stage model: standardise the features, project them onto 2 principal
# components, then classify with an SVM.
# The first step is named "scaler" (previously misspelled "scalar") so that it
# matches the naming used by the other pipelines in this notebook.
pipeline = Pipeline(
    [
        ("scaler", StandardScaler()),
        ("pca", PCA(n_components=2)),
        ("svm", SVC()),
    ]
)

In [16]:
# Hold out 30% of the synthetic dataset (x, y) for testing, with a fixed seed.
# Note: this rebinds X_train / X_test / y_train / y_test, which previously
# held the breast-cancer split.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=1,
)

In [17]:
# Display the pipeline's representation (it has not been fitted yet at this point in the notebook).
pipeline

In [18]:
# Shape of the training data after the preprocessing steps, i.e. everything
# except the final SVM. Slicing the pipeline keeps the standardisation step
# in front of PCA, so PCA is fitted on standardised features exactly as it
# will be inside pipeline.fit(), rather than on the raw inputs.
pipeline[:-1].fit_transform(X_train).shape

(700, 2)

In [19]:
# Fit all pipeline steps in sequence on the synthetic training split.
pipeline.fit(X_train,y_train)

In [21]:
# NOTE(review): this is the accuracy on the same data the pipeline was fitted on.
# The held-out X_test / y_test from the 70/30 split are not scored in the visible
# cells; confirm whether a test-set score was intended here.
pipeline.score(X_train,y_train)

0.7214285714285714

## Complex Transformation

In [25]:
from sklearn.impute import SimpleImputer

# Preprocessing for numeric columns: replace missing values with the column
# median, then standardise.
numeric_processor = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

In [27]:
from sklearn.preprocessing import OneHotEncoder

# Preprocessing for categorical columns: replace missing values with the most
# frequent value, then one-hot encode.
catagorical_processor = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder()),
    ]
)

In [29]:
from sklearn.compose import ColumnTransformer

# Route each group of columns through its own preprocessing pipeline.
# NOTE(review): 'age' is listed for both the numeric and the categorical
# branch, and 'age' / 'height' are the columns treated as categorical;
# confirm the intended column lists against the actual dataset.
processor = ColumnTransformer(
    transformers=[
        ("numeric", numeric_processor, ["salary", "age"]),
        ("catagorical", catagorical_processor, ["age", "height"]),
    ]
)

In [30]:
# Display the column transformer's representation.
processor

In [33]:
# Import the helper this cell actually calls (make_pipeline) alongside the
# existing Pipeline import, so the cell does not rely on an import made in a
# much earlier cell.
from sklearn.pipeline import Pipeline, make_pipeline

# Full model: column-wise preprocessing followed by an SVM classifier.
pipe = make_pipeline(processor, SVC())

In [34]:
# Display the combined preprocessing + SVM pipeline.
pipe