# PIPELINE 

In [98]:
import joblib
import numpy as np
import pandas as pd
from sklearn.base import is_classifier
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler,PolynomialFeatures
from sklearn.tree import DecisionTreeClassifier

## Load Iris Dataset

In [40]:
# Load the Iris dataset once, then split it into the feature matrix (X)
# and the class-label vector (y) used by every later cell.
# NOTE: despite the `_df` suffix, `iris_df` is the dict-like container
# returned by `load_iris()` (see its repr in the next cell), not a DataFrame.
iris_df=load_iris()
X,y=iris_df.data,iris_df.target

In [31]:
# Display the loaded dataset container to inspect its raw contents.
iris_df

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [32]:
# Hold out 30% of the samples as a test set; the fixed random_state keeps
# the split identical across reruns.
X_train,X_test,y_train,y_test=train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

## Pipeline Creation

In [118]:
# Build one pipeline per candidate model. Every pipeline standardises the
# features first; the LR/DT/RF pipelines then project onto 2 principal
# components before the final estimator.
Pipeline_LR=Pipeline([('Scalar1',StandardScaler()),
              ('Pca1', PCA(n_components=2)),
              ('LR_Classifier',LogisticRegression(random_state=0))])

# The tree-based estimators are randomised: pin random_state (same seed as
# the split and the logistic regression) so a rerun reproduces the scores.
Pipeline_DT=Pipeline([('Scalar2',StandardScaler()),
              ('Pca2', PCA(n_components=2)),
              ('DT_Classifier',DecisionTreeClassifier(random_state=0))])
Pipeline_RF=Pipeline([('Scalar3',StandardScaler()),
              ('Pca3', PCA(n_components=2)),
              ('RF_Classifier',RandomForestClassifier(random_state=0))])

# NOTE: LinearRegression is a regressor fitted on the integer class labels.
# Its `score` is the coefficient of determination (R^2) per the scikit-learn
# docs, not classification accuracy, so its number is not directly
# comparable with the classifiers' scores.
Pipeline_LNR=Pipeline([('Scalar4',StandardScaler()),
              ('polynomial', PolynomialFeatures(include_bias=False)),
              ('LNR_Classifier',LinearRegression())])

# Construct a KNN classifier with 5 neighbors (fitting happens in the loop below).
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
Pipeline_KNN=Pipeline([('Scalar5',StandardScaler()),
              ('knn',knn)])

# List the pipelines; the position in this list is the key used in Ps_dict.
Ps= [Pipeline_LR, Pipeline_DT, Pipeline_RF,Pipeline_LNR,Pipeline_KNN]

# For ease of reference, create a dictionary mapping list position -> display name.
Ps_dict={0:"LogisticRegression",1:"DecisionTree",2:"RandomForest",3:"LinearRegression",4:"KNN"}

# Trackers for the best-scoring pipeline, updated by the model-selection cell.
best_accuracy=0.0
best_classifier=0
best_pipeline=""

# Fit every pipeline on the training split.
for pipe in Ps:
    pipe.fit(X_train, y_train)

## Scoring the data

In [119]:
# Report each fitted pipeline's score on the held-out test split.
# NOTE: `score` is the estimator's default metric. For the LinearRegression
# pipeline that is R^2 (per the scikit-learn docs), even though the label
# printed below says "Accuracy".
for idx,pipeline in enumerate(Ps):
    label=Ps_dict[idx]
    test_score=pipeline.score(X_test, y_test)
    print("{} Test Accuracy: {}".format(label,test_score))

LogisticRegression Test Accuracy: 0.8666666666666667
DecisionTree Test Accuracy: 0.9111111111111111
RandomForest Test Accuracy: 0.9111111111111111
LinearRegression Test Accuracy: 0.9081010240112276
KNN Test Accuracy: 0.9777777777777777


In [117]:
# Pick the classifier with the highest test accuracy.
#
# The trackers are reset here so the result depends only on the fitted
# pipelines, not on whatever values an earlier (or out-of-order) cell run
# left in the kernel.
best_accuracy=0.0
best_classifier=0
best_pipeline=""

for i,model in enumerate(Ps):
    # Regressors (the LinearRegression pipeline) report R^2 from `score`,
    # which is not an accuracy; leave them out of the comparison.
    if not is_classifier(model):
        continue
    # Score once per model instead of re-evaluating inside the branch.
    accuracy=model.score(X_test,y_test)
    # Strict '>' keeps the earliest pipeline when two scores tie.
    if accuracy>best_accuracy:
        best_accuracy=accuracy
        best_pipeline=model
        best_classifier=i
print('classifier with the best accuracy:{}'.format(Ps_dict[best_classifier]))

classifier with the best accuracy:KNN


In [112]:
# Score each fitted pipeline on the test split, in the same order as the
# table rows below, and keep the individual scores under their own names.
# NOTE: the LinearRegression entry is that regressor's default `score`
# (R^2 per the scikit-learn docs), although the column is titled "Test Accuracy".
LR,DT,RF,LNR,KNN2=(
    pipeline.score(X_test, y_test)
    for pipeline in (Pipeline_LR,Pipeline_DT,Pipeline_RF,Pipeline_LNR,Pipeline_KNN)
)
PIPE_LIST=(LR,DT,RF,LNR,KNN2)

# One row per model, a single score column.
model_names=['LogisticRegression','DecisionTree','RandomForest','LinearRegression','KNN']
PIPE_DF=pd.DataFrame(data=PIPE_LIST,index=model_names,columns=['Test Accuracy'])
PIPE_DF

Unnamed: 0,Test Accuracy
LogisticRegression,0.866667
DecisionTree,0.911111
RandomForest,0.911111
LinearRegression,0.908101
KNN,0.977778
