# Applying Logistic Regression, Decision Tree, Random Forest on IRIS DataSet using Pipeline

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
#from sklearn.externals import joblib
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides scikit-learn convergence/deprecation warnings — confirm that is intended.
warnings.filterwarnings(action='ignore')

In [7]:
# Load the Iris dataset bundled with scikit-learn and echo its feature matrix.
# NOTE(review): the bare expression prints the whole array; a slice such as
# iris.data[:5] would keep the cell output short.
iris = load_iris()
iris.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [4]:
# Display the names of the target classes in the dataset
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [5]:
# Train/test split: hold out 30% of the samples for evaluation.
# The fixed random_state keeps the split identical across runs.
X_train,X_test,y_train,y_test=train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    random_state=21,
)

In [6]:
# Pipeline creation: each pipeline chains three steps
## 1. Scaling using StandardScaler
## 2. Dimensionality reduction using PCA
## 3. Applying the classifier

In [8]:
# Logistic Regression pipeline: scale features -> keep 2 principal components -> classify
pipeline_lr=Pipeline(
    steps=[
        ('scaling1',StandardScaler()),
        ('PCA1',PCA(n_components=2)),
        ('lr',LogisticRegression(random_state=41)),
    ]
)

In [9]:
# Decision Tree pipeline: scale features -> keep 2 principal components -> classify
pipeline_dt=Pipeline(
    steps=[
        ('scaling2',StandardScaler()),
        ('PCA2',PCA(n_components=2)),
        ('dt',DecisionTreeClassifier(random_state=41)),
    ]
)

In [10]:
# Random Forest pipeline: scale features -> keep 2 principal components -> classify
pipeline_rf=Pipeline(
    steps=[
        ('scaling3',StandardScaler()),
        ('PCA3',PCA(n_components=2)),
        ('rf',RandomForestClassifier(random_state=41)),
    ]
)

In [11]:
# Collect the pipelines in a fixed order; positions 0, 1, 2 are the keys
# used to look up each pipeline's display name in `pipedict`.
pipelines=[
    pipeline_lr,
    pipeline_dt,
    pipeline_rf,
]

In [12]:
# Running "best so far" state for model selection.
# best_accuracy starts at 0.0 so that any positive score replaces it.
# best_classifier (a fitted pipeline) and best_pipeline (its index in `pipelines`)
# have no meaningful value yet, so they start as None rather than as
# placeholder values of an unrelated type.
best_accuracy=0.0
best_classifier=None
best_pipeline=None

In [13]:
# Display name for each pipeline, keyed by its position in `pipelines`
pipedict={
    0:'Logistic Regression',
    1:'Decision Tree',
    2:'Random Forest',
}

# Train every pipeline end-to-end on the training split
for pipe in pipelines:
    pipe.fit(X_train,y_train)

In [14]:
# Report the held-out accuracy of each fitted pipeline, labelled via `pipedict`
for idx,fitted in enumerate(pipelines):
    label=pipedict[idx]
    accuracy=fitted.score(X_test,y_test)
    print("{} Test Accuracy is {} ".format(label,accuracy))

Logistic Regression Test Accuracy is 0.9111111111111111 
Decision Tree Test Accuracy is 0.8666666666666667 
Random Forest Test Accuracy is 0.8444444444444444 


In [15]:
# Select the pipeline with the highest held-out accuracy.
# Each pipeline is scored exactly once. max() returns the first maximal index,
# so ties resolve to the earliest pipeline, the same as a strict ">" scan.
# The cell recomputes all three best_* names itself, so it gives the same
# answer on every re-run and does not rely on earlier initial values.
# NOTE(review): the winner is chosen on the same test split whose score is
# reported, so the printed accuracy is optimistically biased; consider
# selecting via cross-validation on the training split instead.
test_scores=[model.score(X_test,y_test) for model in pipelines]
best_pipeline=max(range(len(test_scores)),key=test_scores.__getitem__)
best_accuracy=test_scores[best_pipeline]
best_classifier=pipelines[best_pipeline]
print('Best Alogrithm from LR,DT,RF for IRIS DataSet is {} and its score is {} '.format(pipedict[best_pipeline],best_accuracy))

Best Alogrithm from LR,DT,RF for IRIS DataSet is Logistic Regression and its score is 0.9111111111111111 
