In [1]:
import pandas as pd

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.externals import joblib


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [None]:
import numpy as np

In [None]:
iris_df=load_iris()

In [None]:
iris_df.data

In [None]:
X_train, X_test, y_train, y_test = train_test_split(iris_df.data, iris_df.target, test_size=0.3, random_state=0)

In [None]:
## Pipeline creation
## 1. Preprocess the data with StandardScaler
## 2. Reduce dimensionality with PCA
## 3. Apply a classifier


In [None]:
# Logistic-regression pipeline: standardise the features, project them onto
# two principal components, then classify.
pipeline_lr = Pipeline(steps=[
    ('scalar1', StandardScaler()),
    ('pca1', PCA(n_components=2)),
    ('lr_classifier', LogisticRegression(random_state=0)),
])

In [None]:
# Decision-tree pipeline: standardise the features, project them onto two
# principal components, then classify.
# random_state is pinned (as in the logistic-regression pipeline) so repeated
# runs build the same tree and report the same test accuracy.
pipeline_dt = Pipeline([
    ('scalar2', StandardScaler()),
    ('pca2', PCA(n_components=2)),
    ('dt_classifier', DecisionTreeClassifier(random_state=0)),
])

In [None]:
# Random-forest pipeline: standardise the features, project them onto two
# principal components, then classify.
# random_state is pinned (as in the logistic-regression pipeline) so the
# forest's bootstrap sampling, and therefore its accuracy, is reproducible.
pipeline_randomforest = Pipeline([
    ('scalar3', StandardScaler()),
    ('pca3', PCA(n_components=2)),
    ('rf_classifier', RandomForestClassifier(random_state=0)),
])

In [None]:
# Order matters: a pipeline's position in this list is the integer key used to
# look up its display name in pipe_dict.
pipelines = [
    pipeline_lr,
    pipeline_dt,
    pipeline_randomforest,
]

In [None]:
# Running record of the best pipeline seen so far: its test accuracy, its
# index in `pipelines`, and the pipeline object itself ("" until one is chosen).
best_accuracy, best_classifier, best_pipeline = 0.0, 0, ""

In [None]:
# Display names keyed by each pipeline's position in `pipelines`.
pipe_dict = dict(enumerate(('Logistic Regression', 'Decision Tree', 'RandomForest')))

# Fit every candidate pipeline on the training split.
for pipe in pipelines:
    pipe.fit(X_train, y=y_train)

In [None]:
# Report each fitted pipeline's score on the held-out split, labelled by name.
for i, model in enumerate(pipelines):
    print("{} Test Accuracy: {}".format(
        pipe_dict[i],
        model.score(X_test, y=y_test),
    ))

In [None]:
# Pick the pipeline with the highest test-set accuracy.
# Each pipeline is scored exactly once per iteration and the value reused,
# rather than calling score() again when recording a new best.
for i, model in enumerate(pipelines):
    accuracy = model.score(X_test, y_test)
    # Strict '>' keeps the earliest pipeline in the list when accuracies tie.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_pipeline = model
        best_classifier = i


In [None]:
print('Classifier with best accuracy:{}'.format(pipe_dict[best_classifier]))

In [None]:
print(best_classifier)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Single-step pipeline; the "classifier" step is the slot that grid_param
# fills with different estimators during the grid search.
pipe = Pipeline(steps=[
    ("classifier", RandomForestClassifier()),
])

In [None]:
# Search space for the grid search: each dict swaps a different estimator into
# the pipeline's "classifier" step and lists the hyper-parameter values to try.
grid_param = [
        # Logistic regression, L1 vs. L2 penalty over a range of C values.
        # The solver is pinned to liblinear because the default solver in
        # current scikit-learn does not accept penalty='l1'; without this,
        # every 'l1' candidate fails to fit.
        {"classifier": [LogisticRegression()],
         "classifier__penalty": ['l2', 'l1'],
         "classifier__C": np.logspace(0, 4, 10),
         "classifier__solver": ['liblinear']
        },

        # Logistic regression, L2 penalty only, compared across several solvers.
        {"classifier": [LogisticRegression()],
         "classifier__penalty": ['l2'],
         "classifier__C": np.logspace(0, 4, 10),
         "classifier__solver": ['newton-cg', 'saga', 'sag', 'liblinear']
        },

        # Random forest: ensemble size and tree-shape constraints.
        {"classifier": [RandomForestClassifier()],
         "classifier__n_estimators": [10, 100, 1000],
         "classifier__max_depth": [5, 8, 15, 25, 30, None],
         "classifier__min_samples_leaf": [1, 2, 5, 10, 15, 100],
         "classifier__max_leaf_nodes": [2, 5, 10]
        }

]

In [None]:
# 5-fold cross-validated search over every combination in grid_param;
# n_jobs=-1 asks scikit-learn to run the fits in parallel.
gridsearch = GridSearchCV(
    estimator=pipe,
    param_grid=grid_param,
    cv=5,
    verbose=0,
    n_jobs=-1,
)
# best_model is whatever fit() returns; later cells read best_estimator_ and
# call score() on it.
best_model = gridsearch.fit(X_train, y=y_train)

In [None]:
print(best_model.best_estimator_)

In [None]:
print("The mean accuracy of the model is", best_model.score(X_test,y_test))

In [None]:
import seaborn as sns
df=sns.load_dataset('titanic')

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df=df[['sex','embarked','alone','pclass','survived']]

In [None]:
df.head()

In [None]:
import numpy as np
# Binary-encode 'sex': 1 where the value is "male", 0 for anything else.
# Not idempotent: on a second run the column already holds integers, so no
# value equals "male" and every row becomes 0.
df['sex'] = np.where(df['sex'].eq("male"), 1, 0)

In [None]:
ordinal_label = {k: i for i, k in enumerate(df['embarked'].unique(), 0)}

In [None]:
ordinal_label

In [None]:
df['embarked'] = df['embarked'].map(ordinal_label)

In [None]:
df.head()

In [None]:
df['alone']=np.where(df['alone']==True,1,0)

In [None]:
df.head()

In [None]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test= train_test_split(df[['sex','embarked','alone','pclass']],
                                               df['survived'], test_size=0.3, random_state=0)

In [None]:
X_train.head()

In [None]:
from sklearn.feature_selection import chi2
# Chi-squared test of each training feature against the target.
# NOTE(review): per the scikit-learn docs chi2 returns a pair
# (chi-squared statistics, p-values); despite the variable name, the first
# element is not an F-statistic.
f_p_values = chi2(X_train, y=y_train)