In [None]:
Perceptron Classifier

In [1]:
import numpy as np
import pandas as pd

data_web_address = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"

column_names = ['pregnancy_x',
'plasma_con',
'blood_pressure',
'skin_mm',
'insulin',
'bmi',
'pedigree_func',
'age',
'target']

feature_names = column_names[:-1]

all_data = pd.read_csv(data_web_address , names=column_names) 

X = all_data[feature_names]
y = all_data['target']

In [2]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,stratify=y)

In [3]:
from sklearn.preprocessing import StandardScaler 

scaler = StandardScaler()
scaler.fit(X_train) 
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
from sklearn.linear_model import Perceptron

pr = Perceptron()
pr.fit(X_train_scaled, y_train)



Perceptron(alpha=0.0001, class_weight=None, eta0=1.0, fit_intercept=True,
      max_iter=5, n_iter=None, n_jobs=1, penalty=None, random_state=0,
      shuffle=True, tol=None, verbose=0, warm_start=False)

In [5]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

skf = StratifiedKFold(n_splits=3)
cross_val_score(pr, X_train_scaled, y_train, cv=skf,scoring='roc_auc').mean()

0.72861097734760383

In [6]:
from sklearn.metrics import accuracy_score, roc_auc_score

print "Classification accuracy : ", accuracy_score(y_test, pr.predict(X_test_scaled))
print "ROC-AUC Score : ",roc_auc_score(y_test, pr.predict(X_test_scaled))

Classification accuracy :  0.720779220779
ROC-AUC Score :  0.61037037037


In [7]:
#scikit-learn version 0.18.1
from sklearn.model_selection import GridSearchCV

param_dist = {'alpha': [0.1,0.01,0.001,0.0001], 
 'penalty': [None, 'l2','l1','elasticnet'],
 'random_state': [7],
 'class_weight':['balanced',None],'eta0': [0.25,0.5,0.75,1.0], 
 'warm_start':[True,False], 'n_iter':[50]}

gs_perceptron = GridSearchCV(pr, param_dist, scoring='roc_auc',cv=skf).fit(X_train_scaled, y_train)

In [7]:
#scikit-learn version 0.19.0
#used this code in the book: the rest of the book code will work with both scikit 0.8.1 or 0.19.0
from sklearn.model_selection import GridSearchCV

param_dist = {'alpha': [0.1,0.01,0.001,0.0001], 
 'penalty': [None, 'l2','l1','elasticnet'],
 'random_state': [7],
 'class_weight':['balanced',None],'eta0': [0.25,0.5,0.75,1.0], 
 'warm_start':[True,False], 'max_iter':[50], 'tol':[1e-3]}

gs_perceptron = GridSearchCV(pr, param_dist, scoring='roc_auc',cv=skf).fit(X_train_scaled, y_train)

In [8]:
gs_perceptron.best_params_

{'alpha': 0.001,
 'class_weight': None,
 'eta0': 0.5,
 'max_iter': 50,
 'penalty': 'l2',
 'random_state': 7,
 'tol': 0.001,
 'warm_start': True}

In [9]:
gs_perceptron.best_score_

0.79221656570311072

In [10]:
best_perceptron = gs_perceptron.best_estimator_
best_perceptron

Perceptron(alpha=0.001, class_weight=None, eta0=0.5, fit_intercept=True,
      max_iter=50, n_iter=None, n_jobs=1, penalty='l2', random_state=7,
      shuffle=True, tol=0.001, verbose=0, warm_start=True)

In [11]:
from sklearn.ensemble import BaggingClassifier 
param_dist = {
 'max_samples': [0.5,1.0],
 'max_features' : [0.5,1.0],
 'oob_score' : [True, False],
 'n_estimators': [100],
 'n_jobs':[-1],
 'base_estimator__alpha': [0.001,0.002],
 'base_estimator__penalty': [None, 'l2','l1','elasticnet'], }

ensemble_estimator = BaggingClassifier(base_estimator = best_perceptron)
bag_perceptrons = GridSearchCV(ensemble_estimator, param_dist,scoring='roc_auc',cv=skf,n_jobs=-1).fit(X_train_scaled, y_train)

In [12]:
bag_perceptrons.best_score_

0.83299842529587864

In [13]:
bag_perceptrons.best_params_

{'base_estimator__alpha': 0.001,
 'base_estimator__penalty': 'l1',
 'max_features': 1.0,
 'max_samples': 1.0,
 'n_estimators': 100,
 'n_jobs': -1,
 'oob_score': True}