In [4]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score

# Fix the global NumPy seed so the shuffled folds and the SGD fit below are
# reproducible when the cell is run from a fresh kernel.
np.random.seed(1)

# The iris dataset ships with sklearn and is already preprocessed.
iris = datasets.load_iris()
X = iris["data"]
Y = iris["target"]

numFolds = 10
# NOTE(review): this is the legacy sklearn.cross_validation.KFold signature
# (n_samples, n_folds). That module was removed in scikit-learn 0.20; on newer
# versions use sklearn.model_selection.KFold(n_splits=...).split(X) instead.
kf = KFold(len(X), numFolds, shuffle=True)

# Two estimator instances, re-fitted from scratch on every fold: a batch
# logistic regression and an SGD-trained ("online") logistic regression.
# Each one is scored by its mean accuracy over 10-fold cross validation.
lr_Models = LogisticRegression()
online_lr = SGDClassifier(loss = "log", penalty = "l2", tol = 0.0001)

# Running sums of the per-fold accuracies; divided by numFolds after the loop.
total_lr = 0.0
total_olr = 0.0

for train_indices, test_indices in kf:

    train_X = X[train_indices, :]; train_Y = Y[train_indices]
    test_X = X[test_indices, :]; test_Y = Y[test_indices]

    lr_Models.fit(train_X, train_Y)
    online_lr.fit(train_X, train_Y)

    # Show the learned weights of both models side by side for this fold.
    print("LR: ",lr_Models.coef_)
    print("Online LR: ", online_lr.coef_)
    lr_predictions = lr_Models.predict(test_X)
    olr_predictions = online_lr.predict(test_X)

    total_lr += accuracy_score(test_Y, lr_predictions)
    total_olr += accuracy_score(test_Y, olr_predictions)

# Mean accuracy across the folds.
lr_accuracy = total_lr / numFolds
olr_accuracy = total_olr / numFolds
print("Accuracy score of {0}: {1}".format(lr_Models, lr_accuracy))
print("Accuracy score of {0}: {1}".format(online_lr, olr_accuracy))


LR:  [[ 0.42295494  1.40764057 -2.22113637 -0.97115118]
 [ 0.42174329 -1.56982621  0.56801606 -1.38529565]
 [-1.68941373 -1.43445549  2.40110239  2.41550719]]
Online LR:  [[  3.08546457  10.0304055  -15.17550024  -6.17302881]
 [ 32.58686578 -96.14967434  35.17204825 -96.4057426 ]
 [-98.58396006 -91.87605934 189.14779145 170.99651363]]
LR:  [[ 0.39620037  1.43871304 -2.20709626 -1.01215004]
 [ 0.63474428 -1.75863242  0.34002905 -1.18324367]
 [-1.78623204 -1.25189452  2.46204849  2.3828801 ]]
Online LR:  [[ 3.05831963e+00  1.01834532e+01 -2.33213586e+01 -1.20756519e+01]
 [ 2.72556522e+01 -1.09721648e+02  7.47242489e-02 -8.49052799e+01]
 [-1.17837472e+02 -8.77797569e+01  1.69384331e+02  1.64163078e+02]]
LR:  [[ 0.4084191   1.44163656 -2.22849303 -1.01391554]
 [ 0.42564492 -1.62982288  0.56856001 -1.28207449]
 [-1.61681403 -1.40122052  2.30683033  2.44096287]]
Online LR:  [[   8.24347701   31.05695197  -50.73986383  -22.94727629]
 [  12.60630314  -78.37572892   30.68194524  -73.86690022]
 



In [20]:
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
import numpy as np
import pandas as pd
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score

# Seed the global NumPy RNG (same seed as the earlier cell) so the shuffled
# folds and the SGD fit give the same scores every time this cell is run,
# regardless of what was executed in the kernel before it.
np.random.seed(1)

# The iris dataset ships with sklearn and is already preprocessed.
iris = datasets.load_iris()
X = iris["data"]
Y = iris["target"]

numFolds = 10
# NOTE(review): this is the legacy sklearn.cross_validation.KFold signature
# (n_samples, n_folds). That module was removed in scikit-learn 0.20; on newer
# versions use sklearn.model_selection.KFold(n_splits=...).split(X) instead.
kf = KFold(len(X), numFolds, shuffle=True)

# Estimator classes paired with the keyword arguments used to construct them.
# For each class, report the mean accuracy over 10-fold cross validation.
Models = [LogisticRegression, SGDClassifier]
params = [{}, {"loss": "log", "penalty": "l2", 'tol':0.0001}]

for param, Model in zip(params, Models):
    # Running sum of per-fold accuracies for the current model class.
    total = 0
    for train_indices, test_indices in kf:
        train_X = X[train_indices, :]; train_Y = Y[train_indices]
        test_X = X[test_indices, :]; test_Y = Y[test_indices]
        # A fresh estimator per fold, so nothing carries over between folds.
        reg = Model(**param)
        reg.fit(train_X, train_Y)
        predictions = reg.predict(test_X)
        total += accuracy_score(test_Y, predictions)

    accuracy = total / numFolds
    print("Accuracy score of {0}: {1}".format(Model.__name__, accuracy))


Accuracy score of LogisticRegression: 0.9533333333333335
Accuracy score of SGDClassifier: 0.8466666666666667


