In [18]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score

# Compare a batch logistic regression with an SGD-trained ("online") logistic
# regression on the iris data, scoring each by its mean accuracy over a
# shuffled 10-fold cross validation.

# Note that the iris dataset is available in sklearn by default.
# This data is also conveniently preprocessed.
iris = datasets.load_iris()
X = iris["data"]
Y = iris["target"]

# Fixed seed so the shuffled folds and the SGD fit are repeatable between runs.
SEED = 0
numFolds = 10
kf = KFold(len(X), numFolds, shuffle=True, random_state=SEED)

# The two estimators being compared; each one is fit again on every training
# fold inside the loop below.
lr_Models = LogisticRegression()
online_lr = SGDClassifier(loss = "log", penalty = "l2", tol = 0.0001,
                          random_state = SEED)

# Running sums of the per-fold accuracies. They are initialised once, before
# the loop: resetting them inside the loop body would throw away every fold
# except the last one and make the averages below meaningless.
total_lr = 0.0
total_olr = 0.0

for train_indices, test_indices in kf:

    train_X = X[train_indices, :]; train_Y = Y[train_indices]
    test_X = X[test_indices, :]; test_Y = Y[test_indices]

    lr_Models.fit(train_X, train_Y)
    online_lr.fit(train_X, train_Y)

    # Show the coefficients learned on this fold for a side-by-side comparison.
    print("LR: ",lr_Models.coef_)
    print("Online LR: ", online_lr.coef_)
    lr_predictions = lr_Models.predict(test_X)
    olr_predictions = online_lr.predict(test_X)

    total_lr += accuracy_score(test_Y, lr_predictions)
    total_olr += accuracy_score(test_Y, olr_predictions)

# Mean accuracy across the folds.
lr_accuracy = total_lr / numFolds
olr_accuracy = total_olr / numFolds
print("Accuracy score of {0}: {1}".format(lr_Models, lr_accuracy))
print("Accuracy score of {0}: {1}".format(online_lr, olr_accuracy))


LR:  [[ 0.38715959  1.43403451 -2.182532   -1.00483942]
 [ 0.52622748 -1.64837858  0.41108874 -1.21387852]
 [-1.6529227  -1.48398806  2.41050286  2.43227391]]
Online LR:  [[  1.80041137  22.63374478 -25.46296305 -10.80246917]
 [  4.92475164 -73.57525254  14.87811212 -60.59285879]
 [-85.27062639 -81.3554905  137.91309395 142.87688919]]
LR:  [[ 0.43864945  1.38773806 -2.21292198 -0.95851209]
 [ 0.24154522 -1.37077793  0.62265639 -1.29103722]
 [-1.63322064 -1.50066463  2.41373525  2.35112868]]
Online LR:  [[   7.71604938   27.77777778  -43.98148148  -18.00411523]
 [   9.01411485  -97.80757401   36.18379785 -108.70141089]
 [ -94.137893    -99.17741039  149.09325757  138.5291281 ]]
LR:  [[ 0.38791327  1.42623961 -2.19968778 -0.9883412 ]
 [ 0.3993458  -1.52419713  0.49011764 -1.15786176]
 [-1.65229581 -1.51363638  2.42508084  2.40403029]]
Online LR:  [[ 10.969373    32.2568078  -52.84716913 -26.92376323]
 [ 14.64339318 -88.94385299  21.20105652 -85.72459939]
 [-99.77453507 -78.96086495 166.9



In [20]:
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
import numpy as np
import pandas as pd
from sklearn.cross_validation import KFold
from sklearn.metrics import accuracy_score

# Load the iris dataset that ships with sklearn; it is already preprocessed,
# so the feature matrix and the labels can be used as-is.
iris = datasets.load_iris()
X, Y = iris["data"], iris["target"]

# A single shuffled 10-fold splitter; the same `kf` object is iterated once
# per model in the loop below.
numFolds = 10
kf = KFold(len(X), numFolds, shuffle=True)

# Estimator classes and, position by position, the keyword arguments each one
# is constructed with. Every class is scored by its mean accuracy over the
# cross-validation folds.
Models = [LogisticRegression, SGDClassifier]
params = [{}, {"loss": "log", "penalty": "l2", 'tol':0.0001}]

for param, Model in zip(params, Models):
    fold_scores = []
    for train_indices, test_indices in kf:
        train_X, train_Y = X[train_indices, :], Y[train_indices]
        test_X, test_Y = X[test_indices, :], Y[test_indices]

        # Build a fresh estimator for this fold, fit it, and score it on the
        # held-out samples.
        reg = Model(**param)
        reg.fit(train_X, train_Y)
        predictions = reg.predict(test_X)
        fold_scores.append(accuracy_score(test_Y, predictions))

    # Sum in fold order, then divide by the number of folds.
    total = sum(fold_scores)
    accuracy = total / numFolds
    print("Accuracy score of {0}: {1}".format(Model.__name__, accuracy))


Accuracy score of LogisticRegression: 0.9533333333333335
Accuracy score of SGDClassifier: 0.8466666666666667


