# Out of Core Learning

In [None]:
# Write out some toy data: split the digits dataset into ten interleaved
# batches and pickle each batch to its own file under data/.
import os
import pickle

from sklearn.datasets import load_digits

digits = load_digits()

X, y = digits.data, digits.target

# Create the output directory if it is missing so the writes below do not
# fail on a fresh checkout.
os.makedirs("data", exist_ok=True)

for i in range(10):
    # Batch i holds every tenth sample, starting at offset i.
    # The context manager closes (and flushes) the file deterministically
    # instead of leaving the handle to the garbage collector.
    with open("data/batch_%02d.pickle" % i, "wb") as batch_file:
        # Protocol -1 selects the highest pickle protocol available.
        pickle.dump((X[i::10], y[i::10]), batch_file, -1)

In [None]:
from sklearn.linear_model import SGDClassifier


In [None]:
# Train a linear classifier incrementally: batches 0-8 are loaded one at a
# time and fed to partial_fit, so only one batch is held in memory at once.
# Batch 9 is left untouched here.
# NOTE(review): per the scikit-learn docs, partial_fit performs a single
# epoch per call, so max_iter likely has no effect here -- confirm.
sgd = SGDClassifier(max_iter=100)

for i in range(9):
    # Use a context manager so each batch file is closed as soon as it is read.
    with open("data/batch_%02d.pickle" % i, 'rb') as batch_file:
        X_batch, y_batch = pickle.load(batch_file)
    # The full label set is passed on every call because a single batch is
    # not guaranteed to contain every class.
    sgd.partial_fit(X_batch, y_batch, classes=range(10))

In [None]:
# Evaluate on batch 9, which the training loop (range(9)) never loaded.
# The context manager closes the file handle once the batch is unpickled.
with open("data/batch_09.pickle", 'rb') as test_file:
    X_test, y_test = pickle.load(test_file)

# Bare last expression so the notebook displays the score.
sgd.score(X_test, y_test)

Kernel Approximations
=======================

In [None]:
from sklearn.kernel_approximation import RBFSampler

# Out-of-core training on an approximate RBF feature map: each batch is
# transformed with RBFSampler and then passed to SGDClassifier.partial_fit.
sgd = SGDClassifier(max_iter=100)
kernel_approximation = RBFSampler(gamma=.001, n_components=400)

for i in range(9):
    # Use a context manager so each batch file is closed as soon as it is read.
    with open("data/batch_%02d.pickle" % i, 'rb') as batch_file:
        X_batch, y_batch = pickle.load(batch_file)
    if i == 0:
        # Fit the feature map once, on the first batch only; every later
        # batch (and the test batch) must go through this same fitted map.
        kernel_approximation.fit(X_batch)
    X_transformed = kernel_approximation.transform(X_batch)
    # The full label set is passed on every call because a single batch is
    # not guaranteed to contain every class.
    sgd.partial_fit(X_transformed, y_batch, classes=range(10))

In [None]:
# Evaluate on batch 9, which the training loop (range(9)) never loaded.
# The context manager closes the file handle once the batch is unpickled.
with open("data/batch_09.pickle", 'rb') as test_file:
    X_test, y_test = pickle.load(test_file)

# The test data goes through the same fitted feature map as the training
# batches; bare last expression so the notebook displays the score.
sgd.score(kernel_approximation.transform(X_test), y_test)

# Hashing Vectorizer

# Exercise
Compare the speed and accuracy of ``LogisticRegression``, ``LinearSVC`` and ``SGDClassifier`` on the bank campaign and adult datasets. Try ``LogisticRegression(solver='sag')``.