In [78]:
# Adapted from sample digits recognition client on Scikit-Learn site.

import numpy as np
from sklearn import linear_model, datasets
from sklearn.cross_validation import train_test_split
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.lda import LDA
from sklearn import linear_model, datasets, metrics


In [79]:
# import some data to play with
iris = datasets.load_iris()
iris.viewkeys()

dict_keys(['target_names', 'data', 'target', 'DESCR', 'feature_names'])

In [90]:
iris

 'data': array([[ 5.1,  3.5,  1.4,  0.2],
        [ 4.9,  3. ,  1.4,  0.2],
        [ 4.7,  3.2,  1.3,  0.2],
        [ 4.6,  3.1,  1.5,  0.2],
        [ 5. ,  3.6,  1.4,  0.2],
        [ 5.4,  3.9,  1.7,  0.4],
        [ 4.6,  3.4,  1.4,  0.3],
        [ 5. ,  3.4,  1.5,  0.2],
        [ 4.4,  2.9,  1.4,  0.2],
        [ 4.9,  3.1,  1.5,  0.1],
        [ 5.4,  3.7,  1.5,  0.2],
        [ 4.8,  3.4,  1.6,  0.2],
        [ 4.8,  3. ,  1.4,  0.1],
        [ 4.3,  3. ,  1.1,  0.1],
        [ 5.8,  4. ,  1.2,  0.2],
        [ 5.7,  4.4,  1.5,  0.4],
        [ 5.4,  3.9,  1.3,  0.4],
        [ 5.1,  3.5,  1.4,  0.3],
        [ 5.7,  3.8,  1.7,  0.3],
        [ 5.1,  3.8,  1.5,  0.3],
        [ 5.4,  3.4,  1.7,  0.2],
        [ 5.1,  3.7,  1.5,  0.4],
        [ 4.6,  3.6,  1. ,  0.2],
        [ 5.1,  3.3,  1.7,  0.5],
        [ 4.8,  3.4,  1.9,  0.2],
        [ 5. ,  3. ,  1.6,  0.2],
        [ 5. ,  3.4,  1.6,  0.4],
        [ 5.2,  3.5,  1.5,  0.2],
        [ 5.2,  3.4,  1.4,  0.2],
      

In [80]:
X = iris.data
Y = iris.target

In [81]:
X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # 0-1 scaling

In [82]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, 
                                test_size=0.2, random_state=10)

In [95]:
# Models we will use
logistic = linear_model.LogisticRegression()
rbm = BernoulliRBM(random_state=0, verbose=False)

classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

In [96]:
lda = LDA(n_components=3)

In [105]:
rbm.learning_rate = 0.06
rbm.n_iter = 100

# More components tend to give better prediction performance, but larger
# fitting time
rbm.n_components = 100
logistic.C = 6000.0

# Training RBM-Logistic Pipeline
classifier.fit(X_train, Y_train)

Pipeline(steps=[('rbm', BernoulliRBM(batch_size=10, learning_rate=0.06, n_components=100, n_iter=100,
       random_state=0, verbose=False)), ('logistic', LogisticRegression(C=6000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False))])

In [106]:
# Training Logistic regression
logistic_classifier = linear_model.LogisticRegression(C=100.0)
logistic_classifier.fit(X_train, Y_train)

LogisticRegression(C=100.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [107]:
# Get predictions
print "The Logistic model using RBM features:"
print "Predict: ", classifier.predict(X_test)
print "Real:    ", Y_test

print

print "The Logistic model using raw features:"
print "Predict: ", logistic_classifier.predict(X_test)
print "Real:    ", Y_test

print

print "Linear Discriminant Analysis: "
lda.fit(X_train, Y_train)
print "Predict: ", lda.predict(X_test)
print "Real:    ", Y_test  

The Logistic model using RBM features:
Predict:  [1 2 0 2 0 0 2 1 0 1 2 2 1 0 0 2 2 0 0 0 2 2 2 0 2 0 1 1 2 2]
Real:     [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]

The Logistic model using raw features:
Predict:  [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 2 0 0 0 2 2 2 0 1 0 1 1 1 2]
Real:     [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]

Linear Discriminant Analysis: 
Predict:  [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]
Real:     [1 2 0 1 0 1 1 1 0 1 1 2 1 0 0 2 1 0 0 0 2 2 2 0 1 0 1 1 1 2]


In [108]:
###############################################################################
# Evaluation

print
print("Logistic regression using RBM features:\n%s\n" % (
    metrics.classification_report(
        Y_test,
        classifier.predict(X_test))))
print("Logistic regression using raw pixel features:\n%s\n" % (
    metrics.classification_report(
        Y_test,
        logistic_classifier.predict(X_test))))


Logistic regression using RBM features:
             precision    recall  f1-score   support

          0       0.91      1.00      0.95        10
          1       1.00      0.46      0.63        13
          2       0.54      1.00      0.70         7

avg / total       0.86      0.77      0.75        30


Logistic regression using raw pixel features:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        10
          1       1.00      0.92      0.96        13
          2       0.88      1.00      0.93         7

avg / total       0.97      0.97      0.97        30


