# SKFlow

In [1]:
import skflow
from sklearn import datasets, metrics
from sklearn.linear_model import LogisticRegression
import numpy as np

iris = datasets.load_iris()
classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(classifier.predict(iris.data), iris.target)
print("Accuracy: %f" % score)

Step #1, avg. loss: 0.73940
Step #6, avg. loss: 0.98723
Step #11, avg. loss: 0.84934
Step #16, avg. loss: 0.61547
Step #21, avg. loss: 0.69915
Step #26, avg. loss: 0.50782
Step #31, avg. loss: 0.62075
Step #36, avg. loss: 0.71262
Step #41, avg. loss: 0.53582
Step #46, avg. loss: 0.60007
Accuracy: 0.673333


In [2]:
import skflow
from sklearn import datasets, metrics, preprocessing

boston = datasets.load_boston()
X = preprocessing.StandardScaler().fit_transform(boston.data)
regressor = skflow.TensorFlowLinearRegressor()
regressor.fit(X, boston.target)
score = metrics.mean_squared_error(regressor.predict(X), boston.target)
print ("MSE: %f" % score)

Step #1, avg. loss: 603.09216
Step #6, avg. loss: 595.99786
Step #11, avg. loss: 502.91187
Step #16, avg. loss: 423.48126
Step #21, avg. loss: 318.29276
Step #26, avg. loss: 274.46713
Step #31, avg. loss: 203.66376
Step #36, avg. loss: 132.25458
Step #41, avg. loss: 113.07619
Step #46, avg. loss: 106.61577
MSE: 96.454276


In [3]:
import skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(classifier.predict(iris.data), iris.target)
print("Accuracy: %f" % score)

Step #1, avg. loss: 2.53525
Step #6, avg. loss: 1.53022
Step #11, avg. loss: 1.15271
Step #16, avg. loss: 1.09855
Step #21, avg. loss: 1.02066
Step #26, avg. loss: 0.96356
Step #31, avg. loss: 0.88170
Step #36, avg. loss: 0.73331
Step #41, avg. loss: 0.73909
Step #46, avg. loss: 0.64568
Accuracy: 0.666667


In [4]:
import skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()

def my_model(X, y):
    """This is DNN with 10, 20, 10 hidden layers, and dropout of 0.5 probability."""
    layers = skflow.ops.dnn(X, [10, 20, 10], keep_prob=0.5)
    return skflow.models.logistic_regression(layers, y)

classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(classifier.predict(iris.data), iris.target)
print("Accuracy: %f" % score)

Step #1, avg. loss: 1.88602
Step #6, avg. loss: 1.61660
Step #11, avg. loss: 1.24053
Step #16, avg. loss: 1.29660
Step #21, avg. loss: 1.20027
Step #26, avg. loss: 1.18047
Step #31, avg. loss: 1.06887
Step #36, avg. loss: 0.94805
Step #41, avg. loss: 0.86077
Step #46, avg. loss: 0.93819
Accuracy: 0.553333


# Titantic Data

### Using SKLearn

In [5]:
import pandas
from sklearn.cross_validation import train_test_split

data = pandas.read_csv('tf_examples/data/titanic_train.csv')
y, X = data['Survived'], data[['Age', 'SibSp', 'Fare']].fillna(0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
lr = LogisticRegression()
lr.fit(X_train, y_train)
print metrics.accuracy_score(lr.predict(X_test), y_test)

0.664804469274


## Using FKFlow


In [6]:
import skflow
import random

random.seed(42) # to sample data the same way
classifier = skflow.TensorFlowLinearClassifier(n_classes=2, batch_size=128, steps=500, learning_rate=0.05)
classifier.fit(np.array(X_train), y_train)
print metrics.accuracy_score(classifier.predict(X_test), y_test)

Step #1, avg. loss: 26.16736
Step #51, avg. loss: 4.60090
Step #101, avg. loss: 3.14806
Step #151, avg. loss: 2.95934
Step #201, avg. loss: 2.99349
Step #251, avg. loss: 3.05963
Step #301, avg. loss: 2.85306
Step #351, avg. loss: 3.03066
Step #401, avg. loss: 2.99025
Step #451, avg. loss: 2.82770
0.68156424581


# Using Luther College Admission Data

In [7]:
data = pandas.read_csv('data_int.csv')

## Attending or not

In [8]:
y1, X1 = data['attending'], data[['distance', 'class_percentile', 'visited','act']].fillna(0)
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2)

In [9]:
lr = LogisticRegression()
lr.fit(X_train1, y_train1)
print metrics.accuracy_score(lr.predict(X_test1), y_test1)

0.715083798883


In [10]:
classifier = skflow.TensorFlowLinearClassifier(n_classes=2, batch_size=128, steps=500, learning_rate=0.05)
classifier.fit(np.array(X_train1), y_train1)
print metrics.accuracy_score(classifier.predict(X_test1), y_test1)

Step #1, avg. loss: 51.82570
Step #51, avg. loss: 4.47064
Step #101, avg. loss: 2.99888
Step #151, avg. loss: 3.06597
Step #201, avg. loss: 2.94103
Step #251, avg. loss: 2.85918
Step #301, avg. loss: 2.91118
Step #351, avg. loss: 2.94023
Step #401, avg. loss: 2.84875
Step #451, avg. loss: 2.84601
0.480446927374


## Accepted or not

In [11]:
y2, X2 = data['accepted'], data[['distance', 'class_percentile', 'visited','act']].fillna(0)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.2)

In [12]:
lr = LogisticRegression()
lr.fit(X_train2, y_train2)
print metrics.accuracy_score(lr.predict(X_test2), y_test2)

0.698324022346


In [13]:
classifier = skflow.TensorFlowLinearClassifier(n_classes=2, batch_size=128, steps=500, learning_rate=0.05)
classifier.fit(np.array(X_train2), y_train2)
print metrics.accuracy_score(classifier.predict(X_test2), y_test2)

Step #1, avg. loss: 36.51136
Step #51, avg. loss: 4.90246
Step #101, avg. loss: 3.16234
Step #151, avg. loss: 2.98122
Step #201, avg. loss: 2.86248
Step #251, avg. loss: 3.08886
Step #301, avg. loss: 2.93802
Step #351, avg. loss: 2.82123
Step #401, avg. loss: 2.91605
Step #451, avg. loss: 2.84474
0.396648044693
