# SKFlow

In [16]:
import skflow
from sklearn import datasets, metrics
from sklearn.linear_model import LogisticRegression
import numpy as np

iris = datasets.load_iris()
classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(classifier.predict(iris.data), iris.target)
print("Accuracy: %f" % score)

Step #1, avg. loss: 0.78198
Step #6, avg. loss: 0.77503
Step #11, avg. loss: 0.64446
Step #16, avg. loss: 0.55808
Step #21, avg. loss: 0.53258
Step #26, avg. loss: 0.66417
Step #31, avg. loss: 0.54165
Step #36, avg. loss: 0.60295
Step #41, avg. loss: 0.58445
Step #46, avg. loss: 0.58475
Accuracy: 0.866667


In [17]:
import skflow
from sklearn import datasets, metrics, preprocessing

boston = datasets.load_boston()
X = preprocessing.StandardScaler().fit_transform(boston.data)
regressor = skflow.TensorFlowLinearRegressor()
regressor.fit(X, boston.target)
score = metrics.mean_squared_error(regressor.predict(X), boston.target)
print ("MSE: %f" % score)

Step #1, avg. loss: 621.17230
Step #6, avg. loss: 527.71362
Step #11, avg. loss: 461.25693
Step #16, avg. loss: 413.54556
Step #21, avg. loss: 369.97858
Step #26, avg. loss: 253.18668
Step #31, avg. loss: 193.09424
Step #36, avg. loss: 184.48141
Step #41, avg. loss: 137.55246
Step #46, avg. loss: 122.01766
MSE: 88.205218


In [18]:
import skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(classifier.predict(iris.data), iris.target)
print("Accuracy: %f" % score)

Step #1, avg. loss: 2.26972
Step #6, avg. loss: 1.37979
Step #11, avg. loss: 1.22527
Step #16, avg. loss: 1.02256
Step #21, avg. loss: 0.92510
Step #26, avg. loss: 0.90141
Step #31, avg. loss: 0.77239
Step #36, avg. loss: 0.72056
Step #41, avg. loss: 0.76597
Step #46, avg. loss: 0.64700
Accuracy: 0.666667


In [19]:
import skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()

def my_model(X, y):
    """This is DNN with 10, 20, 10 hidden layers, and dropout of 0.5 probability."""
    layers = skflow.ops.dnn(X, [10, 20, 10], keep_prob=0.5)
    return skflow.models.logistic_regression(layers, y)

classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(classifier.predict(iris.data), iris.target)
print("Accuracy: %f" % score)

Step #1, avg. loss: 2.08194
Step #6, avg. loss: 1.54336
Step #11, avg. loss: 1.32321
Step #16, avg. loss: 1.29243
Step #21, avg. loss: 1.26755
Step #26, avg. loss: 0.99657
Step #31, avg. loss: 1.01694
Step #36, avg. loss: 0.91661
Step #41, avg. loss: 0.86029
Step #46, avg. loss: 0.82708
Accuracy: 0.553333


# Titantic Data

### Using SKLearn

In [20]:
import pandas
from sklearn.cross_validation import train_test_split

data = pandas.read_csv('tf_examples/data/titanic_train.csv')
y, X = data['Survived'], data[['Age', 'SibSp', 'Fare']].fillna(0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
lr = LogisticRegression()
lr.fit(X_train, y_train)
print metrics.accuracy_score(lr.predict(X_test), y_test)

0.664804469274


## Using FKFlow


In [21]:
import skflow
import random

random.seed(42) # to sample data the same way
classifier = skflow.TensorFlowLinearClassifier(n_classes=2, batch_size=128, steps=500, learning_rate=0.05)
classifier.fit(np.array(X_train), y_train)
print metrics.accuracy_score(classifier.predict(X_test), y_test)

Step #1, avg. loss: 26.16736
Step #51, avg. loss: 4.60090
Step #101, avg. loss: 3.14806
Step #151, avg. loss: 2.95934
Step #201, avg. loss: 2.99349
Step #251, avg. loss: 3.05963
Step #301, avg. loss: 2.85306
Step #351, avg. loss: 3.03066
Step #401, avg. loss: 2.99025
Step #451, avg. loss: 2.82770
0.68156424581


# Using Luther College Admission Data

In [41]:
data = pandas.read_csv('data_int.csv')

## Attending or not

In [42]:
y1, X1 = data['attending'], data[['distance', 'class_percentile', 'visited','act']].fillna(0)
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2)

In [43]:
lr = LogisticRegression()
lr.fit(X_train1, y_train1)
print metrics.accuracy_score(lr.predict(X_test1), y_test1)

0.664804469274


In [44]:
classifier = skflow.TensorFlowLinearClassifier(n_classes=2, batch_size=128, steps=500, learning_rate=0.05)
classifier.fit(np.array(X_train1), y_train1)
print metrics.accuracy_score(classifier.predict(X_test1), y_test1)

Step #1, avg. loss: 38.62872
Step #51, avg. loss: 4.37117
Step #101, avg. loss: 3.13203
Step #151, avg. loss: 2.82053
Step #201, avg. loss: 2.97387
Step #251, avg. loss: 2.96353
Step #301, avg. loss: 3.01105
Step #351, avg. loss: 2.76385
Step #401, avg. loss: 2.79359
Step #451, avg. loss: 2.97688
0.614525139665


## Accepted or not

In [45]:
y2, X2 = data['accepted'], data[['distance', 'class_percentile', 'visited','act']].fillna(0)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.2)

In [46]:
lr = LogisticRegression()
lr.fit(X_train2, y_train2)
print metrics.accuracy_score(lr.predict(X_test2), y_test2)

0.653631284916


In [47]:
classifier = skflow.TensorFlowLinearClassifier(n_classes=2, batch_size=128, steps=500, learning_rate=0.05)
classifier.fit(np.array(X_train2), y_train2)
print metrics.accuracy_score(classifier.predict(X_test2), y_test2)

Step #1, avg. loss: 34.80655
Step #51, avg. loss: 4.67989
Step #101, avg. loss: 3.27887
Step #151, avg. loss: 2.97719
Step #201, avg. loss: 2.96897
Step #251, avg. loss: 2.93718
Step #301, avg. loss: 2.78516
Step #351, avg. loss: 2.87127
Step #401, avg. loss: 2.71147
Step #451, avg. loss: 2.86169
0.541899441341
