# Low Birth Weight

## [Data](http://www.statlab.uni-heidelberg.de/data/linmod/birthweight.html)
There is a birth weight dataset with 189 entires. These rows include 1 label as to weither the baby has a low birth weight, and 9 features.

## Goal
We want to classify an example with 9 features into low birth weight or not. Low birthweight is < 2.5kg

## Methodology
We are going to use a DNN classifier to solve this problem.

In [1]:
# dependancies
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import os
import collections
import time
import numpy as np
import requests
import csv

In [2]:
# Configure TF Logging
tf.logging.set_verbosity(tf.logging.INFO)

In [3]:
# Directories
model_dir_root = '/tmp/low_birthweight'
model_dir = os.path.join(model_dir_root, "deep_" + str(int(time.time())))

In [4]:
# Hyper Params
batch_size = 80
num_steps = 10000
Dataset = collections.namedtuple('Dataset', ['data', 'target'])

In [5]:
# Load Dataset
birth_weight_file = 'birth_weight.csv'

In [6]:
def construct_dataset ():
    # download data and create data file if file does not exist in current directory
    if not os.path.exists(birth_weight_file):
        birthdata_url = 'https://github.com/nfmcclure/tensorflow_cookbook/raw/master/01_Introduction/07_Working_with_Data_Sources/birthweight_data/birthweight.dat'
        birth_file = requests.get(birthdata_url)
        birth_data = birth_file.text.split('\r\n')
        birth_header = birth_data[1].split('\t')
        birth_data = [[float(x) for x in y.split('\t') if len(x)>=1] for y in birth_data[1:] if len(y)>=1]
        with open(birth_weight_file, "w") as f:
            writer = csv.writer(f)
            writer.writerows(birth_data)
            f.close()

    # read birth weight data into memory
    birth_data = []
    with open(birth_weight_file, newline='') as csvfile:
         csv_reader = csv.reader(csvfile)
         birth_header = next(csv_reader)
         for row in csv_reader:
             birth_data.append(row)

    birth_data = [[float(x) for x in row] for row in birth_data]

    # Pull out target variable
    y_vals = np.array([x[0] for x in birth_data])
    # Pull out predictor variables (not id, not target, and not birthweight)
    x_vals = np.array([x[1:9] for x in birth_data])

    # set for reproducible results
    seed = 99
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # Split data into train/test = 80%/20%
    train_indices = np.random.choice(len(x_vals), round(len(x_vals)*0.8), replace=False)
    test_indices = np.array(list(set(range(len(x_vals))) - set(train_indices)))
    x_vals_train = x_vals[train_indices]
    x_vals_test = x_vals[test_indices]
    y_vals_train = y_vals[train_indices]
    y_vals_test = y_vals[test_indices]
    
    # Create training_set Database Object
    train_target = np.array(y_vals_train, dtype=np.int)
    train_data = np.array(x_vals_train)
    training_set = Dataset(data=train_data, target=train_target)
    
    # Create test_set Database Object
    test_target = np.array(y_vals_test, dtype=np.int)
    test_data = np.array(x_vals_test)
    test_set = Dataset(data=test_data, target=test_target)    
    
    return training_set, test_set

In [7]:
def log_data(dataset, example, all=False):
    if all == True:
        # log the training dataset
        print(dataset)

    # log 1 example and 1 answer
    print("X: {}".format(dataset[0][example]))
    print("Y: {}".format(dataset[1][example]))

In [8]:
# Build input function
def generate_input_fn(dataset, batch_size=batch_size):
    def _input_fn():
        X = tf.constant(dataset[0])
        Y = tf.constant(dataset[1], dtype=tf.int32)
        
        X_batch, Y_batch = tf.train.shuffle_batch(
            [X,Y],
            batch_size=batch_size,
            capacity=3*batch_size,
            min_after_dequeue=2*batch_size,
            enqueue_many=True
        )
        
        return {'features': X_batch}, Y_batch
    return _input_fn

In [9]:
# Build classifier
def define_and_run_dnn_classifier(num_steps, logdir, lr=.1, batch_size=batch_size):
    feature_columns = [tf.contrib.layers.real_valued_column('features', dimension=8)]

#     linear classifier    
#     classifier = tf.estimator.LinearClassifier(
#         feature_columns=feature_columns,
#         model_dir=logdir,
#         n_classes=2,
#         optimizer='Ftrl',
#     )
    
#     DNN Classifier
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        n_classes=2,
        hidden_units=[10,10,10],
        optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=lr),
        model_dir=logdir
    )
    
    # Train classifer
    classifier.train(
        input_fn=generate_input_fn(
            training_set,
            batch_size=batch_size
        ),
        steps=num_steps
    )
    
    print("Finished running the deep training")
    print("evaluating DNN classifier accuracy")
    
    # Test classifer
    accuracy_score = classifier.evaluate(
        input_fn=generate_input_fn(
            test_set,
            batch_size=batch_size
        ),
        steps=100
    )['accuracy']
    
    print("DNN classifier accuracy: {0:f}".format(accuracy_score))
    
    # Make a prediction
    predictions = classifier.predict(
        input_fn=generate_input_fn(
            test_set,
            batch_size=1
        )
    )
    
    # make a prediction
    print("DNN classifier prediction: ")
    for i in range(10):
        prediction = predictions.__next__()['probabilities']
        print("\nPrediction for example {0}: {1}".format(i, np.argmax(prediction)))
        log_data(training_set, i)

In [10]:
training_set, test_set = construct_dataset()

# log an example
log_data(training_set, 0)

X: [  2.00000000e+01   1.05000000e+02   1.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   2.45000000e+03]
Y: 1


In [11]:
print("Running DNN Classifier")
define_and_run_dnn_classifier(
    num_steps,
    model_dir
)

Running DNN Classifier
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/low_birthweight/deep_1507001956', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/low_birthweight/deep_1507001956/model.ckpt.
INFO:tensorflow:loss = 439.208, step = 1
INFO:tensorflow:global_step/sec: 836.61
INFO:tensorflow:loss = 1.98343, step = 101 (0.121 sec)
INFO:tensorflow:global_step/sec: 860.785
INFO:tensorflow:loss = 4.77132, step = 201 (0.116 sec)
INFO:tensorflow:global_step/sec: 855.181
INFO:tensorflow:loss = 3.79132, step = 301 (0.117 sec)
INFO:tensorflow:global_step/sec: 904.395
INFO:tensorflow:loss = 0.946519, step = 401 (0.111 sec)
INFO:tensorflow:global_step/sec: 897.546
INFO:tensorf