In [1]:
from collections.abc import Sequence
from sklearn import preprocessing
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shutil
import os


# Encode text values to dummy variables(i.e. [1,0,0],[0,1,0],[0,0,1] for red,green,blue)
def encode_text_dummy(df, name):
    """One-hot encode the column ``name`` of ``df`` in place.

    One indicator column is appended per distinct value, named
    ``"<name>-<value>"``, and the source column is then removed from ``df``.
    Nothing is returned; the caller's DataFrame is modified.
    """
    indicators = pd.get_dummies(df[name])
    for category in indicators.columns:
        df[f"{name}-{category}"] = indicators[category]
    # The indicator columns now carry the information of the original column.
    df.drop(columns=[name], inplace=True)



# Encode text values to indexes(i.e. [1],[2],[3] for red,green,blue).
def encode_text_index(df, name):
    """Replace the column ``name`` of ``df`` with label-encoded values, in place.

    A scikit-learn ``LabelEncoder`` is fitted on the column and the column is
    overwritten with the encoder's output. The encoder's ``classes_`` attribute
    is returned so that an encoded value can be mapped back to its original
    label.
    """
    encoder = preprocessing.LabelEncoder()
    codes = encoder.fit_transform(df[name])
    df[name] = codes
    return encoder.classes_


# Encode a numeric column as zscores
def encode_numeric_zscore(df, name, mean=None, sd=None):
    """Standardise the column ``name`` of ``df`` in place: ``(value - mean) / sd``.

    ``mean`` and ``sd`` default to the column's own mean and standard deviation
    (as computed by pandas). Pass them explicitly to reuse statistics obtained
    from another data set, e.g. to apply training-set scaling to test data.
    """
    column = df[name]
    center = column.mean() if mean is None else mean
    spread = column.std() if sd is None else sd
    df[name] = (column - center) / spread


# Convert all missing values in the specified column to the median
def missing_median(df, name):
    """Fill the missing values of column ``name`` with that column's median, in place."""
    column = df[name]
    df[name] = column.fillna(column.median())


# Convert all missing values in the specified column to the default
def missing_default(df, name, default_value):
    """Fill the missing values of column ``name`` with ``default_value``, in place."""
    filled = df[name].fillna(default_value)
    df[name] = filled


# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df, target):
    """Split ``df`` into a feature matrix and a target array, both float32.

    Every column other than ``target`` becomes a feature. When the target
    column's dtype is int64 or int32 the problem is treated as classification
    and the target is one-hot encoded (one output column per distinct value);
    for any other dtype it is treated as regression and the target values are
    returned as they are.

    Returns a tuple ``(x, y)`` of NumPy arrays.
    """
    feature_columns = [column for column in df.columns if column != target]

    # Determine the dtype of the target column.
    target_dtype = df[target].dtypes
    if isinstance(target_dtype, Sequence):
        target_dtype = target_dtype[0]

    features = df[feature_columns].values.astype(np.float32)

    # Integer target -> classification (one-hot); anything else -> regression.
    # TensorFlow likes 32 bits.
    if target_dtype in (np.int64, np.int32):
        labels = pd.get_dummies(df[target]).values
    else:
        labels = df[target].values
    return features, labels.astype(np.float32)

# Nicely formatted time string
def hms_string(sec_elapsed):
    """Format a duration given in seconds as ``H:MM:SS.ss``.

    Hours are not padded, minutes are zero-padded to two digits and seconds are
    shown with two decimals, zero-padded to five characters.
    """
    seconds_per_hour = 60 * 60
    hours = int(sec_elapsed / seconds_per_hour)
    minutes = int((sec_elapsed % seconds_per_hour) / 60)
    seconds = sec_elapsed % 60
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)


# Regression chart.
def chart_regression(pred,y,sort=True):
    """Plot expected and predicted regression outputs as two line series.

    Parameters
    ----------
    pred : array-like
        Model predictions. Any shape is accepted and flattened, so the
        (n, 1) array returned by Keras ``model.predict`` can be passed as is.
    y : array-like
        Expected values, flattened the same way. Must hold as many values
        as ``pred``.
    sort : bool, default True
        Order the samples by expected value before plotting, so the expected
        series is monotonic and the prediction series can be compared against it.

    The chart is displayed with ``plt.show()``; nothing is returned.
    """
    # Flatten both inputs: a 2-D array such as an (n, 1) prediction column
    # cannot be used as a DataFrame column, and np.ravel also accepts lists
    # and Series where ndarray.flatten() would not be available.
    t = pd.DataFrame({'pred': np.ravel(pred), 'y': np.ravel(y)})
    if sort:
        t = t.sort_values(by=['y'])
    plt.plot(t['y'].tolist(), label='expected')
    plt.plot(t['pred'].tolist(), label='prediction')
    plt.ylabel('output')
    plt.legend()
    plt.show()

# Remove all rows where the specified column is at least sd standard deviations away from the column mean
def remove_outliers(df, name, sd):
    """Drop, in place, the rows whose ``name`` value is an outlier.

    A row is dropped when the absolute distance of its value from the column
    mean is greater than or equal to ``sd`` times the column's standard
    deviation. Rows are removed by index label.
    """
    column = df[name]
    distance_from_mean = np.abs(column - column.mean())
    threshold = sd * column.std()
    outlier_labels = df.index[distance_from_mean >= threshold]
    df.drop(outlier_labels, axis=0, inplace=True)


# Encode a column to a range between normalized_low and normalized_high.
def encode_numeric_range(df, name, normalized_low=-1, normalized_high=1,
                         data_low=None, data_high=None):
    """Rescale the column ``name`` of ``df`` linearly, in place.

    Values are mapped so that ``data_low`` becomes ``normalized_low`` and
    ``data_high`` becomes ``normalized_high``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column; it is modified in place.
    name : column label
        Column to rescale.
    normalized_low, normalized_high : number
        Bounds of the output range (default -1 and 1).
    data_low, data_high : number, optional
        Bounds of the input range. Each one that is left as ``None`` is taken
        from the column itself (its minimum / maximum, ignoring missing
        values). Supplying them allows a range measured on other data to be
        reused.

    Notes
    -----
    If ``data_high`` equals ``data_low`` the division is by zero and the column
    ends up as NaN/inf; no special handling is applied.
    """
    # Resolve each bound independently. Deriving data_high only when data_low
    # was missing would discard a caller-supplied data_high, and would leave
    # data_high as None when only data_low was supplied.
    if data_low is None:
        data_low = df[name].min()
    if data_high is None:
        data_high = df[name].max()

    df[name] = ((df[name] - data_low) / (data_high - data_low)) \
               * (normalized_high - normalized_low) + normalized_low

In [25]:
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
import pandas as pd
import io
import requests
import numpy as np
from sklearn import metrics

# Location of the auto-mpg data set, relative to the notebook.
filename_read = os.path.join('../data',"auto-mpg.csv")

# 'NA' and '?' mark missing entries in the file; read them as NaN.
df = pd.read_csv(filename_read,na_values=['NA','?'])

# Keep the car names for the report at the end of the cell.
cars = df['name']

# Remove the name column so that it is not used as a model input.
# axis is passed by keyword: the positional form drop('name', 1) is deprecated
# and is rejected by pandas 2.0 and later.
df.drop('name', axis=1, inplace=True)

# Replace all missing horsepower values with the median of the horsepower column.
missing_median(df, 'horsepower')

# Expand origin into one indicator column per distinct value.
# NOTE(review): origin is presumably a category code rather than a quantity,
# which is why it is one-hot encoded instead of being fed in as a number - confirm.
encode_text_dummy(df, 'origin')

# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
# (x: every column except mpg, y: the mpg column).
x,y = to_xy(df,"mpg")

# Create our neural network base
model = Sequential()

# Add the layers to the neural network

# input_dim is the number of feature columns in x (every column except mpg).
# relu is a widely used activation function for hidden layers.
model.add(Dense(25, input_dim=x.shape[1], activation='relu')) # hidden
model.add(Dense(10,activation='relu')) # hidden
model.add(Dense(1)) # output: a single unit with no activation, i.e. the predicted mpg

model.compile(loss='mean_squared_error', optimizer='adam')

# Train our neural network
model.fit(x,y,verbose=0,epochs=100)

# Now that we have trained our neural network, we can make predictions.
# The predictions are made on the training data itself, so the score below
# measures fit, not generalisation.
pred = model.predict(x)

# scikit-learn's signature is (y_true, y_pred); the value is the same either way.
score = np.sqrt(metrics.mean_squared_error(y,pred))
print("Final score (RMSE): {}".format(score))

# Show the first ten cars next to their actual and predicted mpg.
for i in range(10):
    print("{}. Car name: {}, MPG: {}, predicted MPG: {}".format(i+1,cars[i],y[i],pred[i]))




Final score (RMSE): 8.032126426696777
1. Car name: chevrolet chevelle malibu, MPG: 18.0, predicted MPG: [11.477814]
2. Car name: buick skylark 320, MPG: 15.0, predicted MPG: [9.389679]
3. Car name: plymouth satellite, MPG: 18.0, predicted MPG: [10.040405]
4. Car name: amc rebel sst, MPG: 16.0, predicted MPG: [12.03186]
5. Car name: ford torino, MPG: 17.0, predicted MPG: [12.090607]
6. Car name: ford galaxie 500, MPG: 15.0, predicted MPG: [7.8791203]
7. Car name: chevrolet impala, MPG: 14.0, predicted MPG: [5.522309]
8. Car name: plymouth fury iii, MPG: 14.0, predicted MPG: [6.8180547]
9. Car name: pontiac catalina, MPG: 14.0, predicted MPG: [6.5097966]
10. Car name: amc ambassador dpl, MPG: 15.0, predicted MPG: [6.930512]


In [28]:
# iris example with tensorflow
import pandas as pd
import io
import requests
import numpy as np
from sklearn import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping


# Load the iris data set; 'NA' and '?' entries are read as missing values.
df=pd.read_csv("../data/iris.csv",na_values=['NA','?'])

# Replace the species text with label-encoded values; `species` keeps the
# original class labels so an encoded value can be mapped back to its name.
species = encode_text_index(df,"species")

# x: every column except species; y: the species column.
# NOTE(review): the encoded species column is presumably int64/int32, in which
# case to_xy one-hot encodes it (one column per class) - confirm.
x,y = to_xy(df,"species")

model = Sequential()

# Two relu hidden layers, then a softmax output with one unit per column of y.
model.add(Dense(50, input_dim=x.shape[1], activation='relu')) # Hidden 1
model.add(Dense(25, activation='relu')) # Hidden 2
model.add(Dense(y.shape[1], activation='softmax')) # Output

# categorical_crossentropy is paired with the softmax output above.
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Train on the whole data set; verbose=2 prints one line per epoch.
model.fit(x,y,verbose=2,epochs=100)


Train on 150 samples
Epoch 1/100
150/150 - 0s - loss: 1.2380
Epoch 2/100
150/150 - 0s - loss: 0.9208
Epoch 3/100
150/150 - 0s - loss: 0.7874
Epoch 4/100
150/150 - 0s - loss: 0.7455
Epoch 5/100
150/150 - 0s - loss: 0.6997
Epoch 6/100
150/150 - 0s - loss: 0.6515
Epoch 7/100
150/150 - 0s - loss: 0.6177
Epoch 8/100
150/150 - 0s - loss: 0.5863
Epoch 9/100
150/150 - 0s - loss: 0.5579
Epoch 10/100
150/150 - 0s - loss: 0.5318
Epoch 11/100
150/150 - 0s - loss: 0.5090
Epoch 12/100
150/150 - 0s - loss: 0.4892
Epoch 13/100
150/150 - 0s - loss: 0.4710
Epoch 14/100
150/150 - 0s - loss: 0.4522
Epoch 15/100
150/150 - 0s - loss: 0.4385
Epoch 16/100
150/150 - 0s - loss: 0.4222
Epoch 17/100
150/150 - 0s - loss: 0.4075
Epoch 18/100
150/150 - 0s - loss: 0.3971
Epoch 19/100
150/150 - 0s - loss: 0.3807
Epoch 20/100
150/150 - 0s - loss: 0.3715
Epoch 21/100
150/150 - 0s - loss: 0.3616
Epoch 22/100
150/150 - 0s - loss: 0.3473
Epoch 23/100
150/150 - 0s - loss: 0.3408
Epoch 24/100
150/150 - 0s - loss: 0.3276
Epoc

<tensorflow.python.keras.callbacks.History at 0x7faa35740c90>