## Original Dataset and Modeling

In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Generating the date
x_data = np.random.randint(1000, 8000, 1000000)
y_true = x_data + 250

tf.get_logger().setLevel('ERROR')

# Defining feature collumns
feat_cols = [tf.feature_column.numeric_column('size', shape=[1])]
estimator = tf.estimator.LinearRegressor(feature_columns=feat_cols, optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))

# Splitting the data to train and evaluation data
x_train, x_eval, y_train, y_eval = train_test_split(x_data, y_true, test_size=0.3, random_state=101)

# Specify batching/shuffling/repeating options with numpy_input_fn
input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train}, y_train, batch_size=1, num_epochs=None, shuffle=True)
train_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train}, y_train, batch_size=1, num_epochs=None, shuffle=True)
eval_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_eval}, y_eval, batch_size=1, num_epochs=None, shuffle=True)

estimator.train(input_fn=input_func, steps=1000)

# Get and print the training and evaluation metrics
train_metrics = estimator.evaluate(input_fn=train_input_func, steps=500)
eval_metrics = estimator.evaluate(input_fn=eval_input_func, steps=500)
print(train_metrics)
print(eval_metrics)

brand_new_data = np.array([1000, 2000, 7000])
input_fn_predict = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': brand_new_data}, num_epochs=1, shuffle=False)
prediction_result = estimator.predict(input_fn=input_fn_predict)

for res in prediction_result:
    print(res['predictions'])

{'average_loss': 9881.979, 'label/mean': 4771.938, 'loss': 9881.979, 'prediction/mean': 4736.143, 'global_step': 1000}
{'average_loss': 10313.907, 'label/mean': 4734.206, 'loss': 10313.907, 'prediction/mean': 4696.633, 'global_step': 1000}
[1048.5254]
[2095.5664]
[7330.7725]


## Steps how to predict prices correctly

### 1. Hyperparameters tuning

- Increasing/ decreasing `learning rate`
- Changing `Optimizers` 
- Increasing `batch size` 
- Increasing `epochs` number 
- Increasing, decreasing estimator `steps` size 

### 2. Data normalization

### Changing Learning rate

In [63]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Generating the date
x_data = np.random.randint(1000, 8000, 1000000)
y_true = x_data + 250

tf.get_logger().setLevel('ERROR')

# Defining feature collumns
feat_cols = [tf.feature_column.numeric_column('size', shape=[1])]
estimator = tf.estimator.LinearRegressor(feature_columns=feat_cols, optimizer=tf.keras.optimizers.Adam(learning_rate=0.00239))


# Splitting the data to train and evaluation data
x_train, x_eval, y_train, y_eval = train_test_split(x_data, y_true, test_size=0.3, random_state=101)

# Specify batching/shuffling/repeating options with numpy_input_fn
input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train}, y_train, batch_size=1, num_epochs=None, shuffle=True)
train_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train}, y_train, batch_size=1, num_epochs=None, shuffle=True)
eval_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_eval}, y_eval, batch_size=1, num_epochs=None, shuffle=True)

estimator.train(input_fn=input_func, steps=1000)

# Get and print the training and evaluation metrics
train_metrics = estimator.evaluate(input_fn=train_input_func, steps=500)
eval_metrics = estimator.evaluate(input_fn=eval_input_func, steps=500)
print(train_metrics)
print(eval_metrics)

brand_new_data = np.array([1000, 2000, 7000])
input_fn_predict = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': brand_new_data}, num_epochs=1, shuffle=False)
prediction_result = estimator.predict(input_fn=input_fn_predict)

for res in prediction_result:
    print(res['predictions'])

{'average_loss': 60558.742, 'label/mean': 4733.352, 'loss': 60558.742, 'prediction/mean': 4487.2676, 'global_step': 1000}
{'average_loss': 60553.957, 'label/mean': 4749.742, 'loss': 60553.957, 'prediction/mean': 4503.668, 'global_step': 1000}
[1001.7902]
[2002.4004]
[7005.4517]


###  Changing Optimizer and learning rate

In [69]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Generating the date
x_data = np.random.randint(1000, 8000, 1000000)
y_true = x_data + 250

tf.get_logger().setLevel('ERROR')

# Defining feature collumns
feat_cols = [tf.feature_column.numeric_column('size', shape=[1])]
estimator = tf.estimator.LinearRegressor(feature_columns=feat_cols, optimizer=tf.keras.optimizers.Nadam(learning_rate=0.00239))


# Splitting the data to train and evaluation data
x_train, x_eval, y_train, y_eval = train_test_split(x_data, y_true, test_size=0.3, random_state=101)

# Specify batching/shuffling/repeating options with numpy_input_fn
input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train}, y_train, batch_size=1, num_epochs=None, shuffle=True)
train_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train}, y_train, batch_size=1, num_epochs=None, shuffle=True)
eval_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_eval}, y_eval, batch_size=1, num_epochs=None, shuffle=True)

estimator.train(input_fn=input_func, steps=1000)

# Get and print the training and evaluation metrics
train_metrics = estimator.evaluate(input_fn=train_input_func, steps=500)
eval_metrics = estimator.evaluate(input_fn=eval_input_func, steps=500)
print(train_metrics)
print(eval_metrics)

brand_new_data = np.array([1000, 2000, 7000])
input_fn_predict = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': brand_new_data}, num_epochs=1, shuffle=False)
prediction_result = estimator.predict(input_fn=input_fn_predict)

for res in prediction_result:
    print(res['predictions'])

{'average_loss': 69545.36, 'label/mean': 4759.116, 'loss': 69545.36, 'prediction/mean': 4495.482, 'global_step': 1000}
{'average_loss': 69687.79, 'label/mean': 4839.498, 'loss': 69687.79, 'prediction/mean': 4575.602, 'global_step': 1000}
[997.8887]
[1994.605]
[6978.187]


### Changing batch size, epochs, steps, learning rate

In [81]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Generating the date
x_data = np.random.randint(1000, 8000, 1000000)
y_true = x_data + 250

tf.get_logger().setLevel('ERROR')

# Defining feature collumns
feat_cols = [tf.feature_column.numeric_column('size', shape=[1])]
estimator = tf.estimator.LinearRegressor(feature_columns=feat_cols, optimizer=tf.keras.optimizers.Adam(learning_rate=0.00239))


# Splitting the data to train and evaluation data
x_train, x_eval, y_train, y_eval = train_test_split(x_data, y_true, test_size=0.3, random_state=101)

# Specify batching/shuffling/repeating options with numpy_input_fn
input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train}, y_train, batch_size=10, num_epochs=2, shuffle=True)
train_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train}, y_train, batch_size=1, num_epochs=None, shuffle=True)
eval_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_eval}, y_eval, batch_size=1, num_epochs=None, shuffle=True)

estimator.train(input_fn=input_func, steps=820)

# Get and print the training and evaluation metrics
train_metrics = estimator.evaluate(input_fn=train_input_func, steps=500)
eval_metrics = estimator.evaluate(input_fn=eval_input_func, steps=500)
print(train_metrics)
print(eval_metrics)

brand_new_data = np.array([1000, 2000, 7000])
input_fn_predict = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': brand_new_data}, num_epochs=1, shuffle=False)
prediction_result = estimator.predict(input_fn=input_fn_predict)

for res in prediction_result:
    print(res['predictions'])

{'average_loss': 64073.938, 'label/mean': 4815.43, 'loss': 64073.938, 'prediction/mean': 4562.3086, 'global_step': 820}
{'average_loss': 64090.656, 'label/mean': 4851.206, 'loss': 64090.656, 'prediction/mean': 4598.055, 'global_step': 820}
[1000.1244]
[1999.214]
[6994.6616]


### Trying with data normalization

- In this example i am not sure should i denormalized data in the end?

In [116]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler


# Generating the date
x_data = np.random.randint(1000, 8000, 1000000)
y_true = x_data + 250

tf.get_logger().setLevel('ERROR')

# Defining feature collumns
feat_cols = [tf.feature_column.numeric_column('size', shape=[1])]
estimator = tf.estimator.LinearRegressor(feature_columns=feat_cols, optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))

# Splitting the data to train and evaluation data
x_train, x_eval, y_train, y_eval = train_test_split(x_data, y_true, test_size=0.3, random_state=101)


#data normalizing
norm = StandardScaler()

x_train_norm = normalize(x_train).reshape(-1, 1)
x_eval_norm = normalize(x_eval).reshape(-1, 1)
y_train_norm = normalize(y_train).reshape(-1, 1)
y_eval_norm = normalize(y_eval).reshape(-1, 1)


# Specify batching/shuffling/repeating options with numpy_input_fn
input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train_norm}, y_train_norm, batch_size=1, num_epochs=None, shuffle=True)
train_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_train_norm}, y_train_norm, batch_size=1, num_epochs=None, shuffle=True)
eval_input_func = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': x_eval_norm}, y_eval_norm, batch_size=1, num_epochs=None, shuffle=True)

estimator.train(input_fn=input_func, steps=500)

# Get and print the training and evaluation metrics
train_metrics = estimator.evaluate(input_fn=train_input_func, steps=500)
eval_metrics = estimator.evaluate(input_fn=eval_input_func, steps=500)
print(train_metrics)
print(eval_metrics)

brand_new_data = np.array([1000, 2000, 7000])
input_fn_predict = tf.compat.v1.estimator.inputs.numpy_input_fn({'size': brand_new_data}, num_epochs=1, shuffle=False)
prediction_result = estimator.predict(input_fn=input_fn_predict)

for res in prediction_result:
    print(res['predictions'])
#     print(res['predictions'] * x_data.std() + x_data.mean())

{'average_loss': 4.2054263e-10, 'label/mean': -0.10399162, 'loss': 4.2054263e-10, 'prediction/mean': -0.103987165, 'global_step': 500}
{'average_loss': 3.9006845e-10, 'label/mean': 0.017260196, 'loss': 3.9006845e-10, 'prediction/mean': 0.01726218, 'global_step': 500}
[999.9796]
[1999.9592]
[6999.8574]


### Addition
### Keras implementation


In this example also i am not sure should i denormalized data in the end?

In [124]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.models import Model
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import numpy as np

In [125]:
x_data = np.random.randint(1000, 8000, 1000000)
y_true = x_data + 250

In [126]:
x_data = x_data.reshape(-1, 1)

In [127]:
y_true = y_true.reshape(-1, 1)

In [135]:
model = Sequential()
model.add(Dense(1,input_dim=1))
model.add(Dense(1,activation='linear'))
model.compile(optimizer=keras.optimizers.Adam(lr=0.1), loss='mse', metrics=['mse'])

In [136]:
norm = StandardScaler()
x_data_norm = norm.fit_transform(x_data)
y_data_norm = norm.fit_transform(y_true)

In [137]:
model.fit(x_data_norm, y_data_norm, batch_size=10, epochs=1, shuffle=False)



<tensorflow.python.keras.callbacks.History at 0x1d00dc0c160>

In [139]:
brand_new_data = np.array([1000, 2000, 7000])
pred = model.predict(brand_new_data)
print(pred)

[[ 999.9919]
 [1999.9839]
 [6999.944 ]]
