## The purpose of this notebook is to evalaate whether DNNRegression is a suitable approach to understand and predict market caps of corporations, based on a large number of features and a dataset of corporation financial information

## NOTE: All the values represented are in millions

In [55]:
# Standard Imports
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from statistics import mean
%matplotlib inline
path = './Private/Data/MasterDataset.csv'
conversion_factor = 1000000

In [56]:
master_frame = pd.read_csv(path, sep='\t')
master_frame.drop(['Unnamed: 0'],axis=1,inplace=True)
master_frame.dropna(inplace=True)

In [57]:
y_values = master_frame['current_market_cap_usd']
master_frame.drop(['current_ebit'],axis=1, inplace=True)
master_frame.drop(['current_market_cap_usd'],axis=1, inplace=True)
master_frame.drop(['quote_symbol'],axis=1, inplace=True)
master_frame.drop(['sedol'],axis=1, inplace=True)
master_frame.drop(['country'],axis=1, inplace=True)
master_frame.drop(['exchange'],axis=1, inplace=True)
master_frame.drop(['primary_sic_code'],axis=1, inplace=True)
master_frame.drop(['current_price_close'],axis=1, inplace=True)

## Feature List post dropping irrelevant columns:

In [58]:
# Performing a 70-30 train test split
x_train, x_test, y_train, y_test = train_test_split(master_frame,y_values,test_size=0.3,random_state=101)

In [59]:
for column in master_frame.columns:
    print(column)

current_sales
current_ebitda
current_net_income
current_total_assets
current_total_liabilities
current_pe_ratio
actual_eps
current_price_/_cash
current_price_/_sales
dividend_yield


In [60]:
sales = tf.feature_column.numeric_column("current_sales")
ebitda = tf.feature_column.numeric_column("current_ebitda")
net_income = tf.feature_column.numeric_column("current_net_income")
assets = tf.feature_column.numeric_column("current_total_assets")
liabilities = tf.feature_column.numeric_column("current_total_liabilities")
pe = tf.feature_column.numeric_column("current_pe_ratio")
eps = tf.feature_column.numeric_column("actual_eps")
price_cash = tf.feature_column.numeric_column("current_price_/_cash")
price_sales = tf.feature_column.numeric_column("current_price_/_sales")
dividend = tf.feature_column.numeric_column("dividend_yield")

features = [sales, ebitda, net_income, assets, liabilities, pe, eps, price_cash, price_sales, dividend]

In [61]:
input_func = tf.estimator.inputs.pandas_input_fn(x=x_train,y=y_train,batch_size=10,num_epochs=None,shuffle=True)
model_reg = tf.estimator.DNNRegressor(feature_columns=features, hidden_units=[6,10])

n_steps = 100000
model_reg.train(input_fn=input_func, steps=n_steps)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/_r/djs1pctx2rldgh7rxdfj_2cw0000gq/T/tmpsggyqxm6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1434ea518>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create Chec

INFO:tensorflow:global_step/sec: 656.021
INFO:tensorflow:loss = 1258173800.0, step = 6901 (0.152 sec)
INFO:tensorflow:global_step/sec: 675.814
INFO:tensorflow:loss = 4323950.0, step = 7001 (0.149 sec)
INFO:tensorflow:global_step/sec: 626.581
INFO:tensorflow:loss = 2779672.2, step = 7101 (0.160 sec)
INFO:tensorflow:global_step/sec: 672.907
INFO:tensorflow:loss = 172221580.0, step = 7201 (0.148 sec)
INFO:tensorflow:global_step/sec: 673.101
INFO:tensorflow:loss = 123321090.0, step = 7301 (0.149 sec)
INFO:tensorflow:global_step/sec: 690.613
INFO:tensorflow:loss = 235799900.0, step = 7401 (0.144 sec)
INFO:tensorflow:global_step/sec: 682.477
INFO:tensorflow:loss = 6774595000.0, step = 7501 (0.148 sec)
INFO:tensorflow:global_step/sec: 663.694
INFO:tensorflow:loss = 74849050.0, step = 7601 (0.149 sec)
INFO:tensorflow:global_step/sec: 678.983
INFO:tensorflow:loss = 3426912800.0, step = 7701 (0.148 sec)
INFO:tensorflow:global_step/sec: 689.46
INFO:tensorflow:loss = 42872276.0, step = 7801 (0.146

INFO:tensorflow:loss = 268607070.0, step = 15001 (0.147 sec)
INFO:tensorflow:global_step/sec: 687.638
INFO:tensorflow:loss = 11104733.0, step = 15101 (0.146 sec)
INFO:tensorflow:global_step/sec: 680.231
INFO:tensorflow:loss = 9934804.0, step = 15201 (0.146 sec)
INFO:tensorflow:global_step/sec: 679.21
INFO:tensorflow:loss = 78892040.0, step = 15301 (0.146 sec)
INFO:tensorflow:global_step/sec: 703.735
INFO:tensorflow:loss = 20535788.0, step = 15401 (0.142 sec)
INFO:tensorflow:global_step/sec: 704.627
INFO:tensorflow:loss = 3188807.8, step = 15501 (0.143 sec)
INFO:tensorflow:global_step/sec: 696.345
INFO:tensorflow:loss = 556326340.0, step = 15601 (0.143 sec)
INFO:tensorflow:global_step/sec: 704.841
INFO:tensorflow:loss = 201216100.0, step = 15701 (0.143 sec)
INFO:tensorflow:global_step/sec: 657.267
INFO:tensorflow:loss = 237705380.0, step = 15801 (0.149 sec)
INFO:tensorflow:global_step/sec: 711.095
INFO:tensorflow:loss = 24108108.0, step = 15901 (0.142 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:loss = 920116160.0, step = 23101 (0.145 sec)
INFO:tensorflow:global_step/sec: 658.515
INFO:tensorflow:loss = 555748350.0, step = 23201 (0.151 sec)
INFO:tensorflow:global_step/sec: 678.33
INFO:tensorflow:loss = 12717806.0, step = 23301 (0.146 sec)
INFO:tensorflow:global_step/sec: 724.469
INFO:tensorflow:loss = 34318480.0, step = 23401 (0.139 sec)
INFO:tensorflow:global_step/sec: 707.429
INFO:tensorflow:loss = 1824330.5, step = 23501 (0.141 sec)
INFO:tensorflow:global_step/sec: 682.986
INFO:tensorflow:loss = 398357760.0, step = 23601 (0.147 sec)
INFO:tensorflow:global_step/sec: 680.495
INFO:tensorflow:loss = 562699800.0, step = 23701 (0.146 sec)
INFO:tensorflow:global_step/sec: 665.654
INFO:tensorflow:loss = 69574500.0, step = 23801 (0.149 sec)
INFO:tensorflow:global_step/sec: 702.046
INFO:tensorflow:loss = 197307000.0, step = 23901 (0.143 sec)
INFO:tensorflow:global_step/sec: 717.839
INFO:tensorflow:loss = 78335896.0, step = 24001 (0.141 sec)
INFO:tensorflow:global_step/

INFO:tensorflow:loss = 4900423.5, step = 31201 (0.145 sec)
INFO:tensorflow:global_step/sec: 714.325
INFO:tensorflow:loss = 9914221.0, step = 31301 (0.140 sec)
INFO:tensorflow:global_step/sec: 707.714
INFO:tensorflow:loss = 249881260.0, step = 31401 (0.141 sec)
INFO:tensorflow:global_step/sec: 720.665
INFO:tensorflow:loss = 611839800.0, step = 31501 (0.140 sec)
INFO:tensorflow:global_step/sec: 712.448
INFO:tensorflow:loss = 1496945000.0, step = 31601 (0.139 sec)
INFO:tensorflow:global_step/sec: 684.238
INFO:tensorflow:loss = 83451096.0, step = 31701 (0.147 sec)
INFO:tensorflow:global_step/sec: 671.659
INFO:tensorflow:loss = 25431866.0, step = 31801 (0.148 sec)
INFO:tensorflow:global_step/sec: 669.116
INFO:tensorflow:loss = 219983540.0, step = 31901 (0.151 sec)
INFO:tensorflow:global_step/sec: 710.772
INFO:tensorflow:loss = 49079896.0, step = 32001 (0.139 sec)
INFO:tensorflow:global_step/sec: 712.297
INFO:tensorflow:loss = 866830000.0, step = 32101 (0.143 sec)
INFO:tensorflow:global_step

INFO:tensorflow:loss = 318978660.0, step = 39301 (0.145 sec)
INFO:tensorflow:global_step/sec: 706.209
INFO:tensorflow:loss = 1502547300.0, step = 39401 (0.143 sec)
INFO:tensorflow:global_step/sec: 697.157
INFO:tensorflow:loss = 35435990.0, step = 39501 (0.143 sec)
INFO:tensorflow:global_step/sec: 649.155
INFO:tensorflow:loss = 745257200.0, step = 39601 (0.151 sec)
INFO:tensorflow:global_step/sec: 677.654
INFO:tensorflow:loss = 14622863.0, step = 39701 (0.149 sec)
INFO:tensorflow:global_step/sec: 649.52
INFO:tensorflow:loss = 747912640.0, step = 39801 (0.152 sec)
INFO:tensorflow:global_step/sec: 652.537
INFO:tensorflow:loss = 9059117.0, step = 39901 (0.154 sec)
INFO:tensorflow:global_step/sec: 693.284
INFO:tensorflow:loss = 1606697000.0, step = 40001 (0.145 sec)
INFO:tensorflow:global_step/sec: 633.056
INFO:tensorflow:loss = 154495330.0, step = 40101 (0.156 sec)
INFO:tensorflow:global_step/sec: 696.743
INFO:tensorflow:loss = 46038240.0, step = 40201 (0.145 sec)
INFO:tensorflow:global_st

INFO:tensorflow:loss = 14376220.0, step = 47401 (0.140 sec)
INFO:tensorflow:global_step/sec: 697.831
INFO:tensorflow:loss = 25600088.0, step = 47501 (0.146 sec)
INFO:tensorflow:global_step/sec: 675.822
INFO:tensorflow:loss = 10956849.0, step = 47601 (0.146 sec)
INFO:tensorflow:global_step/sec: 693.151
INFO:tensorflow:loss = 123690370.0, step = 47701 (0.147 sec)
INFO:tensorflow:global_step/sec: 702.32
INFO:tensorflow:loss = 849384260.0, step = 47801 (0.141 sec)
INFO:tensorflow:global_step/sec: 724.874
INFO:tensorflow:loss = 2789668.2, step = 47901 (0.139 sec)
INFO:tensorflow:global_step/sec: 660.406
INFO:tensorflow:loss = 92579020.0, step = 48001 (0.150 sec)
INFO:tensorflow:global_step/sec: 699.067
INFO:tensorflow:loss = 1864873.8, step = 48101 (0.144 sec)
INFO:tensorflow:global_step/sec: 722.7
INFO:tensorflow:loss = 4578775.0, step = 48201 (0.139 sec)
INFO:tensorflow:global_step/sec: 689.246
INFO:tensorflow:loss = 8411096600.0, step = 48301 (0.144 sec)
INFO:tensorflow:global_step/sec: 

INFO:tensorflow:loss = 3102347300.0, step = 55501 (0.149 sec)
INFO:tensorflow:global_step/sec: 690.846
INFO:tensorflow:loss = 5900957.0, step = 55601 (0.144 sec)
INFO:tensorflow:global_step/sec: 693.741
INFO:tensorflow:loss = 116908980.0, step = 55701 (0.142 sec)
INFO:tensorflow:global_step/sec: 761.986
INFO:tensorflow:loss = 99969940.0, step = 55801 (0.134 sec)
INFO:tensorflow:global_step/sec: 634.43
INFO:tensorflow:loss = 278322720.0, step = 55901 (0.156 sec)
INFO:tensorflow:global_step/sec: 643.418
INFO:tensorflow:loss = 18664280.0, step = 56001 (0.155 sec)
INFO:tensorflow:global_step/sec: 691.831
INFO:tensorflow:loss = 1510731.8, step = 56101 (0.144 sec)
INFO:tensorflow:global_step/sec: 694.801
INFO:tensorflow:loss = 28398980.0, step = 56201 (0.144 sec)
INFO:tensorflow:global_step/sec: 726.571
INFO:tensorflow:loss = 6409670.0, step = 56301 (0.140 sec)
INFO:tensorflow:global_step/sec: 656.763
INFO:tensorflow:loss = 77758540.0, step = 56401 (0.151 sec)
INFO:tensorflow:global_step/sec

INFO:tensorflow:loss = 65823960.0, step = 63601 (0.148 sec)
INFO:tensorflow:global_step/sec: 718.221
INFO:tensorflow:loss = 4317991.0, step = 63701 (0.140 sec)
INFO:tensorflow:global_step/sec: 726.432
INFO:tensorflow:loss = 17676988.0, step = 63801 (0.138 sec)
INFO:tensorflow:global_step/sec: 707.264
INFO:tensorflow:loss = 847238500.0, step = 63901 (0.141 sec)
INFO:tensorflow:global_step/sec: 708.521
INFO:tensorflow:loss = 1135488300.0, step = 64001 (0.140 sec)
INFO:tensorflow:global_step/sec: 688.383
INFO:tensorflow:loss = 37350396.0, step = 64101 (0.146 sec)
INFO:tensorflow:global_step/sec: 688.695
INFO:tensorflow:loss = 68119340.0, step = 64201 (0.143 sec)
INFO:tensorflow:global_step/sec: 681.226
INFO:tensorflow:loss = 1099023400.0, step = 64301 (0.148 sec)
INFO:tensorflow:global_step/sec: 728.641
INFO:tensorflow:loss = 135590240.0, step = 64401 (0.139 sec)
INFO:tensorflow:global_step/sec: 694.207
INFO:tensorflow:loss = 36532256.0, step = 64501 (0.144 sec)
INFO:tensorflow:global_ste

INFO:tensorflow:global_step/sec: 708.125
INFO:tensorflow:loss = 19782458.0, step = 71801 (0.142 sec)
INFO:tensorflow:global_step/sec: 652.634
INFO:tensorflow:loss = 25798258.0, step = 71901 (0.151 sec)
INFO:tensorflow:global_step/sec: 703.834
INFO:tensorflow:loss = 33346216.0, step = 72001 (0.144 sec)
INFO:tensorflow:global_step/sec: 681.158
INFO:tensorflow:loss = 29049260.0, step = 72101 (0.146 sec)
INFO:tensorflow:global_step/sec: 676.237
INFO:tensorflow:loss = 564362940.0, step = 72201 (0.146 sec)
INFO:tensorflow:global_step/sec: 711.526
INFO:tensorflow:loss = 2390821.0, step = 72301 (0.141 sec)
INFO:tensorflow:global_step/sec: 726.903
INFO:tensorflow:loss = 5920812.0, step = 72401 (0.139 sec)
INFO:tensorflow:global_step/sec: 688.663
INFO:tensorflow:loss = 707178700.0, step = 72501 (0.147 sec)
INFO:tensorflow:global_step/sec: 712.787
INFO:tensorflow:loss = 198017250.0, step = 72601 (0.140 sec)
INFO:tensorflow:global_step/sec: 703.523
INFO:tensorflow:loss = 12414774.0, step = 72701 (

INFO:tensorflow:loss = 629802300.0, step = 79901 (0.141 sec)
INFO:tensorflow:global_step/sec: 702.134
INFO:tensorflow:loss = 3356146.0, step = 80001 (0.144 sec)
INFO:tensorflow:global_step/sec: 677.543
INFO:tensorflow:loss = 368229400.0, step = 80101 (0.147 sec)
INFO:tensorflow:global_step/sec: 656.975
INFO:tensorflow:loss = 162995970.0, step = 80201 (0.150 sec)
INFO:tensorflow:global_step/sec: 700.156
INFO:tensorflow:loss = 1855823500.0, step = 80301 (0.143 sec)
INFO:tensorflow:global_step/sec: 703.047
INFO:tensorflow:loss = 10364495000.0, step = 80401 (0.144 sec)
INFO:tensorflow:global_step/sec: 673.097
INFO:tensorflow:loss = 2606804500.0, step = 80501 (0.147 sec)
INFO:tensorflow:global_step/sec: 675.393
INFO:tensorflow:loss = 218783420.0, step = 80601 (0.148 sec)
INFO:tensorflow:global_step/sec: 705.243
INFO:tensorflow:loss = 5109553.0, step = 80701 (0.142 sec)
INFO:tensorflow:global_step/sec: 644.21
INFO:tensorflow:loss = 85166120.0, step = 80801 (0.155 sec)
INFO:tensorflow:global_

INFO:tensorflow:loss = 3381087200.0, step = 88001 (0.150 sec)
INFO:tensorflow:global_step/sec: 629.485
INFO:tensorflow:loss = 278065500.0, step = 88101 (0.161 sec)
INFO:tensorflow:global_step/sec: 653.189
INFO:tensorflow:loss = 15580972.0, step = 88201 (0.150 sec)
INFO:tensorflow:global_step/sec: 697.657
INFO:tensorflow:loss = 282429660.0, step = 88301 (0.142 sec)
INFO:tensorflow:global_step/sec: 710.95
INFO:tensorflow:loss = 863126850.0, step = 88401 (0.143 sec)
INFO:tensorflow:global_step/sec: 637.227
INFO:tensorflow:loss = 66910484.0, step = 88501 (0.155 sec)
INFO:tensorflow:global_step/sec: 713.526
INFO:tensorflow:loss = 160384740.0, step = 88601 (0.141 sec)
INFO:tensorflow:global_step/sec: 682.045
INFO:tensorflow:loss = 85713510.0, step = 88701 (0.147 sec)
INFO:tensorflow:global_step/sec: 689.456
INFO:tensorflow:loss = 5557079000.0, step = 88801 (0.144 sec)
INFO:tensorflow:global_step/sec: 663.57
INFO:tensorflow:loss = 25737268.0, step = 88901 (0.151 sec)
INFO:tensorflow:global_st

INFO:tensorflow:loss = 10464474000.0, step = 96101 (0.142 sec)
INFO:tensorflow:global_step/sec: 668.473
INFO:tensorflow:loss = 756818560.0, step = 96201 (0.149 sec)
INFO:tensorflow:global_step/sec: 690.42
INFO:tensorflow:loss = 111047080.0, step = 96301 (0.144 sec)
INFO:tensorflow:global_step/sec: 705.856
INFO:tensorflow:loss = 2815785.0, step = 96401 (0.143 sec)
INFO:tensorflow:global_step/sec: 665.314
INFO:tensorflow:loss = 153095060.0, step = 96501 (0.149 sec)
INFO:tensorflow:global_step/sec: 642.867
INFO:tensorflow:loss = 6835906.5, step = 96601 (0.154 sec)
INFO:tensorflow:global_step/sec: 734.745
INFO:tensorflow:loss = 56696652.0, step = 96701 (0.139 sec)
INFO:tensorflow:global_step/sec: 601.131
INFO:tensorflow:loss = 170240320.0, step = 96801 (0.164 sec)
INFO:tensorflow:global_step/sec: 722.214
INFO:tensorflow:loss = 2315384800.0, step = 96901 (0.141 sec)
INFO:tensorflow:global_step/sec: 720.021
INFO:tensorflow:loss = 64993836.0, step = 97001 (0.139 sec)
INFO:tensorflow:global_st

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressor at 0x1434ea898>

In [62]:
#Score with exact training values
prediction_func = tf.estimator.inputs.pandas_input_fn(x=x_train,batch_size=len(x_train),shuffle=False)
true_predictions = list(model_reg.predict(input_fn=prediction_func))
#print(true_predictions)
prediction_list = []
for prediction in true_predictions:
    prediction_list.append(prediction['predictions'][0])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/_r/djs1pctx2rldgh7rxdfj_2cw0000gq/T/tmpsggyqxm6/model.ckpt-100000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


## Using a modified RMSE function with a percentile acceptable bound, in order to better understand our outputs

In [63]:
bound = 0.1
def calc_bounded_error(y_pred, y_real):
    lower = y_real*(1-bound)
    upper = y_real*(1+bound)
    difference_low = (y_pred-lower)**2
    difference_up = (y_pred-upper)**2
    if difference_up > difference_low:
        return difference_low
    return difference_up

In [64]:
def modified_rmse(y_predictions, y_tests):
    my_list = []
    for i in range(0,len(y_predictions)):
        my_list.append(calc_bounded_error(y_predictions[i],y_tests[i]))
    mean_val = mean(my_list)
    modified_rmse = sqrt(mean_val)
    return modified_rmse

## Scoring the training data

In [65]:
y_train = y_train.values

In [66]:
modified_rmse(prediction_list,y_train)

10726.43254999262

## Scoring the testing data

In [67]:
#Score with test values
prediction_func = tf.estimator.inputs.pandas_input_fn(x=x_test,batch_size=len(x_test),shuffle=False)
true_predictions = list(model_reg.predict(input_fn=prediction_func))
#print(true_predictions)
prediction_list = []
for prediction in true_predictions:
    prediction_list.append(prediction['predictions'][0])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/_r/djs1pctx2rldgh7rxdfj_2cw0000gq/T/tmpsggyqxm6/model.ckpt-100000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [68]:
y_test = y_test.values

In [69]:
modified_rmse(prediction_list,y_test)

8096.989816742425

## Conclusion: Using a DNNRegressor does not create a marked differnece in RMSE value