In [7]:
%tensorflow_version 2.x
import numpy as np
import pandas as pd
from sklearn import metrics
from scipy.stats import zscore
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from sklearn.model_selection import train_test_split


## Read the data set
df = pd.read_csv("https://data.heatonresearch.com/data/t81-558/jh-simple-dataset.csv", na_values=['NA', '?'])


## Generate dummies for job
df = pd.concat([df, pd.get_dummies(df['job'], prefix='job')], axis=1)
df.drop('job', axis=1, inplace=True)


## Generate dummies for area
df = pd.concat([df, pd.get_dummies(df['area'], prefix='area')], axis=1)
df.drop('area', axis=1, inplace=True)


## Generate dummies for product
df = pd.concat([df, pd.get_dummies(df['product'], prefix='product')], axis=1)
df.drop('product', axis=1, inplace=True)

## Missing values for income
df['income'] = df['income'].fillna(df['income'].median())


## Standardizing data
df['income'] = zscore(df['income'])
df['aspect'] = zscore(df['aspect'])
df['save_rate'] = zscore(df['save_rate'])
df['subscriptions'] = zscore(df['subscriptions'])


## Making input and target data
x_headers = df.columns.drop('age').drop('id')
x = df[x_headers].values
y = df['age'].values
display(df)
print("---------------------------------------------------------------------------------------")
print(f"Shape of input is : {x.shape}")


## K_Fold Cross-Validate
kf = KFold(n_splits=5, shuffle=True, random_state=42)
print("---------------------------------------------------------------------------------------")
print(f"kf = {kf}")


## initialization for Cross Validation
oos_y = []          # Out Of Sample _ y
oos_pred = []
fold = 0


## Training Process
for train, test in kf.split(x):
  print("---------------------------------------------------------------------------------------")
  fold+=1
  print(f"Fold #{fold}")
  print("********************************")
  print(f"Shape of training set is : {train.shape}")
  print("********************************")
  print(f"Shape of test set is : {test.shape}")
  print("********************************")
  print("Training set : ")
  print(train)
  print("********************************")
  print("Test set : ")
  print(test)

  x_train = x[train]
  y_train = y[train]
  x_test = x[test]
  y_test = y[test]
  print("---------------------------------------------------------------------------------------")
  print("y_test = ")
  print(y_test)

  
  model = Sequential()
  model.add(Dense(20, input_dim=x.shape[1], activation='relu'))
  model.add(Dense(10, activation='relu'))
  model.add(Dense(1))
  model.compile(loss='mean_squared_error', optimizer='adam')
    
  model.fit(x_train,y_train,validation_data=(x_test,y_test),verbose=0,epochs=500)
    
  y_pred = model.predict(x_test)

  oos_y.append(y_test)
  oos_pred.append(y_pred)

  score = np.sqrt(metrics.mean_squared_error(y_test, y_pred))
  print("---------------------------------------------------------------------------------------")
  print(f"RMSE for FOLD {fold} is : {score}")
  print("---------------------------------------------------------------------------------------")
  print("---------------------------------------------------------------------------------------")


print("oos_y = ")
print(oos_y)
print("---------------------------------------------------------------------------------------")
print("---------------------------------------------------------------------------------------")


## Build the oos prediction list and calculate the error.
print("oos_y = ")
print(oos_y)
print("---------------------------------------------------------------------------------------")
oos_y = np.concatenate(oos_y)
oos_pred = np.concatenate(oos_pred)
score = np.sqrt(metrics.mean_squared_error(oos_pred,oos_y))
print(f"Final, out of sample score (RMSE): {score}")    


## Write the cross-validated prediction
oos_y = pd.DataFrame(oos_y)
oos_pred = pd.DataFrame(oos_pred)
oosDF = pd.concat( [df, oos_y, oos_pred],axis=1 )
print("---------------------------------------------------------------------------------------")
print("---------------------------------------------------------------------------------------")
print("oos_y = ")
display(oos_y)
print("---------------------------------------------------------------------------------------")
print("---------------------------------------------------------------------------------------")
print("oos_pred = ")
display(oos_pred)
print("---------------------------------------------------------------------------------------")
print("---------------------------------------------------------------------------------------")
print("oosDF = ")
display(oosDF)

Unnamed: 0,id,income,aspect,subscriptions,dist_healthy,save_rate,dist_unhealthy,age,pop_dense,retail_dense,crime,job_11,job_al,job_am,job_ax,job_bf,job_by,job_cv,job_de,job_dz,job_e2,job_f8,job_gj,job_gv,job_kd,job_ke,job_kl,job_kp,job_ks,job_kw,job_mm,job_nb,job_nn,job_ob,job_pe,job_po,job_pq,job_pz,job_qp,job_qw,job_rn,job_sa,job_vv,job_zz,area_a,area_b,area_c,area_d,product_a,product_b,product_c,product_d,product_e,product_f,product_g
0,1,-0.607550,-0.664918,-0.208449,9.017895,-0.215764,11.738935,49,0.885827,0.492126,0.071100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0
1,2,0.338053,-0.207748,0.839031,7.766643,0.196869,6.805396,51,0.874016,0.342520,0.400809,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
2,3,-0.184205,1.127906,-0.208449,3.632069,-0.714362,13.671772,44,0.944882,0.724409,0.207723,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
3,4,-0.526467,-0.440815,-0.208449,5.372942,-0.542432,4.333286,50,0.889764,0.444882,0.361216,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0
4,5,-2.851675,1.638861,1.886511,3.822477,-0.473660,5.967121,38,0.744094,0.661417,0.068033,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1996,-0.593504,1.414758,-0.208449,5.454545,-0.232957,14.013489,41,0.881890,0.744094,0.104838,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0
1996,1997,-3.028085,1.011372,0.839031,3.632069,-0.473660,8.380497,38,0.944882,0.877953,0.063851,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0
1997,1998,-2.826971,1.513363,1.886511,7.168218,0.884591,4.626950,36,0.759843,0.744094,0.098703,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0
1998,1999,1.093101,-1.274478,-1.255928,8.936292,-0.370502,3.281439,46,0.909449,0.598425,0.117803,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0


---------------------------------------------------------------------------------------
Shape of input is : (2000, 53)
---------------------------------------------------------------------------------------
kf = KFold(n_splits=5, random_state=42, shuffle=True)
---------------------------------------------------------------------------------------
Fold #1
********************************
Shape of training set is : (1600,)
********************************
Shape of test set is : (400,)
********************************
Training set : 
[   0    1    2 ... 1997 1998 1999]
********************************
Test set : 
[  23   29   30   32   44   45   49   56   59   63   65   67   69   70
   73   76   78   99  100  109  111  115  120  123  124  128  135  162
  163  168  173  175  185  188  194  196  203  210  211  212  218  220
  231  233  237  239  247  251  254  256  261  266  270  275  281  289
  297  298  300  303  305  306  307  316  322  324  331  342  344  350
  351  352  353  354  361  

Unnamed: 0,0
0,47
1,49
2,46
3,49
4,37
...,...
1995,49
1996,47
1997,49
1998,44


---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
oos_pred = 


Unnamed: 0,0
0,47.782951
1,49.248314
2,44.779884
3,48.830452
4,36.442932
...,...
1995,48.802818
1996,46.506458
1997,49.288727
1998,43.870831


---------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------
oosDF = 


Unnamed: 0,id,income,aspect,subscriptions,dist_healthy,save_rate,dist_unhealthy,age,pop_dense,retail_dense,crime,job_11,job_al,job_am,job_ax,job_bf,job_by,job_cv,job_de,job_dz,job_e2,job_f8,job_gj,job_gv,job_kd,job_ke,job_kl,job_kp,job_ks,job_kw,job_mm,job_nb,job_nn,job_ob,job_pe,job_po,job_pq,job_pz,job_qp,job_qw,job_rn,job_sa,job_vv,job_zz,area_a,area_b,area_c,area_d,product_a,product_b,product_c,product_d,product_e,product_f,product_g,0,0.1
0,1,-0.607550,-0.664918,-0.208449,9.017895,-0.215764,11.738935,49,0.885827,0.492126,0.071100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,47,47.782951
1,2,0.338053,-0.207748,0.839031,7.766643,0.196869,6.805396,51,0.874016,0.342520,0.400809,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,49,49.248314
2,3,-0.184205,1.127906,-0.208449,3.632069,-0.714362,13.671772,44,0.944882,0.724409,0.207723,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,46,44.779884
3,4,-0.526467,-0.440815,-0.208449,5.372942,-0.542432,4.333286,50,0.889764,0.444882,0.361216,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,49,48.830452
4,5,-2.851675,1.638861,1.886511,3.822477,-0.473660,5.967121,38,0.744094,0.661417,0.068033,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,37,36.442932
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1996,-0.593504,1.414758,-0.208449,5.454545,-0.232957,14.013489,41,0.881890,0.744094,0.104838,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,49,48.802818
1996,1997,-3.028085,1.011372,0.839031,3.632069,-0.473660,8.380497,38,0.944882,0.877953,0.063851,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,47,46.506458
1997,1998,-2.826971,1.513363,1.886511,7.168218,0.884591,4.626950,36,0.759843,0.744094,0.098703,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,49,49.288727
1998,1999,1.093101,-1.274478,-1.255928,8.936292,-0.370502,3.281439,46,0.909449,0.598425,0.117803,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,44,43.870831
