## Running OLS with Neural Network.

Here, we demonstrate that Neural Network without any hidden layers and linear output function is just OLS.

In [45]:
import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlite3
import random
import tensorflow as tf
from sklearn.linear_model import LinearRegression

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
import time
import datetime
import plotly.graph_objects as go
from scipy.optimize import bisect
from matplotlib import colors

seed = 42
np.random.seed(seed) # For shuffling X_used and Y

PSID = pd.read_csv('Data/PSID_extended.csv')
X = PSID.drop(['edu_college'],axis=1)
child_hh_inc = X[['hh_inc_at_%i'% i for i in range(30,36)]]
idx = np.sum(np.isnan(child_hh_inc),1)<4

X_used = X[idx].reset_index(drop=True)

chd_inc = np.mean(X_used[['hh_inc_at_%i'% i for i in range(30,36)]],1)
X_used = X_used.drop(columns=['hh_inc_at_%i'% i for i in range(30,36)])
X_used['chd_inc'] = chd_inc

idx = np.random.permutation(X_used.index) # Shuffle rows of X_used and Y in the same way in case rows are ordered in a specific way
X_used = X_used.reindex(idx).reset_index(drop=True)

idx = X_used.birth_age < 11 # Drop rows with average of mother and father's age at birth less than 11
X_used= X_used[~idx].reset_index(drop=True)

X_used = pd.concat((X_used,pd.get_dummies(X_used.mother_marital_at_birth).astype(int)),axis=1)
X_used = X_used.drop(columns='mother_marital_at_birth')
X_used = X_used.rename(columns={1:'married',2:'never_married',3:'widowed',4:'divorced',5:'separated'})
X_used = X_used.drop(columns='married')

Y_used = X_used['chd_inc'].copy()

X_used = X_used[['prt_inc', 'birth_age', 'parent_edu_college', 'person_sex', 'black', 
 'birth_underweight', 'never_married', 'widowed', 'divorced', 'separated']] # Reorder columns

gender = X_used.person_sex.replace({1:0, 0:1})
X_used.loc[:,'person_sex'] = gender  # We set male=0, female=1

n = X_used.shape[0]

X_used.prt_inc = np.log(X_used.prt_inc)
Y_used = np.log(Y_used)

In [46]:
############################################################
########################## OLS #############################
############################################################
OLS = LinearRegression(fit_intercept=True)
OLS.fit(X_used,Y_used)
Y_hat = OLS.predict(X_used)
SE_OLS = np.mean((Y_used-Y_hat.reshape(-1,))**2)
print(SE_OLS)
print(OLS.coef_)

0.46082606122341707
[ 0.4423362  -0.00218205  0.09326481 -0.02599417 -0.36069933 -0.05803311
  0.03198153 -0.21130234 -0.14305046 -0.04423653]


Notice that the residual sum of squares of OLS is around 0.46.

In [48]:
############################################################
################### Linear regression with NN ############
############################################################

tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

input_layer = tf.keras.Input(shape=(X_used.shape[1],), name="linear")
output_layer = tf.keras.layers.Dense(1, activation='linear')(input_layer)

model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

model.compile(loss=tf.keras.losses.MeanSquaredError(),
            optimizer=tf.keras.optimizers.Adam(learning_rate=5e-1))

model.summary()
epochs = 3000
model.fit(X_used,Y_used,batch_size=X_used.shape[0], epochs=epochs)


Epoch 1/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step - loss: 5.5418
Epoch 2/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 315.0129
Epoch 3/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 22.8214
Epoch 4/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 70.9679
Epoch 5/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 177.5603
Epoch 6/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 119.1018
Epoch 7/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 23.7236
Epoch 8/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 6.6310
Epoch 9/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 59.5163
Epoch 10/2000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - l

<keras.src.callbacks.history.History at 0x2da5999a1d0>

In [38]:
# model.trainable_variables
weights = model.trainable_weights
print(weights[0][:])
print(weights[1][:])

tf.Tensor(
[[ 0.4882931 ]
 [-0.001275  ]
 [ 0.0701616 ]
 [-0.02260037]
 [-0.33514962]
 [-0.05491737]
 [ 0.05098736]
 [-0.18900569]
 [-0.12552963]
 [-0.03550678]], shape=(10, 1), dtype=float32)
tf.Tensor([5.2310796], shape=(1,), dtype=float32)


In [40]:
print(OLS.coef_)
print(OLS.intercept_)

[ 0.4423362  -0.00218205  0.09326481 -0.02599417 -0.36069933 -0.05803311
  0.03198153 -0.21130234 -0.14305046 -0.04423653]
5.735094411113735
