In [1]:
import math
import numpy as np

In [2]:
def get_training_data(count, rng=None):
    """Draw a synthetic data set for a two-feature linear regression.

    The samples follow ``y = 3 + x1 + 2*x2 + noise`` where every random
    quantity is normally distributed:

    * ``x1``    -- mean 3,  standard deviation 2
    * ``x2``    -- mean -1, standard deviation 2
    * ``noise`` -- mean 0,  standard deviation sqrt(2)

    Parameters
    ----------
    count : int
        Number of samples to draw.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. Pass e.g. ``np.random.default_rng(seed)`` to
        get a reproducible data set. When omitted, NumPy's global random
        state is used, exactly as before this parameter existed.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x1, x2, y)``, each a 1-D array of length ``count``.
    """
    # The module-level ``np.random`` and Generator/RandomState objects all
    # expose ``normal(loc, scale, size)``, so one code path serves both.
    sampler = np.random if rng is None else rng

    # Draw order (x1, x2, noise) is kept fixed so seeded runs stay comparable.
    x1 = sampler.normal(loc=3, scale=2, size=count)
    x2 = sampler.normal(loc=-1, scale=2, size=count)
    noise = sampler.normal(loc=0, scale=math.sqrt(2), size=count)
    y = 3 + x1 + 2*x2 + noise

    return (x1, x2, y)

In [41]:
# Mini-batch gradient descent for the model h(x) = theta0 + theta1*x1 + theta2*x2.
#
# Each iteration takes the next `batch_size` consecutive samples (wrapping
# around at the end of the data set), evaluates the half mean squared error
# J on that batch and takes one gradient step. The loop stops once J changes
# by no more than `threshold` between two consecutive batches. Because J is
# measured on a different batch every time, this stopping rule is noisy.
batch_size = 1000

# Sentinels chosen so the very first convergence check always passes.
J_theta_old = 0
J_theta_new = 1e5
threshold = 0.00001
eta = 0.01

theta_initial = np.zeros(3)

# One (theta0, theta1, theta2, J) tuple per iteration, recorded before the update.
animation_data = []

# Index of the first sample of the next batch.
global_iterator = 0

total_data = 1000000

x1_data, x2_data, y_data = get_training_data(total_data)

# Rows are [1, x1, x2], so a batch prediction is a single matrix-vector product
# instead of a Python loop that allocates a small array for every sample.
design_matrix = np.column_stack((np.ones(total_data), x1_data, x2_data))

while abs(J_theta_new - J_theta_old) > threshold:

    m = batch_size

    # The modulo keeps the indices valid when a batch runs past the last sample.
    batch_idx = np.arange(global_iterator, global_iterator + batch_size) % total_data
    batch_features = design_matrix[batch_idx]
    residual = batch_features @ theta_initial - y_data[batch_idx]

    J_theta = np.sum(residual**2) / (2*m)
    # Sum over the batch of (h - y) * [1, x1, x2].
    update_direction = batch_features.T @ residual

    global_iterator += batch_size
    global_iterator %= total_data

    J_theta_old = J_theta_new
    J_theta_new = J_theta

    animation_data.append((theta_initial[0], theta_initial[1], theta_initial[2], J_theta))

    theta_initial = theta_initial - eta * (1/m) * update_direction
    print(f"J_theta: {J_theta}, Difference: {abs(J_theta_new-J_theta_old)}, Parameters:{theta_initial}")
    

J_theta: 19.988159661426653, Difference: 99980.01184033857, Parameters:[0.04005491 0.16869282 0.04625208]
J_theta: 16.51858289914944, Difference: 3.469576762277214, Parameters:[0.0766367  0.31057333 0.08861461]
J_theta: 14.856932202209746, Difference: 1.6616506969396934, Parameters:[0.10845613 0.43639871 0.13903766]
J_theta: 12.2364279447186, Difference: 2.620504257491147, Parameters:[0.13435173 0.54181143 0.18784168]
J_theta: 10.51769354199907, Difference: 1.7187344027195302, Parameters:[0.15810857 0.63137647 0.23370902]
J_theta: 10.3406978146132, Difference: 0.17699572738586866, Parameters:[0.18156309 0.71595141 0.2848656 ]
J_theta: 9.490585260501843, Difference: 0.850112554111357, Parameters:[0.20302667 0.79117396 0.33571455]
J_theta: 8.165081119169924, Difference: 1.325504141331919, Parameters:[0.22065985 0.84999308 0.38530239]
J_theta: 7.012205186481511, Difference: 1.1528759326884135, Parameters:[0.23384031 0.89560114 0.43468461]
J_theta: 7.264178300007161, Difference: 0.25197311

In [42]:
import pandas as pd

# Score the learned parameters on the held-out test file.
test_df = pd.read_csv("./Data/q2test.csv")
X1, X2, Y = (np.array(test_df[column]) for column in ('X_1', 'X_2', 'Y'))

# Linear prediction with the parameters produced by the training cell.
intercept, slope_1, slope_2 = theta_initial[0], theta_initial[1], theta_initial[2]
predictions = intercept + slope_1*X1 + slope_2*X2

# Sum (not mean) of squared residuals over the whole test set.
err = np.sum((Y - predictions)**2)

err



19796.305641249975

In [26]:
# Preview the first five rows of the test frame.
# NOTE(review): the stored output of this cell lists X_1/X_2/Y as a data row
# under columns 0/1/2, so it may predate the current load cell -- re-run to refresh.
test_df.head(n=5)

Unnamed: 0,0,1,2
0,X_1,X_2,Y
1,16.678,13.018,45.537
2,6.583,-5.539,-1.17
3,-19.837,6.089,-3.646
4,-8.412,6.11,8.137


In [14]:
import plotly.graph_objects as go

# Split the recorded (theta0, theta1, theta2, cost) tuples into one list per field.
columns = [[step[k] for step in animation_data] for k in range(4)]
theta_0_vals, theta_1_vals, theta_2_vals, J_val = columns

# One hover label per recorded iteration.
hover_text = [
    f'Theta 0: {t0}<br>Theta 1: {t1}<br>Theta 2: {t2}<br>Cost: {J:.2f}'
    for t0, t1, t2, J in animation_data
]

# Markers are coloured by cost; the connecting line shows the order of the steps.
marker_style = {'size': 4, 'color': J_val, 'colorscale': 'Viridis'}
line_style = {'color': 'darkblue', 'width': 2}

path_trace = go.Scatter3d(
    x=theta_0_vals,
    y=theta_1_vals,
    z=theta_2_vals,
    marker=marker_style,
    line=line_style,
    hovertemplate=hover_text,
    name="Stochastic Gradient Descent Path",
)
fig = go.Figure(data=path_trace)

scene_layout = {
    'aspectratio': {'x': 1, 'y': 1, 'z': 0.7},
    'aspectmode': 'manual',
    'xaxis_title': 'Theta 0',
    'yaxis_title': 'Theta 1',
    'zaxis_title': 'Theta 2',
}
fig.update_layout(
    scene=scene_layout,
    showlegend=True,
    title='Stochastic Gradient Descent Path',
)

# Save the interactive figure as a standalone HTML page.
fig.write_html('stochastic_path.html')



In [24]:
# Run mini-batch gradient descent once per (learning rate, batch size, threshold)
# configuration and overlay the resulting parameter paths in one 3-D figure.
batch_sizes = [1,100,10000,100000]
thresholds = [0.00003,0.0001,0.0001,0.00009]
netas = [0.01,0.01,0.05,0.1]

# One Scatter3d trace per configuration.
fig_datas = []

for neta,batch_size,threshold in zip(netas,batch_sizes,thresholds):

    # Sentinels chosen so the very first convergence check always passes.
    J_theta_old = 0
    J_theta_new = 1e5
    eta = neta

    theta_initial = np.zeros(3)

    # One (theta0, theta1, theta2, J) tuple per iteration, recorded before the update.
    animation_data = []

    # Index of the first sample of the next batch.
    global_iterator = 0

    total_data = 1000000

    # A fresh synthetic data set is drawn for every configuration.
    x1_data, x2_data, y_data = get_training_data(total_data)

    # Rows are [1, x1, x2], so a batch prediction is a single matrix-vector
    # product instead of a per-sample Python loop (which is very slow for the
    # 10000- and 100000-sample batches).
    design_matrix = np.column_stack((np.ones(total_data), x1_data, x2_data))

    # Stop once the batch cost changes by no more than `threshold` between
    # two consecutive batches.
    while abs(J_theta_new - J_theta_old) > threshold:

        m = batch_size

        # The modulo keeps the indices valid when a batch runs past the last sample.
        batch_idx = np.arange(global_iterator, global_iterator + batch_size) % total_data
        batch_features = design_matrix[batch_idx]
        residual = batch_features @ theta_initial - y_data[batch_idx]

        J_theta = np.sum(residual**2) / (2*m)
        # Sum over the batch of (h - y) * [1, x1, x2].
        update_direction = batch_features.T @ residual

        global_iterator += batch_size
        global_iterator %= total_data

        J_theta_old = J_theta_new
        J_theta_new = J_theta

        animation_data.append((theta_initial[0], theta_initial[1], theta_initial[2], J_theta))

        theta_initial = theta_initial - eta * (1/m) * update_direction
        print(f"|{threshold}|{batch_size}|J_theta: {J_theta}, Difference: {abs(J_theta_new-J_theta_old)}, Parameters:{theta_initial}")

    theta_0_vals = [t[0] for t in animation_data]
    theta_1_vals = [t[1] for t in animation_data]
    theta_2_vals = [t[2] for t in animation_data]
    J_val = [t[3] for t in animation_data]

    # One hover label per recorded iteration.
    hover_text = [f'Theta 0: {t0}<br>Theta 1: {t1}<br>Theta 2: {t2}<br>Cost: {J:.2f}'
                  for t0, t1, t2, J in zip(theta_0_vals, theta_1_vals, theta_2_vals, J_val)]

    # Markers are coloured by cost; the line shows the order of the steps.
    scplt = go.Scatter3d(
        x=theta_0_vals, y=theta_1_vals, z=theta_2_vals,
        marker=dict(
            size=4,
            color=J_val,
            colorscale='Viridis',
        ),
        line=dict(
            color='darkblue',
            width=2
        ),
        hovertemplate=hover_text,
        name=f"Batch size:{batch_size}, Threshold:{threshold}"
    )
    fig_datas.append(scplt)

fig = go.Figure(data=fig_datas)

fig.update_layout(
    scene=dict(
        aspectratio = dict( x=1, y=1, z=0.7 ),
        aspectmode = 'manual',
        xaxis_title='Theta 0',
        yaxis_title='Theta 1',
        zaxis_title='Theta 2',
    ),
    showlegend = True,
    title='Stochastic Gradient Descent Path',
)

# Save the combined interactive figure as a standalone HTML page.
fig.write_html('stochastic_path_all.html')
    
    

|3e-05|1|J_theta: 4.986570270484172, Difference: 99995.01342972952, Parameters:[ 0.03158028  0.17229467 -0.08553304]
|3e-05|1|J_theta: 45.429142038392705, Difference: 40.44257176790853, Parameters:[0.12689989 0.47431959 0.03346257]
|3e-05|1|J_theta: 19.560296350913625, Difference: 25.86884568747908, Parameters:[0.18944635 0.60460907 0.03960792]
|3e-05|1|J_theta: 3.582151284059645, Difference: 15.97814506685398, Parameters:[0.21621256 0.67926174 0.00993168]
|3e-05|1|J_theta: 0.9500071553229725, Difference: 2.6321441287366723, Parameters:[ 2.29996664e-01  7.48825571e-01 -7.19854865e-04]
|3e-05|1|J_theta: 1.753358188873087, Difference: 0.8033510335501144, Parameters:[ 0.24872289  0.81228886 -0.00879241]
|3e-05|1|J_theta: 8.833807552831056, Difference: 7.080449363957969, Parameters:[0.20669003 0.70823406 0.09299694]
|3e-05|1|J_theta: 0.05867819532091276, Difference: 8.775129357510144, Parameters:[0.21011576 0.72806482 0.08781953]
|3e-05|1|J_theta: 1.1870179919260535, Difference: 1.12833979