In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
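# One-time setup, kept for reference: uncomment and run this cell once to
# (re)create the "Linear_regression_eg_dataset" file that the next cell reads.
# dataset = {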
#     'Experience (Years)': [1, 2, 3, 4, 5, 6, 7, 8],
#     'Salary ($)': [40000, 42500, 45000, 47500, 50000, 52500, 55000, 57500]
# }

# df = pd.DataFrame(dataset)

# df.to_csv("Linear_regression_eg_dataset", index = False)

In [None]:
# Read the experience/salary table; the file has no ".csv" suffix but is
# parsed as CSV by read_csv.
df = pd.read_csv(filepath_or_buffer="Linear_regression_eg_dataset")
print(df)

In [None]:
# Copy the feature column and the target column out of the frame as arrays.
X_train, y_train = (
    np.array(df[column]) for column in ('Experience (Years)', 'Salary ($)')
)

In [None]:
def compute_cost(X,y,w,b):
    """Return the halved mean squared error of the line ``w * X + b`` against ``y``.

    The value computed is ``sum((w * X[i] + b - y[i]) ** 2) / (2 * m)`` where
    ``m`` is the number of training examples (``X.shape[0]``).

    Parameters
    ----------
    X : array-like
        Feature values, one per training example.
    y : array-like
        Target values; must have the same shape as ``X``.
    w, b : scalar
        Slope and intercept of the linear model.

    Returns
    -------
    numpy.float64
        The cost of the model ``(w, b)`` on the given data.

    Raises
    ------
    ValueError
        If ``X`` is empty, or ``X`` and ``y`` do not have the same shape.
    """
    # Work in float64: with integer inputs the squared errors would otherwise
    # be computed in int64, which wraps around silently for large values.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    if X.shape != y.shape:
        raise ValueError(
            f"X and y must have the same shape, got {X.shape} and {y.shape}"
        )
    if X.size == 0:
        raise ValueError("cannot compute the cost of an empty dataset")

    m = X.shape[0]
    residuals = w * X + b - y  # prediction error for every example at once

    return np.sum(residuals ** 2) / (2 * m)

In [None]:
def compute_gradient(X,y,w,b):
    """Return the gradient of the halved mean squared error w.r.t. ``w`` and ``b``.

    With ``f[i] = w * X[i] + b`` and ``m = X.shape[0]`` the result is

    * ``dj_dw = sum((f[i] - y[i]) * X[i]) / m``
    * ``dj_db = sum(f[i] - y[i]) / m``

    Parameters
    ----------
    X : array-like
        Feature values, one per training example.
    y : array-like
        Target values; must have the same shape as ``X``.
    w, b : scalar
        Slope and intercept at which the gradient is evaluated.

    Returns
    -------
    tuple of numpy.float64
        ``(dj_dw, dj_db)``.

    Raises
    ------
    ValueError
        If ``X`` is empty, or ``X`` and ``y`` do not have the same shape.
    """
    # Work in float64: with integer inputs the products (f - y) * X would be
    # computed in int64, which wraps around silently for large values.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    if X.shape != y.shape:
        raise ValueError(
            f"X and y must have the same shape, got {X.shape} and {y.shape}"
        )
    if X.size == 0:
        raise ValueError("cannot compute the gradient of an empty dataset")

    m = X.shape[0]
    residuals = w * X + b - y  # prediction error for every example at once

    dj_dw = np.sum(residuals * X) / m
    dj_db = np.sum(residuals) / m

    return dj_dw,dj_db

In [None]:
def gradient_descent(X,y,w_in,b_in,alpha,epochs,cost_function,gradient_function,max_history=100_000):
    """Fit ``w`` and ``b`` by batch gradient descent.

    Parameters
    ----------
    X, y
        Training data; passed through unchanged to ``cost_function`` and
        ``gradient_function``.
    w_in, b_in : scalar
        Starting values of the parameters.
    alpha : float
        Learning rate (step size multiplier applied to each gradient).
    epochs : int
        Number of update steps to perform.
    cost_function : callable
        Called as ``cost_function(X, y, w, b)``; its return value is stored
        in the cost history.
    gradient_function : callable
        Called as ``gradient_function(X, y, w, b)``; must return
        ``(dj_dw, dj_db)``.
    max_history : int, optional
        Only the first ``max_history`` epochs are recorded (and only for
        those is ``cost_function`` evaluated), which bounds memory use on
        very long runs. Defaults to 100000.

    Returns
    -------
    tuple
        ``(w, b, J_history, p_history)``: the final parameters, the list of
        recorded costs, and the list of recorded ``[w, b]`` pairs. Both
        histories hold the values *after* each recorded update.
    """
    J_history = []  # cost after each recorded update
    p_history = []  # [w, b] after each recorded update
    w = w_in
    b = b_in

    for i in range(epochs):
        # Both gradients are evaluated at the current (w, b) before either
        # parameter is changed, so the update is simultaneous.
        dj_dw,dj_db = gradient_function(X,y,w,b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        if i < max_history:
            J_history.append(cost_function(X, y, w , b))
            p_history.append([w,b])

    return w,b,J_history,p_history

In [None]:
# Starting point and hyperparameters for the fit.
w_init, b_init = 0, 0
epochs = 10000
alpha = 0.001

w_final, b_final, J_hist, p_hist = gradient_descent(
    X_train, y_train,
    w_init, b_init,
    alpha, epochs,
    compute_cost, compute_gradient,
)

print(f"Final w: {w_final} | Final b: {b_final}")

# Figure 1: recorded cost values, in the order they were appended.
cost_fig, cost_ax = plt.subplots()
cost_ax.plot(J_hist)
cost_ax.set_xlabel('Epoch')
cost_ax.set_ylabel('Cost')
cost_ax.set_title('Cost function history')
plt.show()

# Figure 2: training points with the fitted line drawn over them.
fit_fig, fit_ax = plt.subplots()
fit_ax.scatter(X_train, y_train, color='blue', label='Actual data')
fit_ax.plot(X_train, w_final * X_train + b_final, color='red', linewidth=2, label='Regression line')
fit_ax.set_xlabel('Years of Experience')
fit_ax.set_ylabel('Salary ($)')
fit_ax.legend()
plt.show()