In [3]:
import numpy as np

In [144]:
def cost(points,m,c):
    M = len(points)
    total_cost = 0
    for i in range(M):
        x = points[i,0]
        y = points[i,1]
        total_cost += (1/M) * ((y - m * x - c)**2)
    return total_cost

In [167]:
# stochastic gradient descent
def stochastic_step_gradient(points,learning_rate,m,c):
    M = len(points)
    for i in range(M):
        x = points[i,0]
        y = points[i,1]
        m = m - learning_rate * ((-2/M)*(y - m*x - c)*x)
        c = c - learning_rate * ((-2/M)*(y - m*x - c))
    return m,c

In [145]:
def step_gradient(points,learning_rate,m,c):
    m_slope, c_slope = 0, 0
    M = len(points)
    for i in range(M):
        x = points[i,0]
        y = points[i,1]
        m_slope += (-2/M)*(y - m*x - c)*x
        c_slope +=(-2/M)*(y - m*x - c)
    new_m = m - learning_rate * m_slope
    new_c = c - learning_rate * c_slope
    return new_m,new_c

In [168]:
def gradient_descent(points,learning_rate,num_iterations):
    m, c,m1,c1 = 0,0,0,0
    for i in range(num_iterations):
        m, c = step_gradient(points,learning_rate,m, c)
        m1,c1 = stochastic_step_gradient(points,learning_rate,m1,c1)
        print(i, "Cost of Batch Gradient Descent: ",cost(points,m,c))
        print(i, "Cost of Stochaistic Gradient Descent: ",cost(points,m1,c1))
    return m,c

In [171]:
def run():
    points = np.loadtxt('data.csv',delimiter = ",")
    learning_rate = 0.0001
    num_iterations = 10
    m, c = gradient_descent(points,learning_rate,num_iterations)
    print("Slope and Intercept are: ",m,c)
    x = points[:,0]
    y = points[:,1]
    y_pred = m*x + c
    u = ((y-y_pred)**2).sum()
    v = ((y-y.mean())**2).sum()
    score = 1 - (u/v)
    print("Score with Gradient Descent is: ",score)

In [172]:
run()

0 Cost of Batch Gradient Descent:  1484.5865574086486
0 Cost of Stochaistic Gradient Descent:  2106.5392947761416
1 Cost of Batch Gradient Descent:  457.8542575737672
1 Cost of Stochaistic Gradient Descent:  838.7592704985236
2 Cost of Batch Gradient Descent:  199.5099857255389
2 Cost of Stochaistic Gradient Descent:  375.24684837845876
3 Cost of Batch Gradient Descent:  134.50591058200533
3 Cost of Stochaistic Gradient Descent:  206.51781164815262
4 Cost of Batch Gradient Descent:  118.1496934223995
4 Cost of Stochaistic Gradient Descent:  145.54573787919594
5 Cost of Batch Gradient Descent:  114.0341490603815
5 Cost of Stochaistic Gradient Descent:  123.78825665772355
6 Cost of Batch Gradient Descent:  112.99857731713657
6 Cost of Stochaistic Gradient Descent:  116.19440981124372
7 Cost of Batch Gradient Descent:  112.73798187568467
7 Cost of Stochaistic Gradient Descent:  113.65044439538873
8 Cost of Batch Gradient Descent:  112.6723843590911
8 Cost of Stochaistic Gradient Descent: 

In [151]:
from sklearn.linear_model import LinearRegression
X = np.loadtxt('data.csv',delimiter = ",")
alg1 = LinearRegression()
alg1.fit(X[:,:-1].reshape(-1,1),X[:,-1])
print(alg1.coef_,alg1.intercept_)
# print(alg1.predict(X[:,:-1].reshape(-1,1)))
print("Score with inbuilt algorithm: ",alg1.score(X[:,:-1].reshape(-1,1),X[:,-1]))

[1.32243102] 7.991020982270399
Score with inbuilt algorithm:  0.598655791538662


In [153]:
def generic_cost(points,m,c):
    M = len(points)
    total_cost = 0
    for i in range(M):
        x = points[i,0]
        y = points[i,1]
        total_cost += (1/M) * ((y - (m * x).sum() - c)**2)
    return total_cost

In [154]:
def generic_step_gradient_descent(points,learning_rate,m,c):
    N = points.shape[1]-1
    m_slope, c_slope = np.zeros(N), 0
    M = len(points)
    for i in range(M):
        x = points[i, :-1]
        y = points[i, -1]
        for j in range(N):
            m_slope[j] += (-2/M) * (y - (m*x).sum() - c)*x[j]
        c_slope += (-2/M) * (y - (m*x).sum() - c)
    new_m = m - learning_rate*m_slope
    new_c = c - learning_rate*c_slope
    return new_m,new_c

In [155]:
# Generic Gradient Descent
def generic_gradient_descent(points,learning_rate,num_iterations):
    N = points.shape[1]-1
    m, c = np.zeros(N) , 0
    for i in range(num_iterations):
        m, c = generic_step_gradient_descent(points,learning_rate,m,c)
        print(i,"Cost: ", generic_cost(points,m,c))
    return m,c

In [165]:
def run_generic():
    # getting the training data
    dataset = np.genfromtxt("0000000000002329_training_diabetes_x_y_train.csv",delimiter = ",")
    learning_rate = 0.0001
    num_iterations = 10
    m, c = generic_gradient_descent(dataset,learning_rate,num_iterations)
    print("Coefficients are: ",m)
    print("Intercept is: ",c)
#     y_pred = m*x + c
#     print(y_pred)

In [166]:
run_generic()

0 Cost:  0.0031732388825565054
1 Cost:  0.005878385646927491
2 Cost:  0.010375041718410248
3 Cost:  0.016662132094447887
4 Cost:  0.024738582274136858
5 Cost:  0.034603318258012
6 Cost:  0.0462552665478313
7 Cost:  0.05969335414636167
8 Cost:  0.07491650855716407
9 Cost:  0.09192365778437837
Coefficients are:  [ 0.00103733  0.00015081  0.00441378  0.00257135  0.00103555  0.00082749
 -0.00246147  0.00236916  0.00346693  0.00282222]
Intercept is:  0.2991747662597829


In [173]:
# Feature Scaling using scale method
from sklearn import preprocessing
data = [[0,3], [9,4], [2,7], [1,1]]
data_scaled1 = preprocessing.scale(data)
print(data_scaled1)

[[-0.84852814 -0.34641016]
 [ 1.69705627  0.11547005]
 [-0.28284271  1.5011107 ]
 [-0.56568542 -1.27017059]]


In [177]:
# Feature scaling using StandardScaler
scaler = preprocessing.StandardScaler()
scaler.fit(data)
data_scaled2 = scaler.transform(data)
print(data_scaled2)

[[-0.84852814 -0.34641016]
 [ 1.69705627  0.11547005]
 [-0.28284271  1.5011107 ]
 [-0.56568542 -1.27017059]]
