In [2]:
import pandas as pd
import numpy as np

In [3]:
data = pd.read_csv("austin_final.csv")
# the features or the 'x' values of the data
# these columns are used to train the model
# the last column, i.e, precipitation column 
# will serve as the label 
X = data.drop(['PrecipitationSumInches'], axis = 1)

In [4]:
Y = data['PrecipitationSumInches']

# reshaping it into a 2-D vector

Y = Y.values.reshape(-1, 1)

In [5]:
X = X.filter(['TempAvgF', 'DewPointAvgF', 'HumidityAvgPercent',
                  'SeaLevelPressureAvgInches', 'VisibilityAvgMiles',
                  'WindAvgMPH'], axis = 1)

In [6]:
# def train_validate_test_split_v2(X,Y):
#     train_percent=.7
#     validate_percent=.1
#     np.random.seed(seed=None)
#     perm_x = np.random.permutation(X.index)
#     prem_y = np.random.permutation(Y.index)
#     m_x = len(X.index)
#     m_y = len(Y.index)

#     train_end_x = int(train_percent * m_x)
#     train_end_y = int(train_percent * m_y)
#     validate_end_x = int(validate_percent * m_x) + train_end_x
#     validate_end_y = int(validate_percent * m_y) + train_end_y

#     train_x = X.iloc[perm_x[:train_end_x]]
#     validate_x = X.iloc[perm_x[train_end_x:validate_end_x]]
#     test_x = X.iloc[perm_x[validate_end_x:]]
#     train_y = Y.iloc[prem_y[:train_end_y]]
#     validate_y = Y.iloc[prem_y[train_end_y:validate_end_y]]
#     test_y = Y.iloc[prem_y[validate_end_y:]]
    
#     return train_x, train_y, validate_x, validate_y, test_x, test_y

In [7]:
def train_val_test_split(X, Y):
    p = np.random.permutation(len(Y))
    tr = np.floor(len(Y)*0.7).astype('int')
    te = np.floor(len(Y)*0.8).astype('int')
    X_train = X[p[:tr],:]
    Y_train = Y[p[:tr]]
    X_val = X[p[tr+1:te],:]
    Y_val = Y[p[tr+1:te]]
    X_test = X[p[te+1:],:]
    Y_test = Y[p[te+1:]]
    return X_train, Y_train, X_val, Y_val, X_test, Y_test

In [8]:
# Convert x to numpy array
X = np.array(X)
Y = np.array(Y)

In [9]:
X_train, Y_train, X_val, Y_val, X_test, Y_test = train_val_test_split(X, Y)

In [10]:
from sklearn.linear_model import LinearRegression

In [11]:
LR = LinearRegression()

In [12]:
LR.fit(X_train, Y_train)
# When doing sklearn, it will augment the data with a column of 1s on its own. 
# It is later when you do the formula that you need to add the 1s column to the data.

In [13]:
print(LR.intercept_, LR.coef_)

[0.79547439] [[-0.00773992  0.00967324  0.00272239 -0.00468497 -0.08276551  0.00798989]]


In [14]:
Yhat_skl_val = LR.predict(X_val)

In [15]:
theta = np.matmul(np.matmul(np.linalg.inv(np.matmul(X_train.transpose(), X_train)), X_train.transpose()), Y_train)
#Analytical Solution =  (XX^T)^-1 * X^T * Y

In [16]:
theta

array([[-0.00452917],
       [ 0.00659404],
       [ 0.00448529],
       [ 0.01548405],
       [-0.0806892 ],
       [ 0.00814067]])

In [20]:
# E_in is mean squared error between Y_train and X_train
# E_out is mean squared error between Y_val and X_val
E_in = np.mean((Y_train -LR.predict(X_train))**2)
E_out = np.mean((Y_val -LR.predict(X_val))**2)
print(E_in, E_out)

0.1815988640034728 0.1287914613389737


In [21]:
# To get the intercept, we need to add the augmented column of 1s to the X_train
# X_train_aug = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_aug_train = np.c_[X_train,np.ones(len(Y_train))]
X_aug_val = np.c_[X_val,np.ones(len(Y_val))]

In [18]:
theta = np.matmul(np.matmul(np.linalg.inv(np.matmul(X_aug_train.transpose(), X_aug_train)), X_aug_train.transpose()), Y_train)

In [22]:
hatY_aug_val = np.matmul(X_aug_val, theta)

In [23]:
theta

array([[ 0.79547439],
       [-0.00773992],
       [ 0.00967324],
       [ 0.00272239],
       [-0.00468497],
       [-0.08276551],
       [ 0.00798989]])

In [24]:
# In Gradient decent, initialize the theta to 0. IN SDG, initialization does matter, so write a function to initialize theta to 0
# A flag which tells if initialization is to zero or a random value
def init(X,zeros=True):
    n=X.shape[1]
    if zeros:
        theta = np.zeros((n,1))
    else:
        theta = np.random.randn(n,1) - 0.5
        theta[-1] = 0
    return theta
# Intercept is all initialized to 0. The last column is the intercept

In [31]:
theta = init(X_train, zeros=False)

In [26]:
def predict(X, theta):
    return np.matmul(X, theta)

In [27]:
predict(X_train, theta)

array([[-128.06210635],
       [-157.81798967],
       [-143.11353191],
       [-140.10467267],
       [-133.5238162 ],
       [-132.51945901],
       [-122.15842073],
       [-133.07382471],
       [-119.41523309],
       [-114.22306678],
       [-139.71590513],
       [-113.15082504],
       [-120.99100657],
       [ -86.68067484],
       [-124.9857539 ],
       [-101.47457343],
       [-123.48799178],
       [-144.09775972],
       [-135.20479123],
       [-138.88561028],
       [-139.98790036],
       [ -99.13317621],
       [-109.41718593],
       [-148.38569997],
       [-124.78998322],
       [-129.62965066],
       [-119.34175816],
       [-148.04269199],
       [-143.62917791],
       [-114.62542631],
       [-148.61315832],
       [-118.92177893],
       [-145.12337459],
       [-144.42074554],
       [-143.06455923],
       [-146.57946057],
       [-126.66947856],
       [-147.82445143],
       [-117.27318255],
       [-144.45541555],
       [ -90.47882807],
       [-148.788

# Learning Rate
Update rate is the learning rate

It will take X_train, Y_train, theta, learning_rate

It will take Y_pred = predict(X_train, theta)

d_theta will be sum of (Y_theta - Y_pred) * X_train, from i = 1 to m

New theta will be theta + learning_rate * d_theta

In [32]:
theta

array([[-2.02952411],
       [ 0.18737938],
       [ 0.4427812 ],
       [-0.33866025],
       [-0.35808599],
       [ 0.        ]])

In [48]:
def update_rule(X, Y, theta, lr):
    m = X.shape[0]
    Y_pred = predict(X, theta)
    d_theta = np.matmul(X.transpose(), Y_pred - Y)
    theta = theta - lr*d_theta / m
    return theta

In [56]:
def update_weights(X,Y,theta):
    Y_pred = predict(X,theta)
    m = X.shape[0]
    dtheta =- (2/m) * np.dot(X.T, Y - Y_pred)
    return dtheta 

In [49]:
theta = init(X_train, zeros=False)

In [50]:
theta_updated = update_rule(X_train, Y_train, theta, 0.01)

In [51]:
theta_updated

array([[-3.57377873],
       [-2.28540136],
       [-1.97697058],
       [-0.67908776],
       [-1.62055471],
       [-0.14305842]])

In [59]:
learning_rate = 0.0001
theta = init(X_train)
for _ in range(10000):
    dtheta = update_weights(X_train, Y_train, theta)
    print(dtheta)
    theta = theta - learning_rate*dtheta/X_train.shape[0]

[[-18.14569881]
 [-16.59024919]
 [-21.43018418]
 [ -7.7687571 ]
 [ -1.74881907]
 [ -1.35984832]]
[[-18.08449345]
 [-16.54049622]
 [-21.37257952]
 [ -7.74336617]
 [ -1.74119807]
 [ -1.355566  ]]
[[-18.02347376]
 [-16.49089407]
 [-21.31514912]
 [ -7.71805217]
 [ -1.73360018]
 [ -1.35129667]]
[[-17.96263917]
 [-16.44144226]
 [-21.25789243]
 [ -7.69281485]
 [ -1.72602534]
 [ -1.34704026]]
[[-17.90198912]
 [-16.39214035]
 [-21.20080894]
 [ -7.66765399]
 [ -1.71847346]
 [ -1.34279676]]
[[-17.84152306]
 [-16.34298789]
 [-21.14389813]
 [ -7.64256935]
 [ -1.71094449]
 [ -1.33856611]]
[[-17.78124043]
 [-16.29398441]
 [-21.08715946]
 [ -7.6175607 ]
 [ -1.70343835]
 [ -1.33434829]]
[[-17.72114067]
 [-16.24512947]
 [-21.03059242]
 [ -7.59262781]
 [ -1.69595497]
 [ -1.33014324]]
[[-17.66122323]
 [-16.19642262]
 [-20.97419648]
 [ -7.56777046]
 [ -1.68849429]
 [ -1.32595094]]
[[-17.60148755]
 [-16.14786341]
 [-20.91797113]
 [ -7.5429884 ]
 [ -1.68105623]
 [ -1.32177134]]
[[-17.54193309]
 [-16.09945138

In [60]:
learning_rate = 0.0001
theta = init(X_train)
for _ in range(10000):
    theta = update_rule(X_train, Y_train, theta, learning_rate)
    print(theta)
    # theta = theta - learning_rate*dtheta

[[9.07284940e-04]
 [8.29512459e-04]
 [1.07150921e-03]
 [3.88437855e-04]
 [8.74409534e-05]
 [6.79924160e-05]]
[[ 4.02256194e-04]
 [ 5.10975257e-04]
 [ 8.13790946e-04]
 [ 1.90980160e-04]
 [-9.72613802e-07]
 [ 3.71703974e-05]]
[[ 4.62106493e-04]
 [ 6.50366169e-04]
 [ 1.08245769e-03]
 [ 2.26751313e-04]
 [-1.91303019e-05]
 [ 4.57209927e-05]]
[[ 2.99276070e-04]
 [ 6.08004329e-04]
 [ 1.13847457e-03]
 [ 1.69500436e-04]
 [-6.50602931e-05]
 [ 3.86046420e-05]]
[[ 2.27409705e-04]
 [ 6.38657397e-04]
 [ 1.27625633e-03]
 [ 1.49180642e-04]
 [-9.97199869e-05]
 [ 3.77441292e-05]]
[[ 1.21542040e-04]
 [ 6.40833637e-04]
 [ 1.37859260e-03]
 [ 1.14038362e-04]
 [-1.38662186e-04]
 [ 3.44080438e-05]]
[[ 3.14073372e-05]
 [ 6.54940166e-04]
 [ 1.49222266e-03]
 [ 8.46938319e-05]
 [-1.75694256e-04]
 [ 3.20745597e-05]]
[[-6.28424420e-05]
 [ 6.64865999e-04]
 [ 1.59858344e-03]
 [ 5.29405717e-05]
 [-2.13284763e-04]
 [ 2.93587479e-05]]
[[-1.53339910e-04]
 [ 6.77011720e-04]
 [ 1.70512087e-03]
 [ 2.20536622e-05]
 [-2.50452

In [61]:
# SGD, Stochastic Gradient Descent. Need to find the test solution for leaning rate