# Linear Regression

In [1]:
# Import
import pandas as pd
import numpy as np
import bokeh
from bokeh.plotting import figure, show, output_notebook
output_notebook()

# Load data (semicolon-separated CSV)
dataset = pd.read_csv("../dataset/student-mat.csv", delimiter=";")

# Columns used as model inputs; the "G3" column holds the values to predict
important_columns = [
    "age", "Medu", "Fedu", "traveltime",
    "studytime", "failures", "famrel",
    "freetime", "goout", "Dalc", "Walc",
    "health", "absences",
]
X = dataset[important_columns].values
Y = dataset["G3"].values

# Optional experiment, currently disabled: append squared features
# X = np.concatenate((X, X**2), axis=1)

# Split boundaries: first 60% -> training, next 20% -> cross-validation,
# remaining rows -> test
dataset_length = X.shape[0]
cv_start_index = int(0.6 * dataset_length)
test_start_index = int(0.8 * dataset_length)

# Rows are taken in file order; nothing is shuffled before splitting
split_points = [cv_start_index, test_start_index]
X_training, X_cv, X_test = np.split(X, split_points)
Y_training, Y_cv, Y_test = np.split(Y, split_points)

In [2]:
# Hypothesis function
def h(THETA, X):
    """Evaluate the linear hypothesis for every row of X.

    Args:
        THETA: parameter vector.
        X: object exposing ``.dot`` (the notebook passes a NumPy feature
            matrix with one row per sample).

    Returns:
        The result of ``X.dot(THETA)``, i.e. one prediction per row when X
        is a 2-D array and THETA a 1-D vector.
    """
    predictions = X.dot(THETA)
    return predictions

In [3]:
# Cost function
def J(THETA, X, Y, LAMBDA):
    """Regularized squared-error cost of the hypothesis ``h``.

    Computes ``1/(2m) * (sum((h(THETA, X) - Y)**2) + LAMBDA * sum(THETA**2))``
    where ``m = len(X)``.

    Args:
        THETA: parameter vector.
        X: feature matrix passed through to ``h``.
        Y: target values, subtracted element-wise from the predictions.
        LAMBDA: weight of the squared-parameter penalty; every entry of
            THETA is penalized.

    Returns:
        The scalar cost.
    """
    residuals = h(THETA, X) - Y
    squared_error = np.sum(residuals**2)
    penalty = LAMBDA * np.sum(THETA**2)
    return 1/(2*len(X)) * (squared_error + penalty)

# Derivative of the cost function with respect to the i-th THETA parameter
def dJ(THETA, X, Y, LAMBDA, i):
    """Partial derivative of the regularized cost w.r.t. ``THETA[i]``.

    Computes ``1/m * (sum((h(THETA, X) - Y) * X[:, i]) + LAMBDA * THETA[i])``
    where ``m = len(X)``.

    Args:
        THETA: parameter vector.
        X: feature matrix; column ``i`` is read via ``X[:, i]``.
        Y: target values.
        LAMBDA: regularization weight.
        i: index of the parameter to differentiate against.

    Returns:
        The scalar partial derivative.
    """
    residuals = h(THETA, X) - Y
    data_term = np.sum(residuals * X[:, i])
    penalty_term = LAMBDA * THETA[i]
    return 1/(len(X)) * (data_term + penalty_term)

In [10]:
# Gradient Descent
def gradient_descent(X, Y, a, LAMBDA, max_iterations=None):
    """Minimize the cost ``J`` by batch gradient descent, starting from zeros.

    Each iteration moves every parameter against its partial derivative
    (``dJ``) scaled by ``a``. The step is kept only if it strictly lowers
    the cost; the first step that does not ends the search.

    Requiring a strict decrease (rather than "not worse") guarantees
    termination: a step that leaves the cost unchanged -- e.g. a zero
    gradient, or a plateau at floating-point precision -- would otherwise be
    accepted forever.

    Args:
        X: 2-D feature matrix; ``X.shape[1]`` sets the number of parameters.
        Y: target values.
        a: learning rate (step-size multiplier).
        LAMBDA: regularization weight forwarded to ``J`` and ``dJ``.
        max_iterations: optional cap on the number of accepted steps.
            ``None`` (the default) means no cap.

    Returns:
        Tuple ``(THETA, costs)``: the last accepted parameter vector and the
        list of costs of every parameter vector visited, starting with the
        initial zeros and ending with the returned THETA.
    """
    THETA = np.zeros(X.shape[1])
    # Cache the cost of the current parameters so J runs once per candidate.
    current_cost = J(THETA, X, Y, LAMBDA)
    costs = [current_cost]
    accepted_steps = 0
    while max_iterations is None or accepted_steps < max_iterations:
        # Simultaneous update: every derivative is taken at the old THETA.
        NEW_THETA = np.copy(THETA)
        for i in range(len(THETA)):
            NEW_THETA[i] = THETA[i] - a*dJ(THETA, X, Y, LAMBDA, i)
        new_cost = J(NEW_THETA, X, Y, LAMBDA)
        # Also stops on NaN, since any comparison with NaN is False.
        if not new_cost < current_cost:
            break
        THETA, current_cost = NEW_THETA, new_cost
        costs.append(current_cost)
        accepted_steps += 1
    return THETA, costs

In [11]:
# Fit on the training split with step size 0.005 and LAMBDA = 0 (the penalty
# term contributes nothing). %time reports how long the call took.
%time t, costs = gradient_descent(X_training, Y_training, 0.005, 0)
# Show the fitted parameter vector
print(t)

CPU times: user 2.09 s, sys: 0 ns, total: 2.09 s
Wall time: 2.09 s
[ 0.52082089  0.98639869 -0.31280434 -0.42696408  0.180825   -2.12697691
  0.50257063  0.71849646 -0.87252919 -0.41490352  0.55601165 -0.23783577
 -0.03411381]


In [18]:
# Fit again on the training split with step size 0.005, now with LAMBDA = 100.
# This rebinds `t` and `costs`, replacing the values from any earlier fit.
%time t, costs = gradient_descent(X_training, Y_training, 0.005, 100)
# Show the fitted parameter vector
print(t)

CPU times: user 1.07 s, sys: 0 ns, total: 1.07 s
Wall time: 1.08 s
[ 0.49020161  0.69164821  0.08108424 -0.24927706  0.20311689 -1.26505803
  0.42763375  0.46077013 -0.5538562  -0.09010698  0.18464881 -0.19680454
 -0.02625557]


In [19]:
# Plot the cost recorded at each gradient-descent iteration (log-scaled x axis)
p = figure(title = "Cost function value against iterations",
           x_axis_type="log", x_axis_label="Iterations",
           y_axis_label="Cost function value",
           width=500, height=500)
# Number iterations from 1: x = 0 has no position on a logarithmic axis, so a
# 0-based index would lose the first recorded cost.
iterations = list(range(1, len(costs) + 1))
p.line(iterations, costs)
# Horizontal reference line at the final cost. y is given as a sequence of the
# same length as x instead of a bare scalar.
final_cost_level = [costs[-1]] * len(costs)
p.line(iterations, final_cost_level, line_color="red")
show(p)

In [8]:
# Scratch example: enumerate every combination of one value from each of a, b, c
a = np.array([1, 2, 3])
b = np.array([4, 5])
c = np.array([6, 7])
# meshgrid yields one coordinate grid per input; stacking them, transposing and
# flattening leaves one (a, b, c) combination per row
grids = np.meshgrid(a, b, c)
d = np.stack(grids).T.reshape(-1, len(grids))
d

array([[1, 4, 6],
       [1, 5, 6],
       [2, 4, 6],
       [2, 5, 6],
       [3, 4, 6],
       [3, 5, 6],
       [1, 4, 7],
       [1, 5, 7],
       [2, 4, 7],
       [2, 5, 7],
       [3, 4, 7],
       [3, 5, 7]])