# Linear Regression

In [333]:
# Libraries
import pandas as pd
import numpy as np
import bokeh
from bokeh.plotting import figure, show, output_notebook
output_notebook()
from pandas.api.types import is_numeric_dtype

In [334]:
# Load the semicolon-separated CSV file into a DataFrame
dataset = pd.read_csv("../dataset/student-mat.csv", sep=";")

In [335]:
# Pick the target column; every other column is a feature unless listed
# in skiped_features.
target = "G3"
skiped_features = []

candidate_features = list(dataset.columns)
candidate_features.remove(target)
# np.setdiff1d returns the remaining names as a sorted, de-duplicated array
features = np.setdiff1d(candidate_features, skiped_features)

In [336]:
# Convert nominal features and/or target to numeric values

def nom_to_num(dataset):
    """Build value-to-integer mappings for every nominal (non-numeric) column.

    For each column of ``dataset`` whose dtype is not numeric, the distinct
    values are collected with ``np.unique`` (so they come out sorted) and
    numbered ``0 .. k-1``. Columns that are already numeric are skipped.

    Args:
        dataset: pandas DataFrame to inspect. It is not modified.

    Returns:
        dict whose keys are the nominal column names and whose values are
        nested dicts ``{original nominal value: new numeric value}``.
    """
    mappings = {}
    for feature in dataset.columns:
        column = dataset[feature]
        # Decide from the column dtype, not from the row labelled 0: that row
        # need not exist (empty frame, filtered or re-indexed frame).
        if is_numeric_dtype(column):
            continue
        nominals = np.unique(column)
        mappings[feature] = dict(zip(nominals, np.arange(len(nominals))))
    return mappings

def to_numeric(dataset):
    """Return a copy of ``dataset`` containing only numeric values.

    The mappings produced by ``nom_to_num`` are applied to every nominal
    column; the frame passed in is left untouched.

    Args:
        dataset: pandas DataFrame to convert.

    Returns:
        A new DataFrame in which each nominal value has been replaced by the
        number assigned to it in the mapping.
    """
    converted = dataset.copy()
    mapping = nom_to_num(converted)
    nominal_columns = list(mapping.keys())

    def encode(col):
        # Look each value up in the mapping that belongs to this column
        return col.map(lambda val: mapping[col.name][val])

    converted[nominal_columns] = converted[nominal_columns].apply(encode)
    return converted

# Rebind `dataset` to the converted copy returned by to_numeric
dataset = to_numeric(dataset)

In [337]:
# # Add square features
# X = np.concatenate((X,X**2), axis=1)

In [338]:
# Prepare datasets: the rows are split in their existing order into
# training (first 60%), cross-validation (next 20%) and test (the rest).
X = dataset.loc[:, features].values
Y = dataset.loc[:, target].values

dataset_length = X.shape[0]
cv_start_index = int(dataset_length * 0.6)
test_start_index = int(dataset_length * 0.8)

training_rows = slice(None, cv_start_index)
cv_rows = slice(cv_start_index, test_start_index)
test_rows = slice(test_start_index, None)

X_training, Y_training = X[training_rows], Y[training_rows]
X_cv, Y_cv = X[cv_rows], Y[cv_rows]
X_test, Y_test = X[test_rows], Y[test_rows]

In [339]:
# Hypothesis function
def h(THETA, X):
    """Evaluate the linear hypothesis for every row of X.

    Args:
        THETA: parameter vector.
        X: data matrix; its ``dot`` method is used for the product.

    Returns:
        The product ``X . THETA`` (one prediction per row of X).
    """
    predictions = X.dot(THETA)
    return predictions

In [340]:
# Cost function
def J(THETA, X, Y, LAMBDA):
    """Regularized squared-error cost of THETA on the data (X, Y).

    Computes ``1/(2m) * (sum((h(THETA, X) - Y)^2) + LAMBDA * sum(THETA^2))``
    with ``m = len(X)``.

    Args:
        THETA: parameter vector.
        X: data matrix passed to ``h``.
        Y: target values.
        LAMBDA: weight of the squared-parameter penalty.

    Returns:
        The scalar cost.
    """
    residuals = h(THETA, X) - Y
    squared_error = np.sum(residuals**2)
    penalty = LAMBDA * np.sum(THETA**2)
    return 1/(2*len(X)) * (squared_error + penalty)

# Derivative of the cost function with respect to the i-th THETA parameter
def dJ(THETA, X, Y, LAMBDA, i):
    """Partial derivative of J with respect to ``THETA[i]``.

    Computes ``1/m * (sum((h(THETA, X) - Y) * X[:, i]) + LAMBDA * THETA[i])``
    with ``m = len(X)``.

    Args:
        THETA: parameter vector.
        X: 2-D data matrix; column ``i`` is read.
        Y: target values.
        LAMBDA: weight of the squared-parameter penalty.
        i: index of the parameter to differentiate by.

    Returns:
        The scalar partial derivative.
    """
    residuals = h(THETA, X) - Y
    data_term = np.sum(residuals * X[:, i])
    penalty_term = LAMBDA * THETA[i]
    return 1/(len(X)) * (data_term + penalty_term)

In [341]:
# Gradient Descent
def gradient_descent(X, Y, a, LAMBDA):
    """Minimize the cost J by batch gradient descent, starting from zeros.

    On every iteration all parameters are updated simultaneously with step
    size ``a``. A step is accepted as long as it does not increase the cost.
    The search ends on the first step that would increase the cost, or as
    soon as a step leaves THETA unchanged.

    Args:
        X: 2-D data matrix (one column per parameter).
        Y: target values, passed through to J and dJ.
        a: learning rate.
        LAMBDA: regularization weight, passed through to J and dJ.

    Returns:
        Tuple ``(THETA, costs)``: the final parameter vector and the list of
        cost values recorded once per iteration; the last entry is the cost
        of the returned THETA.
    """
    costs = []
    THETA = np.zeros(X.shape[1])
    # The cost of the accepted THETA is carried across iterations instead of
    # being recomputed several times per pass.
    current_cost = J(THETA, X, Y, LAMBDA)
    while True:
        costs.append(current_cost)
        # All partial derivatives are taken at the old THETA (simultaneous update)
        gradient = np.array([dJ(THETA, X, Y, LAMBDA, i) for i in range(len(THETA))])
        NEW_THETA = THETA - a * gradient
        if np.array_equal(NEW_THETA, THETA):
            # Fixed point: every later iteration would repeat this one and the
            # equal cost would pass the `<=` test forever, so stop here.
            return THETA, costs
        new_cost = J(NEW_THETA, X, Y, LAMBDA)
        if new_cost <= current_cost:
            THETA, current_cost = NEW_THETA, new_cost
        else:
            return THETA, costs

In [343]:
# Fit on the training split with a=0.001 and LAMBDA=100; the IPython %time
# magic reports how long the call takes. `t` receives the returned THETA and
# `costs` the list of recorded cost values.
%time t, costs = gradient_descent(X_training, Y_training, 0.001, 100)
print(t)

CPU times: user 48.5 s, sys: 310 ms, total: 48.8 s
Wall time: 25.4 s
[-0.05578149 -0.08061156 -0.04875777  0.16440001  0.89733319 -0.00272018
  0.01001843 -0.09714788  0.13795755  0.01719419  0.00696927  0.03240227
 -0.04580736 -0.22035001  0.22651402 -0.03556929  0.02054262  0.04858734
 -0.1738307   0.02348779 -0.0813843   0.0530725  -0.09437511 -0.04098061
  0.07388109  0.01144609 -0.07467737  0.          0.01568127 -0.03155438
 -0.04496074 -0.00200243]


In [346]:
# Plot the recorded cost values against the iteration number (log x-axis)
p = figure(title="Cost function value against iterations",
           x_axis_type="log", x_axis_label="Iterations",
           y_axis_label="Cost function value",
           width=500, height=500)
# Number the iterations from 1: a logarithmic axis cannot place x = 0, so a
# zero-based index would leave the first cost value off the plot.
iterations = list(range(1, len(costs) + 1))
p.line(iterations, costs)
# Horizontal reference line at the final cost, passed as an explicit column
# with the same length as the x values.
p.line(iterations, [costs[-1]] * len(costs), line_color="red")
show(p)