# Linear Regression ML

In [169]:
from numpy import *

#Print the running RSS (residual sum of squares) with the index of each point
def print_error_iteration_for_each_line(b, m, points):
    """Print the cumulative squared error of the line ``y = m * x + b``.

    One line is printed per point, in the form ``<running total> - <index>``;
    the running total already includes that point's squared residual.

    Args:
        b: Intercept of the line.
        m: Slope of the line.
        points: Container indexed as ``points[i, 0]`` (x) and ``points[i, 1]``
            (y), e.g. a 2-D NumPy array.

    Returns:
        None; the values are only printed.
    """
    running_rss = 0
    for index in range(len(points)):
        # Residual = observed y minus the y predicted by the line.
        residual = points[index, 1] - (m * points[index, 0] + b)
        running_rss += residual ** 2
        print(running_rss, '-', index)

#Print the running RSS (residual sum of squares)
def print_error_for_each_line(b, m, points):
    """Print the cumulative squared error of the line ``y = m * x + b``.

    After each point is processed, the running total of squared residuals
    (including that point) is printed on its own line.

    Args:
        b: Intercept of the line.
        m: Slope of the line.
        points: Container indexed as ``points[i, 0]`` (x) and ``points[i, 1]``
            (y), e.g. a 2-D NumPy array.

    Returns:
        None; the values are only printed.
    """
    accumulated = 0
    for row in range(len(points)):
        predicted_y = m * points[row, 0] + b
        accumulated += (points[row, 1] - predicted_y) ** 2
        print(accumulated)

#Compute the mean squared error of a line
def compute_error_for_line_given_points(b, m, points):
    """Return the mean squared error of the line ``y = m * x + b``.

    Args:
        b: Intercept of the line.
        m: Slope of the line.
        points: Container indexed as ``points[i, 0]`` (x) and ``points[i, 1]``
            (y), e.g. a 2-D NumPy array.

    Returns:
        The sum of squared residuals divided by the number of points.

    Raises:
        ZeroDivisionError: If ``points`` is empty.
    """
    n_points = len(points)
    squared_error_sum = 0
    for row in range(n_points):
        predicted_y = m * points[row, 0] + b
        squared_error_sum += (points[row, 1] - predicted_y) ** 2
    return squared_error_sum / float(n_points)

#Single gradient-descent step
def step_gradient(b_current, m_current, points, learning_rate):
    """Take one gradient-descent step on the mean squared error of a line.

    The partial derivatives of ``(1/N) * sum((y - (m*x + b))**2)`` with
    respect to ``b`` and ``m`` are accumulated over every point, then both
    parameters are moved against their gradient, scaled by ``learning_rate``.

    Args:
        b_current: Current intercept.
        m_current: Current slope.
        points: Container indexed as ``points[i, 0]`` (x) and ``points[i, 1]``
            (y), e.g. a 2-D NumPy array.
        learning_rate: Multiplier applied to each gradient.

    Returns:
        A two-element list ``[new_b, new_m]``.
    """
    n_points = len(points)
    N = float(n_points)
    grad_b = 0
    grad_m = 0
    for idx in range(n_points):
        x = points[idx, 0]
        residual = points[idx, 1] - ((m_current * x) + b_current)
        # 2/N is evaluated inside the loop, so an empty ``points`` performs
        # no division and simply returns the parameters unchanged.
        grad_b += -(2/N) * residual
        grad_m += -(2/N) * x * residual
    return [
        b_current - (learning_rate * grad_b),
        m_current - (learning_rate * grad_m),
    ]
        

#Gradient descent code
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    """Run ``num_iterations`` gradient-descent steps and return the fitted line.

    Args:
        points: Data passed through ``array(...)`` and then handed to
            ``step_gradient`` on every iteration.
        starting_b: Initial intercept.
        starting_m: Initial slope.
        learning_rate: Step size forwarded to ``step_gradient``.
        num_iterations: Number of steps to perform (used with ``range``).

    Returns:
        A two-element list ``[b, m]`` holding the final intercept and slope.
    """
    # Convert once up front: array(points) does not change between
    # iterations, so rebuilding it on every step only repeated the copy.
    points_array = array(points)
    b = starting_b
    m = starting_m

    for _ in range(num_iterations):
        b, m = step_gradient(b, m, points_array, learning_rate)
    return [b, m]
    
#Run method
def run(data):
    """Fit a line to the CSV at ``data`` using the notebook's default settings.

    Delegates to ``run_by_num_iterations_and_learning_rate`` with 1000
    iterations and a learning rate of 0.0001. That function prints exactly
    the report this one used to print itself: the starting error, the fitted
    ``b`` and ``m``, and the final error.

    Args:
        data: Path (or file-like object) of the comma-separated input data.

    Returns:
        None; results are only printed.
    """
    # Same defaults this function previously hard-coded inline.
    run_by_num_iterations_and_learning_rate(data, num_iterations=1000, learning_rate=0.0001)

#Run to print the running RSS of the starting line
def run_error(data):
    """Load ``data`` and print the running RSS of the line ``b = 0, m = 0``.

    No gradient descent is performed: the comma-separated file is read with
    ``genfromtxt`` and passed to ``print_error_for_each_line`` together with
    the zero-initialised intercept and slope.

    Args:
        data: Path (or file-like object) of the comma-separated input data.

    Returns:
        None; values are only printed.
    """
    points = genfromtxt(data, delimiter=',')
    initial_b = 0
    initial_m = 0
    print_error_for_each_line(initial_b, initial_m, points)
    
#Run to print the running RSS of the starting line, with the point index
def run_error_iterations(data):
    """Load ``data`` and print the indexed running RSS of ``b = 0, m = 0``.

    No gradient descent is performed: the comma-separated file is read with
    ``genfromtxt`` and passed to ``print_error_iteration_for_each_line``
    together with the zero-initialised intercept and slope.

    Args:
        data: Path (or file-like object) of the comma-separated input data.

    Returns:
        None; values are only printed.
    """
    points = genfromtxt(data, delimiter=',')
    initial_b = 0
    initial_m = 0
    print_error_iteration_for_each_line(initial_b, initial_m, points)

#Run with a caller-chosen number of iterations and learning rate
def run_by_num_iterations_and_learning_rate(data, num_iterations, learning_rate):
    """Fit a line to the CSV at ``data`` and print a before/after report.

    Starting from ``b = 0, m = 0``, prints the initial error, runs
    ``gradient_descent_runner`` and then prints the fitted intercept and
    slope (each on its own line) followed by a summary line with the final
    error.

    Args:
        data: Path (or file-like object) of the comma-separated input data.
        num_iterations: Number of gradient-descent steps to run.
        learning_rate: Step size used for each gradient-descent step.

    Returns:
        None; results are only printed.
    """
    points = genfromtxt(data, delimiter=',')
    initial_b = 0
    initial_m = 0

    start_error = compute_error_for_line_given_points(initial_b, initial_m, points)
    print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(initial_b, initial_m, start_error))
    print("Running...")

    b, m = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)
    print(b)
    print(m)

    final_error = compute_error_for_line_given_points(b, m, points)
    print("After {0} iterations b = {1}, m = {2}, error = {3}".format(num_iterations, b, m, final_error))
    
#Run the fit on data.csv (the "Saraj" dataset, as the original note calls it)
run('data.csv')

Starting gradient descent at b = 0, m = 0, error = 5565.107834483211
Running...
0.08893651993741346
1.4777440851894448
After 1000 iterations b = 0.08893651993741346, m = 1.4777440851894448, error = 112.61481011613473


# Questão 1:

In [170]:
#Run the same fit on the discipline's dataset (income.csv)
run('income.csv')

Starting gradient descent at b = 0, m = 0, error = 2946.6344970460195
Running...
-0.18234255376510086
3.262182267596014
After 1000 iterations b = -0.18234255376510086, m = 3.262182267596014, error = 103.39842291729676


# Questão 2:

In [171]:
#Print the running RSS (residual sum of squares) for income.csv at b = 0, m = 0
run_error('income.csv')

710.693685281386
1456.3350965744962
1946.1786765790723
2394.3408235695506
2625.156936734124
3322.061552784899
3626.0514682736953
4276.703677207473
5637.177002965462
7210.577186144827
8393.681302791256
10115.764770279184
12139.10684771108
14351.830369731919
16680.141681382476
19933.04750699792
22584.3622681148
26346.543288944235
29662.22865162597
34361.84035736833
38497.73547078538
43253.080343301575
48820.424738091286
53985.31849899025
59776.24470726105
65518.128374733875
70772.35658989825
76756.15579793125
81957.27573425039
88399.03491138059


# Questão 3:

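O erro (RSS acumulado) aumenta a cada iteração do laço sobre os pontos: com b = 0 e m = 0 fixos, cada ponto acrescenta um resíduo ao quadrado (nunca negativo) à soma, de modo que o último valor impresso é o RSS total.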

In [172]:
#Print the running RSS for income.csv at b = 0, m = 0, with the index of each point
run_error_iterations('income.csv')

710.693685281386 - 0
1456.3350965744962 - 1
1946.1786765790723 - 2
2394.3408235695506 - 3
2625.156936734124 - 4
3322.061552784899 - 5
3626.0514682736953 - 6
4276.703677207473 - 7
5637.177002965462 - 8
7210.577186144827 - 9
8393.681302791256 - 10
10115.764770279184 - 11
12139.10684771108 - 12
14351.830369731919 - 13
16680.141681382476 - 14
19933.04750699792 - 15
22584.3622681148 - 16
26346.543288944235 - 17
29662.22865162597 - 18
34361.84035736833 - 19
38497.73547078538 - 20
43253.080343301575 - 21
48820.424738091286 - 22
53985.31849899025 - 23
59776.24470726105 - 24
65518.128374733875 - 25
70772.35658989825 - 26
76756.15579793125 - 27
81957.27573425039 - 28
88399.03491138059 - 29


# Questão 4

In [182]:
#Run with an explicit number of iterations and learning rate
#Uses 200000 iterations and a learning rate of 0.0001 (an earlier note here listed 15000 and 0.0005)
run_by_num_iterations_and_learning_rate('income.csv', 200000, 0.0001)

Starting gradient descent at b = 0, m = 0, error = 2946.6344970460195
Running...
-6.673235734774021
3.6485718802165836
After 200000 iterations b = -6.673235734774021, m = 3.6485718802165836, error = 81.08475625816143


# Questão 5