In [1]:
age = [20, 21, 23, 25, 28, 30, 35, 40]
experience = [0, 0, 1, 2, 3, 5, 10, 12]
income = [ 43. ,  45. ,  51.5,  58. ,  66.5,  75.5,  98. , 113. ]

In [2]:
import numpy as np

In [3]:
Age = np.array(age)
Exp = np.array(experience)
Inc = np.array(income)

In [4]:
ones = np.ones(len(Age))
ones

array([1., 1., 1., 1., 1., 1., 1., 1.])

In [5]:
X = np.c_[ones, age, Exp]

In [6]:
X


array([[ 1., 20.,  0.],
       [ 1., 21.,  0.],
       [ 1., 23.,  1.],
       [ 1., 25.,  2.],
       [ 1., 28.,  3.],
       [ 1., 30.,  5.],
       [ 1., 35., 10.],
       [ 1., 40., 12.]])

In [7]:
Y = np.c_[Inc]

In [8]:
Y

array([[ 43. ],
       [ 45. ],
       [ 51.5],
       [ 58. ],
       [ 66.5],
       [ 75.5],
       [ 98. ],
       [113. ]])

In [9]:
from numpy.linalg import *
left = inv(X.T.dot(X))
right = X.T.dot(Y)
W = left.dot(right)
W.shape

(3, 1)

In [10]:
W

array([[3. ],
       [2. ],
       [2.5]])

In [15]:
ycap = X.dot(W)
ycap

array([[ 43. ],
       [ 45. ],
       [ 51.5],
       [ 58. ],
       [ 66.5],
       [ 75.5],
       [ 98. ],
       [113. ]])

In [16]:
# to find error --> MSE is used --> mean square error.
mse = ((Y-ycap)**2).mean()
mse

1.0344927535741302e-22

In [17]:
# to test quality of your prediction --> Rsquare is used.
# rsq = 1 - (rss/tss)
rss = ((Y - ycap)**2).sum()
tss = ((Y - Y.mean())**2).sum()
rsq = 1 - (rss/tss)
print(" accuracy ", rsq*100)


 accuracy  100.0


In [18]:
# new data

new_age = [20, 28, 42, 22, 32]
new_exp = [0, 5, 10, 1, 8]


In [19]:
ones = np.ones(len(new_age))
ones

array([1., 1., 1., 1., 1.])

In [20]:
X1 = np.c_[ones, new_age, new_exp]
X1

array([[ 1., 20.,  0.],
       [ 1., 28.,  5.],
       [ 1., 42., 10.],
       [ 1., 22.,  1.],
       [ 1., 32.,  8.]])

In [21]:
predict_income = X1.dot(W)
predict_income

array([[ 43. ],
       [ 71.5],
       [112. ],
       [ 49.5],
       [ 87. ]])

In [23]:
np.c_[new_age, new_exp, predict_income]

array([[ 20. ,   0. ,  43. ],
       [ 28. ,   5. ,  71.5],
       [ 42. ,  10. , 112. ],
       [ 22. ,   1. ,  49.5],
       [ 32. ,   8. ,  87. ]])

In [25]:
# in linear regression -->
# error measurement --> MSE
# performance of the model (quality of prediction) --> Rsquare.

In [29]:
# statistic Regression can be devide into two types 
# 1. linear regression
# 2. non linear regression
# the non linear regression can be divided into two types 2nd degree and 3rd degree.

# y = w0 + w1.x --> linear regression
# y = w0 + w1.x + w2.x**2 --> 2nd degree of non linear 
# y = w0 + w1.x + w2.x**2 + w3.x**3 --> 3rd degree of non linear
# income = w0 + w1.age + w2.age**2 + w3.age**3


In [30]:
age


[20, 21, 23, 25, 28, 30, 35, 40]

In [31]:
Age2 = Age**2
print(np.c_[Age, Age2])

[[  20  400]
 [  21  441]
 [  23  529]
 [  25  625]
 [  28  784]
 [  30  900]
 [  35 1225]
 [  40 1600]]


In [32]:
Exp2 = Exp ** 2
np.c_[Exp, Exp2]

array([[  0,   0],
       [  0,   0],
       [  1,   1],
       [  2,   4],
       [  3,   9],
       [  5,  25],
       [ 10, 100],
       [ 12, 144]])

In [34]:
# prepare data for 2nd degree non linear regression
ones = np.ones(len(Age))
X2 = np.c_[ones, Age, Age2, Exp, Exp2]
X2.shape

(8, 5)

In [35]:
X2


array([[1.000e+00, 2.000e+01, 4.000e+02, 0.000e+00, 0.000e+00],
       [1.000e+00, 2.100e+01, 4.410e+02, 0.000e+00, 0.000e+00],
       [1.000e+00, 2.300e+01, 5.290e+02, 1.000e+00, 1.000e+00],
       [1.000e+00, 2.500e+01, 6.250e+02, 2.000e+00, 4.000e+00],
       [1.000e+00, 2.800e+01, 7.840e+02, 3.000e+00, 9.000e+00],
       [1.000e+00, 3.000e+01, 9.000e+02, 5.000e+00, 2.500e+01],
       [1.000e+00, 3.500e+01, 1.225e+03, 1.000e+01, 1.000e+02],
       [1.000e+00, 4.000e+01, 1.600e+03, 1.200e+01, 1.440e+02]])

In [36]:
Y

array([[ 43. ],
       [ 45. ],
       [ 51.5],
       [ 58. ],
       [ 66.5],
       [ 75.5],
       [ 98. ],
       [113. ]])

In [38]:
W2 = inv(X2.T.dot(X2)).dot(X2.T.dot(Y))
W2.shape

(5, 1)

In [39]:
W2

array([[ 3.00000000e+00],
       [ 2.00000000e+00],
       [-2.27373675e-13],
       [ 2.50000000e+00],
       [-4.54747351e-13]])

In [40]:
ycap2 = X2.dot(W2)
ycap2

array([[ 43. ],
       [ 45. ],
       [ 51.5],
       [ 58. ],
       [ 66.5],
       [ 75.5],
       [ 98. ],
       [113. ]])

In [46]:
def rsquare(y,ycap):
    rss = ((y - ycap)**2).sum()
    tss = ((y - y.mean())**2).sum()
    rsq = 1 - (rss/tss)
    return rsq

In [48]:
accuracy = rsquare(Y,ycap2)*100
print("Accuarcy: ",accuracy)

Accuarcy:  100.0


In [49]:
Age3 = Age**3
Exp3 = Exp**3

In [50]:
# Prepare data for 3rd degree non linear model.
X3 = np.c_[ones, Age, Age2, Age3, Exp, Exp2, Exp3]
X3.shape

(8, 7)

In [51]:
X3

array([[1.0000e+00, 2.0000e+01, 4.0000e+02, 8.0000e+03, 0.0000e+00,
        0.0000e+00, 0.0000e+00],
       [1.0000e+00, 2.1000e+01, 4.4100e+02, 9.2610e+03, 0.0000e+00,
        0.0000e+00, 0.0000e+00],
       [1.0000e+00, 2.3000e+01, 5.2900e+02, 1.2167e+04, 1.0000e+00,
        1.0000e+00, 1.0000e+00],
       [1.0000e+00, 2.5000e+01, 6.2500e+02, 1.5625e+04, 2.0000e+00,
        4.0000e+00, 8.0000e+00],
       [1.0000e+00, 2.8000e+01, 7.8400e+02, 2.1952e+04, 3.0000e+00,
        9.0000e+00, 2.7000e+01],
       [1.0000e+00, 3.0000e+01, 9.0000e+02, 2.7000e+04, 5.0000e+00,
        2.5000e+01, 1.2500e+02],
       [1.0000e+00, 3.5000e+01, 1.2250e+03, 4.2875e+04, 1.0000e+01,
        1.0000e+02, 1.0000e+03],
       [1.0000e+00, 4.0000e+01, 1.6000e+03, 6.4000e+04, 1.2000e+01,
        1.4400e+02, 1.7280e+03]])

In [53]:
W3 = inv(X3.T.dot(X3)).dot(X3.T.dot(Y))
W3

array([[ 3.00000003e+00],
       [ 2.00000000e+00],
       [ 8.73114914e-11],
       [-1.36424205e-12],
       [ 2.50000000e+00],
       [-1.45519152e-10],
       [ 8.18545232e-12]])

In [54]:
ycap3 = X3.dot(W3)
ycap3

array([[ 43.        ],
       [ 45.        ],
       [ 51.5       ],
       [ 58.        ],
       [ 66.5       ],
       [ 75.5       ],
       [ 97.99999999],
       [112.99999998]])

In [56]:
accuracy3 = rsquare(Y, ycap3)*100
print("Accuiracy: ",accuracy3)

Accuiracy:  100.0


In [57]:
# case 1
# linear --> 100%
# non linear 2nd degree --> 100%
# non linear 3rd degree --> 100%
# linear is select

In [58]:
# case 2
# linear --> 40%
# non linear 2nd degree --> 85%
# non linear 3rd degree --> 99%
# 3rd degree is selected


In [59]:
# case 3
# linear --> 40%
# non linear 2nd degree --> 44%
# non linear 3rd degree --> 39%
# no module is selected

In [None]:
# case 4
# linear --> 80%
# non linear 2nd degree --> 95%
# non linear 3rd degree --> 95%
# 2nd degree is cheaper in computing cost --> selecyt 2nd degree.