In [1]:
import numpy as np
import pandas as pd

In [2]:
from mlpy.regression import LinearRegression, ZeroRuleforRegression
from mlpy.tools import accuracy_score, train_test_split

In [3]:
# load the exam-score table from its CSV file, then preview the first rows
scores_csv = 'datasets/test_scores.csv'
data = pd.read_csv(scores_csv)
data.head()

Unnamed: 0,EXAM1,EXAM2,EXAM3,FINAL
0,73,80,75,152
1,93,88,93,185
2,89,91,90,180
3,96,98,100,196
4,73,66,70,142


In [4]:
# pull each exam column out of the frame as a float32 column vector of
# shape (n, 1), so the three can later be stacked side by side
feature1, feature2, feature3 = (
    np.array(data[exam].tolist(), dtype=np.float32).reshape(-1, 1)
    for exam in ('EXAM1', 'EXAM2', 'EXAM3')
)
# the regression target stays a flat float32 vector of shape (n,)
y_data = np.array(data['FINAL'].tolist(), dtype=np.float32)

In [5]:
# divide every feature column, in place, by its own maximum value
for column in (feature1, feature2, feature3):
    column /= column.max()

In [6]:
# place the three feature column vectors side by side in one feature matrix
x_data = np.concatenate([feature1, feature2, feature3], axis=1)
# split features and targets into training and test parts
# (train_size=0.9 is presumably the fraction of rows kept for training -- confirm in mlpy)
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, train_size=0.9)
# show the shape of each of the four resulting arrays on one line
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(22, 3) (3, 3) (22,) (3,)


In [7]:
# announce that training is about to start
print("fitting classifier...\n")
# hyperparameters handed to the LinearRegression constructor
model_params = dict(epochs=100000, lmb=1e-5, lr=0.01, sgd=8)
lrgs = LinearRegression(**model_params)
# train on the training split; with these options the run reports the cost
# periodically while fitting
fit_options = dict(verbose=True, print_iters=10000)
lrgs.fit(X_train, y_train, **fit_options)

fitting classifier...

iteration 10000: cost: 6.635002381166881
iteration 20000: cost: 3.8324490847749213
iteration 30000: cost: 6.953561153279497
iteration 40000: cost: 2.601851951474389
iteration 50000: cost: 5.372986379501264
iteration 60000: cost: 1.6986889878479028
iteration 70000: cost: 5.964525819044338
iteration 80000: cost: 5.43222584844286
iteration 90000: cost: 1.562027548362251
iteration 100000: cost: 0.5803967866257704
iteration 110000: cost: 4.31156165475693
iteration 120000: cost: 4.833321618936568
iteration 130000: cost: 4.891009356542694
iteration 140000: cost: 1.42518249974365
iteration 150000: cost: 2.0514897088231314
iteration 160000: cost: 1.6392096668326606
iteration 170000: cost: 4.076777508965681
iteration 180000: cost: 2.5131689963172703
iteration 190000: cost: 1.2061420074735674
iteration 200000: cost: 4.530072170867054


In [8]:
# predict on the held-out rows
preds = lrgs.predict(X_test)

# show each prediction next to its true target value
for predicted, actual in zip(preds, y_test):
    print(predicted, actual)

# report the model's error on the training split, then on the test split
for label, features, targets in (
    ("train error", X_train, y_train),
    ("test  error", X_test, y_test),
):
    print(label, lrgs.error(features, targets))

158.52608546744585 159.0
151.54063550980607 148.0
200.39313014272503 196.0
train error 2.6066912361746066
test  error 5.343381208064149
