In [1]:
from calendar import c
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from mlr import MLR


In [2]:
def calculateEvalMeasures(pred, targets, verbose = True):
    """Compute regression quality measures for a set of predictions.

    Both inputs are flattened to 1-D float arrays and compared position by
    position, so single-column DataFrames, Series, NumPy arrays of shape
    ``(m,)`` or ``(m, 1)`` and plain sequences are all accepted.

    Parameters
    ----------
    pred : array-like
        Predicted values.
    targets : array-like
        Observed values; must hold as many values as ``pred``.
    verbose : bool, default True
        When true, print R^2, MAE, MSE and RMSE.

    Returns
    -------
    tuple
        ``(r2, mae, mse, rmse)`` as NumPy floats.

    Raises
    ------
    ValueError
        If the inputs are empty or do not hold the same number of values.

    Notes
    -----
    ``r2`` is ``1 - RSS / TSS``. When every target is identical TSS is zero
    and the result follows NumPy division semantics (``nan`` or ``-inf``).
    NaN values in either input propagate into the results.
    """
    # Flatten to plain float vectors so the arithmetic does not depend on
    # pandas label alignment or on the container type of the inputs.
    predValues = np.asarray(pred, dtype=float).reshape(-1)
    targetValues = np.asarray(targets, dtype=float).reshape(-1)
    m = predValues.size
    if m == 0:
        raise ValueError("pred and targets must not be empty")
    if targetValues.size != m:
        raise ValueError(
            "pred and targets must hold the same number of values "
            "(got %d and %d)" % (m, targetValues.size)
        )

    residuals = targetValues - predValues
    rss = np.sum(residuals ** 2)
    mse = rss / m
    mae = np.sum(np.abs(residuals)) / m
    rmse = np.sqrt(mse)
    tss = np.sum((targetValues - targetValues.mean()) ** 2)
    r2 = 1 - (rss / tss)
    if verbose:
        print("R^2:", r2)
        print("Mean Absolute Error:", mae)
        print("MSE:", mse)
        print("RMSE:", rmse)
    return r2, mae, mse, rmse

In [5]:
# Load the data set and name the column to be predicted.
dataset = pd.read_csv("data/Folds5x2_pp.csv")
targetVarName = "PE"

# Fit the model on the full data set once per normalisation method and report
# the training-set measures. One loop replaces two copy-pasted blocks; after
# it finishes, mlr/pred/params/targets and the metric variables hold the
# values from the last ("MINMAX") run, as they did before.
for normMethod, heading in (
    ("AUTO", "== Training results with Autoscaling =="),
    ("MINMAX", "== Training results with Range-scaling (min-max) =="),
):
    mlr = MLR(dataset, targetVarName, normMethod)
    # NOTE(review): meaning of the 0.1 argument is defined by MLR.learnParams — confirm.
    pred, params = mlr.learnParams(0.1)
    # Training evaluation
    targets = mlr.target
    print(heading)
    r2, mae, mse, rmse = calculateEvalMeasures(pred, targets, True)

== Training results with Autoscaling ==
()
R^2: 0.9283701302236594
Mean Absolute Error: 3.6382507443803247
MSE: 20.862333873866845
RMSE: 4.56753039112679
== Training results with Range-scaling (min-max) ==
()
R^2: 0.9212101470677038
Mean Absolute Error: 3.812184876257848
MSE: 22.947692392557716
RMSE: 4.790374974107738


In [4]:
def crossValidate(dataset, groupNumber, targetVarName, normMethod, verbose, randomState = None):
    """Run k-fold cross-validation of the MLR model and average the results.

    The rows are shuffled, split into ``groupNumber`` contiguous folds whose
    sizes differ by at most one row, and each fold is used once as the test
    set while the remaining rows train a fresh ``MLR`` instance.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Full data set, including the target column.
    groupNumber : int
        Number of cross-validation folds; must be at least 2 and no larger
        than the number of rows.
    targetVarName : str
        Name of the target column.
    normMethod : str
        Normalisation method passed through to ``MLR``. ``"AUTO"`` and
        ``"MINMAX"`` get a readable label in the printed headers.
    verbose : bool
        When true, print a header and the measures for every fold, followed
        by the averaged measures. When false, nothing is printed.
    randomState : int or None, default None
        Seed forwarded to ``DataFrame.sample`` so the shuffle can be
        reproduced; ``None`` keeps the shuffle unseeded.

    Returns
    -------
    tuple
        ``(r2, mae, mse, rmse)``, each the arithmetic mean of the per-fold
        value.

    Raises
    ------
    Exception
        If ``groupNumber`` is less than 2.
    ValueError
        If ``groupNumber`` exceeds the number of rows.
    """
    if groupNumber <= 1:
        raise Exception("Set at least 2 cross-validation groups")
    rowCount = dataset.shape[0]
    if groupNumber > rowCount:
        raise ValueError("More cross-validation groups than rows in the dataset")

    dataset = dataset.sample(frac=1, random_state=randomState).reset_index(drop=True)

    # The header label depends only on normMethod, so resolve it once.
    strOut = {
        "AUTO": "Autoscaling",
        "MINMAX": "Range scaling (min-max)",
    }.get(normMethod, "")

    # Spread the remainder over the first `extraRows` folds so that every row
    # lands in exactly one test set (plain integer division would leave the
    # trailing rows untested).
    cvPartition, extraRows = divmod(rowCount, groupNumber)
    sumRes = [0.0, 0.0, 0.0, 0.0]
    initialRow = 0
    for i in range(groupNumber):
        finalRow = initialRow + cvPartition + (1 if i < extraRows else 0)
        testSet = dataset[initialRow:finalRow].reset_index(drop=True)
        trainingSet = dataset.drop(dataset.index[initialRow:finalRow], axis = 0).reset_index(drop=True)
        testTargets = testSet[[targetVarName]]
        testSet = testSet.drop(targetVarName, axis=1)
        cvMlr = MLR(trainingSet, targetVarName, normMethod)
        # NOTE(review): meaning of the 0.1 argument is defined by MLR.learnParams — confirm.
        cvMlr.learnParams(0.1)
        testPred = cvMlr.test(testSet)
        if verbose:
            print("== Cross-validation results with", strOut, "==")
        foldRes = calculateEvalMeasures(testPred, testTargets, verbose)
        sumRes = [total + value for total, value in zip(sumRes, foldRes)]
        initialRow = finalRow

    r2, mae, mse, rmse = (total / groupNumber for total in sumRes)
    if verbose:
        print("== Cross-validation average over", groupNumber, "groups with", strOut, "==")
        print("R^2:", r2)
        print("Mean Absolute Error:", mae)
        print("MSE:", mse)
        print("RMSE:", rmse)
    return r2, mae, mse, rmse
        
        
# Run 10-group cross-validation once for each normalisation method.
for cvNormMethod in ("AUTO", "MINMAX"):
    crossValidate(dataset, 10, targetVarName, cvNormMethod, True)

== Cross-validation results with Autoscaling ==
()
R^2: 0.9341471854358621
Mean Absolute Error: 3.6739472120642636
MSE: 19.927990389125778
RMSE: 4.464077775882246
== Cross-validation results with Autoscaling ==
()
R^2: 0.9330403055973232
Mean Absolute Error: 3.5807348960031047
MSE: 19.50369112291616
RMSE: 4.416298350758943
== Cross-validation results with Autoscaling ==
()
R^2: 0.9237236172832899
Mean Absolute Error: 3.7024908346773313
MSE: 21.548781762561
RMSE: 4.642066540083307
== Cross-validation results with Autoscaling ==
()
R^2: 0.9314521648119908
Mean Absolute Error: 3.5863526517211834
MSE: 20.08024073141347
RMSE: 4.481098161323122
== Cross-validation results with Autoscaling ==
()
R^2: 0.9281536463601726
Mean Absolute Error: 3.6561630668885656
MSE: 21.618236658605937
RMSE: 4.649541553594928
== Cross-validation results with Autoscaling ==
()
R^2: 0.9281140472015021
Mean Absolute Error: 3.718726404761917
MSE: 21.41787181569858
RMSE: 4.627944664286575
== Cross-validation results w