In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, r2_score, mean_squared_error


In [3]:
# Load the SPY price history; the 'Date' column becomes the index and is
# parsed into datetimes so rows are keyed by trading day.
df = pd.read_csv('SPY.csv', index_col='Date', parse_dates=True)
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,112.370003,113.389999,111.510002,113.330002,92.246048,118944600
2010-01-05,113.260002,113.68,112.849998,113.629997,92.490204,111579900
2010-01-06,113.519997,113.989998,113.43,113.709999,92.555328,116074400
2010-01-07,113.5,114.330002,113.18,114.190002,92.94606,131091100
2010-01-08,113.889999,114.620003,113.660004,114.57,93.255348,126402800


In [4]:
# Naive one-step forecast: the previous row's Close is used as the prediction
# for the current row. shift(1) leaves the first row without a prediction (NaN).
# NOTE(review): the column name is spelled 'CLosePrediction' (capital L); it is
# read back with this exact spelling further down, so a rename must touch both.
df['CLosePrediction'] = df['Close'].shift(1)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,CLosePrediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,112.370003,113.389999,111.510002,113.330002,92.246048,118944600,
2010-01-05,113.260002,113.68,112.849998,113.629997,92.490204,111579900,113.330002
2010-01-06,113.519997,113.989998,113.43,113.709999,92.555328,116074400,113.629997
2010-01-07,113.5,114.330002,113.18,114.190002,92.94606,131091100,113.709999
2010-01-08,113.889999,114.620003,113.660004,114.57,93.255348,126402800,114.190002


In [5]:
# Skip the first row, whose shifted prediction is missing, so both series
# cover exactly the same dates.
y_true = df['Close'].iloc[1:]
y_pred = df['CLosePrediction'].iloc[1:]

--- Medidas de Desempeño

In [9]:
# Sum of Squared Errors (SSE): the squared residuals added up (not averaged).
residuals = y_true - y_pred
sse = residuals.dot(residuals)
sse

np.float64(6330.3742894926045)

In [10]:
# Mean Squared Error (MSE): the sum of squared residuals divided by the
# number of observations.
residuals = y_true - y_pred
mse = residuals.dot(residuals) / len(y_pred)
mse

np.float64(2.7985739564511958)

In [11]:
# Root Mean Squared Error (RMSE): square root of the mean squared residual,
# which puts the error back in the same units as the target.
residuals = y_true - y_pred
rmse = np.sqrt(residuals.dot(residuals) / len(y_true))
rmse

np.float64(1.6728938867875618)

In [12]:
# Mean Absolute Error (MAE): average magnitude of the residuals.
mae = mean_absolute_error(y_true=y_true, y_pred=y_pred)
mae

1.1457559803120336

In [13]:
# Coefficient of determination (R^2).
r2 = r2_score(y_true=y_true, y_pred=y_pred)
r2

0.9989603259063914

In [14]:
# Mean Absolute Percentage Error (MAPE). scikit-learn returns it as a
# fraction rather than a 0-100 percentage, so 0.01 means 1%.
mape = mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred)
mape

0.006494073151422373

In [18]:
# Symmetric Mean Absolute Percentage Error (sMAPE)
def smape_funcion(y_true, y_pred):
    """Return the Symmetric Mean Absolute Percentage Error (sMAPE).

    Each observation contributes ``|y_true - y_pred|`` divided by the average
    of ``|y_true|`` and ``|y_pred|``; the result is the mean of those ratios,
    expressed as a fraction (not multiplied by 100).

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values of the same length. They are converted
        to float NumPy arrays and compared position by position, so pandas
        index labels are ignored.

    Returns
    -------
    numpy.float64
        The mean ratio. NaN values in the inputs propagate to the result.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    numerador = np.abs(y_true - y_pred)
    denominador = (np.abs(y_true) + np.abs(y_pred)) / 2

    # The denominator is zero only when both values are zero, i.e. a perfect
    # prediction. Count that pair as zero error instead of producing 0/0 = NaN.
    ratio = np.divide(
        numerador,
        denominador,
        out=np.zeros_like(numerador),
        where=denominador != 0,
    )
    return ratio.mean()
# Evaluate sMAPE for the shifted-close prediction against the actual close.
smape_funcion(y_true, y_pred)

np.float64(0.006491365814068417)

In [19]:
def medidas(y_true, y_pred):
    """Collect several regression error metrics into one dictionary.

    Parameters
    ----------
    y_true, y_pred
        Observed and predicted values. Their difference must provide a
        ``.dot`` method (the notebook passes pandas Series).

    Returns
    -------
    dict
        Keys ``"sse"``, ``"mse"``, ``"rmse"``, ``"mae"``, ``"r2"``, ``"mape"``
        and ``"smape"``, in that order, each mapped to the metric's value.
    """
    # The residual vector and its squared sum feed SSE, MSE and RMSE alike.
    residuals = y_true - y_pred
    squared_sum = residuals.dot(residuals)

    metrics = {"sse": squared_sum}
    metrics["mse"] = squared_sum / len(y_pred)
    metrics["rmse"] = np.sqrt(squared_sum / len(y_true))
    metrics["mae"] = mean_absolute_error(y_true, y_pred)
    metrics["r2"] = r2_score(y_true, y_pred)
    metrics["mape"] = mean_absolute_percentage_error(y_true, y_pred)
    metrics["smape"] = smape_funcion(y_true, y_pred)
    return metrics
    

In [None]:
# Compute every metric in a single call, then look one up by its key.
resultados = medidas(y_true, y_pred)
resultados["sse"]

np.float64(6330.3742894926045)