In [11]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import max_error, mean_squared_error, median_absolute_error, mean_absolute_error
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

In [12]:
# Add the directory two levels above the working directory to the module
# search path.
# NOTE(review): no import in the visible cells appears to depend on this entry;
# confirm whether it is still needed before removing it.
import sys
sys.path.append("../../") 

In [13]:
def calcular_metricas(y_true, y_predict):
    """Compute a fixed set of regression error metrics.

    Args:
        y_true: observed target values.
        y_predict: predicted values, passed to each metric alongside ``y_true``.

    Returns:
        dict mapping "MAX_ERROR", "MEAN_SQ_ERROR", "MEDIAN_ABS_ERROR" and
        "MEAN_ABS_ERROR" (in that order) to the value returned by the
        corresponding scikit-learn metric function.
    """
    # (result key, metric function) pairs, evaluated in this order.
    metricas = (
        ("MAX_ERROR", max_error),
        ("MEAN_SQ_ERROR", mean_squared_error),
        ("MEDIAN_ABS_ERROR", median_absolute_error),
        ("MEAN_ABS_ERROR", mean_absolute_error),
    )
    return {clave: metrica(y_true, y_predict) for clave, metrica in metricas}

In [14]:
def heuristica(Vviento):
    """Map a wind speed to a wave-height estimate using a fixed step table.

    The original author describes this table as a heuristic based on the
    Beaufort scale. The units of ``Vviento`` and of the returned height are
    not stated in this notebook.

    Args:
        Vviento: wind speed (a number).

    Returns:
        The height assigned to the first band whose exclusive upper limit is
        greater than ``Vviento``; 12 when ``Vviento >= 101``; ``None`` when no
        comparison holds (e.g. NaN input).
    """
    # (exclusive upper limit of the band, height returned for that band)
    tramos = (
        (3, 0.4),
        (7, 0.6),
        (12, 1),
        (20, 1.5),
        (26, 2),
        (36, 2.5),
        (46, 3.2),
        (56, 4.2),
        (66, 5.4),
        (76, 6.5),
        (88, 8),
        (101, 10),
    )
    for limite, altura in tramos:
        if Vviento < limite:
            return altura
    # Open-ended top band.
    if Vviento >= 101:
        return 12
    # Reached only when every comparison above is False (e.g. NaN).
    return None

Cargamos los datos que hemos capturado hasta ahora

In [15]:
# Read the CSV and, in the same step, drop every row that has at least one
# missing value.
data = pd.read_csv("./clean/forecast_merged.csv").dropna()

In [16]:
# Target: the AlturaOlas column.
y = data["AlturaOlas"]
# Features: everything except the target and the columns listed below.
X = data.drop(columns=["AlturaOlas", "antelacion", 'anio', 'mes', 'dia', 'hora'])

In [17]:
# Build the scaled feature matrix from scratch so that this cell runs on a
# fresh kernel and yields the same result every time it is re-executed
# (previously the setup was disabled inside a string literal and the cell
# relied on `scaler` / `X_model` left over in the kernel).
# NOTE(review): the scaler is fitted here on the evaluation data. If the
# pickled model was trained on features scaled with a scaler fitted on the
# training set, that fitted scaler should be loaded and used instead -- TODO
# confirm against the training code.
X_features = X.copy()
scaler = ColumnTransformer(
    transformers=[
        # the wind-direction dummy columns (wdir_*) are not scaled
        ("num", StandardScaler(), [c for c in X_features.columns if not c.startswith('wdir_')]),
    ],
    # unscaled columns are kept and appended after the scaled ones
    remainder='passthrough'
)
scaler.fit(X_features)
X_model = scaler.transform(X_features)

In [18]:
# Display the transformed feature matrix produced by the scaler.
X_model

array([[-0.863959  , -0.46688813,  0.75802001, ...,  0.        ,
         1.        ,  0.        ],
       [-1.53206623, -0.46688813,  0.80130328, ...,  1.        ,
         0.        ,  0.        ],
       [-0.863959  , -0.46688813,  0.75802001, ...,  1.        ,
         0.        ,  0.        ],
       ...,
       [ 1.80846993,  0.96422548, -1.36286022, ...,  0.        ,
         0.        ,  0.        ],
       [ 2.47657716,  1.44126335, -3.52702373, ...,  0.        ,
         0.        ,  0.        ],
       [ 2.47657716,  0.96422548, -3.52702373, ...,  0.        ,
         0.        ,  0.        ]])

Cargamos el modelo

In [19]:
# Load the pickled model from disk and display it.
# NOTE(review): pickle.load can execute arbitrary code, so only load files from
# a trusted source; see the scikit-learn model persistence notes on security
# and version compatibility.
with open("../modelos/modelo_definitivo/modelo_rf_def.pkl", 'rb') as file:
    model = pickle.load(file)
model

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [20]:
# Baseline: apply the wind-speed heuristic to each value of the Vviento column.
predicciones_heuristica = X["Vviento"].map(heuristica)
# Model: predict from the transformed feature matrix.
predicciones_modelo = model.predict(X_model)



In [21]:
# Display the model's predictions.
predicciones_modelo

array([1.64032195, 1.53535143, 1.53105601, 1.53105601, 1.76156309,
       1.64032195, 1.64032195, 1.53105601, 1.53105601, 1.53105601,
       1.6667244 , 1.64522484, 1.64032195, 1.63854173, 1.63854173,
       1.64522484, 1.64522484, 1.53105601, 1.6801364 , 1.65853847,
       1.53933232, 1.54341081, 1.54341081, 1.53105601, 1.53105601,
       1.53105601, 1.53105601, 1.53105601, 1.53105601, 1.53105601,
       1.53105601, 1.53105601, 1.53105601, 1.53105601, 1.53105601,
       1.53105601, 1.64522484, 1.64522484, 1.6667244 , 1.6667244 ,
       1.6667244 , 1.6667244 , 1.6667244 , 1.6667244 , 1.53105601,
       1.53099129, 1.53105601, 1.53105601, 1.53535143, 1.6801364 ,
       1.6801364 , 1.6801364 , 1.64522484, 1.53105601, 1.54341081,
       1.64522484, 1.53105601, 1.67879312, 1.67879312, 1.6667244 ,
       1.6667244 , 1.6667244 , 1.67879312, 1.6667244 , 1.64522484,
       1.65549187, 1.65549187, 1.53105601, 1.53105601, 1.53105601,
       1.53105601, 1.53105601, 1.53105601, 1.53105601, 1.53105

In [22]:
# Display the heuristic's predictions.
predicciones_heuristica

0      0.6
1      0.6
2      0.6
3      0.6
4      0.6
      ... 
478    1.0
492    1.0
498    1.0
500    1.0
502    1.0
Name: Vviento, Length: 423, dtype: float64

In [23]:
# Display the observed target values (the AlturaOlas column).
y

0      0.8
1      0.7
2      0.7
3      0.7
4      0.8
      ... 
478    0.9
492    0.7
498    0.9
500    0.7
502    0.7
Name: AlturaOlas, Length: 423, dtype: float64

In [24]:
# Error metrics of the model's predictions against the observed values.
# NOTE(review): in the recorded outputs the model's errors (MAE ~0.90) are far
# larger than the heuristic's (MAE ~0.20); worth checking that the features fed
# to the model (scaling, column order) match what it was trained on.
calcular_metricas(y, predicciones_modelo)

{'MAX_ERROR': 1.1874677124731987,
 'MEAN_SQ_ERROR': 0.8650299323933854,
 'MEDIAN_ABS_ERROR': 0.9452248385698772,
 'MEAN_ABS_ERROR': 0.9031317001155771}

In [25]:
# Error metrics of the heuristic baseline against the same observed values.
calcular_metricas(y, predicciones_heuristica)

{'MAX_ERROR': 0.6,
 'MEAN_SQ_ERROR': 0.07000000000000002,
 'MEDIAN_ABS_ERROR': 0.09999999999999998,
 'MEAN_ABS_ERROR': 0.1978723404255319}