In [1]:
#
# Empaquetado del entrenamiento del modelo
#
def train_estimator(alpha=0.5, l1_ratio=0.5, verbose=1):
    """Fit an ElasticNet on the wine-quality CSV and keep the best model on disk.

    The CSV is downloaded, 25% of the rows are held out for testing, the model
    is fitted on the rest and evaluated on the held-out rows. The fitted model
    is written to ``estimator.pickle`` when no model is stored yet or when it
    scores higher on the held-out rows than the stored one.

    Parameters
    ----------
    alpha : float
        Forwarded to ``ElasticNet(alpha=...)``.
    l1_ratio : float
        Forwarded to ``ElasticNet(l1_ratio=...)``.
    verbose : int
        The metrics are printed when this is greater than 0.
    """
    import os
    import pickle

    import pandas as pd
    from sklearn.linear_model import ElasticNet
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    from sklearn.model_selection import train_test_split

    model_path = "estimator.pickle"
    url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

    # Load the data and separate the target column from the features.
    wine = pd.read_csv(url, sep=";")
    target = wine["quality"]
    features = wine.drop(columns=["quality"])

    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25, random_state=0
    )

    estimator = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=12345)
    estimator.fit(x_train, y_train)
    predictions = estimator.predict(x_test)

    # Metrics are always computed; they are only printed in verbose mode.
    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    if verbose > 0:
        print(estimator, ":", sep="")
        print(f"  MSE: {mse}")
        print(f"  MAE: {mae}")
        print(f"  R2: {r2}")

    # Fetch the previously stored model, if there is one.
    saved_estimator = None
    if os.path.exists(model_path):
        with open(model_path, "rb") as stored:
            saved_estimator = pickle.load(stored)

    # Replace the stored model only when the new one wins on the test split.
    is_better = saved_estimator is None or (
        estimator.score(x_test, y_test) > saved_estimator.score(x_test, y_test)
    )
    if is_better:
        with open(model_path, "wb") as out:
            pickle.dump(estimator, out)


In [2]:
#
# Experimento
#
# Single training run with explicit hyperparameters (metrics are printed).
train_estimator(alpha=0.1, l1_ratio=0.9)

ElasticNet(alpha=0.1, l1_ratio=0.9, random_state=12345):
  MSE: 0.4444813386749448
  MAE: 0.5293539248566913
  R2: 0.2727652423230035


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
#
# Uso del modelo en productivo
#
def use_estimator():
    """Predict wine quality for every row of the dataset with the stored model.

    Downloads the wine-quality CSV, removes the ``quality`` column and feeds
    the remaining columns to the model unpickled from ``estimator.pickle``.

    Returns
    -------
    The value returned by the stored model's ``predict`` for the feature table.

    Raises
    ------
    FileNotFoundError
        If ``estimator.pickle`` does not exist in the working directory.
    """
    import pandas as pd
    import pickle

    url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    df = pd.read_csv(url, sep=";")

    # Only the features are needed for prediction; the target is discarded.
    x = df.drop(columns=["quality"])

    # SECURITY: pickle.load can execute arbitrary code. Only load an
    # "estimator.pickle" that was produced by this notebook / a trusted source.
    with open("estimator.pickle", "rb") as file:
        estimator = pickle.load(file)

    return estimator.predict(x)

# Bare last expression of the cell, so the returned predictions are displayed.
use_estimator()

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


array([5.04471258, 5.09836647, 5.20264959, ..., 5.94512405, 5.44101164,
       6.07251942], shape=(1599,))

In [4]:
#
# Carga de datos
#
def load_data(
    url="http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
):
    """Load the wine-quality table and split it into features and target.

    Parameters
    ----------
    url : str or path-like, optional
        Location of a ``;``-separated CSV containing a ``quality`` column.
        Anything accepted by ``pandas.read_csv`` works (URL or local path).
        Defaults to the red-wine CSV used throughout the notebook, so calling
        ``load_data()`` with no arguments behaves as before.

    Returns
    -------
    (x, y)
        ``x`` is a DataFrame with every column except ``quality``;
        ``y`` is the ``quality`` column.

    Raises
    ------
    KeyError
        If the CSV has no ``quality`` column.
    """
    import pandas as pd

    df = pd.read_csv(url, sep=";")

    y = df["quality"]
    # drop() returns a new frame, so the loaded table itself is left untouched.
    x = df.drop(columns=["quality"])

    return x, y

In [5]:
#
# Particionamiento de datos
#
def make_train_test_split(x, y):
    """Split features and target into train and test parts.

    A quarter of the rows is held out for testing and the shuffle is seeded
    (``random_state=0``), so repeated calls on the same data give the same
    partition.

    Returns
    -------
    (x_train, x_test, y_train, y_test)
    """
    from sklearn.model_selection import train_test_split

    split_options = {"test_size": 0.25, "random_state": 0}
    x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)
    return x_train, x_test, y_train, y_test
    
    

In [6]:
#
# Cálculo de métricas de evaluación
#
def eval_metrics(y_true, y_pred):
    """Compute the regression metrics used across the notebook.

    Returns
    -------
    (mse, mae, r2)
        Mean squared error, mean absolute error and R2 score of ``y_pred``
        against ``y_true``, in that order.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    # Order matters: callers unpack the result as (mse, mae, r2).
    scorers = (mean_squared_error, mean_absolute_error, r2_score)
    return tuple(scorer(y_true, y_pred) for scorer in scorers)
    
    

In [7]:
    
#
# Reporte de métricas de evaluación
#
def report(estimator, mse, mae, r2):
    """Print the estimator followed by its MSE, MAE and R2, one per line."""
    # Header line: the estimator's text form immediately followed by a colon.
    print(str(estimator) + ":")
    for label, value in (("MSE", mse), ("MAE", mae), ("R2", r2)):
        print(f"  {label}: {value}")
    
    

In [8]:
    
#
# Almacenamiento del modelo
#
def save_best_estimator(estimator):
    """Persist ``estimator`` to ``estimator.pickle`` in the working directory.

    The object is serialised in memory *before* the file is opened. Opening
    with ``"wb"`` truncates the file, so serialising first guarantees that a
    pickling failure cannot wipe out a previously stored model.

    Parameters
    ----------
    estimator : object
        Any picklable object (in this notebook, a fitted scikit-learn model).

    Raises
    ------
    Whatever ``pickle.dumps`` raises for an unpicklable object; in that case
    an existing ``estimator.pickle`` is left unchanged.
    """
    import pickle

    # May raise; nothing on disk has been touched yet.
    payload = pickle.dumps(estimator)

    with open("estimator.pickle", "wb") as file:
        file.write(payload)
        
        

In [9]:

#
# Carga del modelo
#
def load_best_estimator():
    """Return the object stored in ``estimator.pickle``, or ``None`` if absent.

    The file is looked up relative to the current working directory.
    """
    import os
    import pickle

    model_path = "estimator.pickle"
    if os.path.exists(model_path):
        with open(model_path, "rb") as stored:
            return pickle.load(stored)
    return None
    
    

In [10]:
    
#
# Entrenamiento
#
def train_estimator(alpha=0.5, l1_ratio=0.5, verbose=1):
    """Train one ElasticNet and store it if it beats the model on disk.

    Uses the notebook helpers to load and split the data, compute the test
    metrics and (when ``verbose > 0``) print them. The new model replaces the
    stored one only when there is no stored model or when its test R2 is
    strictly higher than the stored model's score on the same test split.

    Parameters
    ----------
    alpha : float
        Forwarded to ``ElasticNet(alpha=...)``.
    l1_ratio : float
        Forwarded to ``ElasticNet(l1_ratio=...)``.
    verbose : int
        The metrics are printed when this is greater than 0.
    """
    from sklearn.linear_model import ElasticNet

    x, y = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(x, y)

    estimator = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=12345)
    estimator.fit(x_train, y_train)

    mse, mae, r2 = eval_metrics(y_test, y_pred=estimator.predict(x_test))
    if verbose > 0:
        report(estimator, mse, mae, r2)

    best_estimator = load_best_estimator()

    # `r2` already is this model's test score (a regressor's score() is R2 on
    # the given data), so it is reused instead of predicting a second time.
    # The file is written only when the candidate wins; otherwise the stored
    # model is left as it is rather than being re-pickled unchanged.
    if best_estimator is None or r2 > best_estimator.score(x_test, y_test):
        save_best_estimator(estimator)
    

In [11]:
# Three manual experiments; each run may replace the model stored on disk.
train_estimator(alpha=0.5, l1_ratio=0.5)
train_estimator(alpha=0.2, l1_ratio=0.2)
train_estimator(alpha=0.1, l1_ratio=0.1)


ElasticNet(alpha=0.5, random_state=12345):
  MSE: 0.5294843132862007
  MAE: 0.5894666734018875
  R2: 0.13368827268570616


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


ElasticNet(alpha=0.2, l1_ratio=0.2, random_state=12345):
  MSE: 0.4386911951894716
  MAE: 0.5236106762028768
  R2: 0.2822387414965033
ElasticNet(alpha=0.1, l1_ratio=0.1, random_state=12345):
  MSE: 0.4183271587407731
  MAE: 0.5055024368693067
  R2: 0.31555720466583137


In [12]:
def check_estimator():
    """Print the test-split metrics of the model currently stored on disk.

    Raises
    ------
    FileNotFoundError
        If no model has been stored yet (``load_best_estimator()`` returned
        ``None``).
    """
    # Check for the stored model first: without it there is nothing to
    # evaluate, and the dataset download can be skipped.
    estimator = load_best_estimator()
    if estimator is None:
        raise FileNotFoundError(
            "No stored estimator found ('estimator.pickle'); "
            "run train_estimator() first."
        )

    x, y = load_data()
    # Only the held-out part of the split is needed here.
    _, x_test, _, y_test = make_train_test_split(x, y)

    mse, mae, r2 = eval_metrics(y_test, y_pred=estimator.predict(x_test))
    report(estimator, mse, mae, r2)


#
# Should match the best model found in the previous cells
#
check_estimator()

ElasticNet(alpha=np.float64(0.0001), l1_ratio=np.float64(0.0001),
           random_state=12345):
  MSE: 0.40021745821413146
  MAE: 0.48480048551721366
  R2: 0.34518725328239785


In [13]:
def make_hyperparameters_search(alphas, l1_ratios):
    """Train one model for every (alpha, l1_ratio) combination, silently.

    ``alphas`` is the outer loop and ``l1_ratios`` the inner one. Each
    combination is passed to ``train_estimator`` with ``verbose=0``.
    """
    # Lazy pairing: combinations are produced one at a time, in the same
    # order as two nested loops.
    grid = ((a, ratio) for a in alphas for ratio in l1_ratios)
    for a, ratio in grid:
        train_estimator(alpha=a, l1_ratio=ratio, verbose=0)
            

    
import numpy as np

# 10 evenly spaced values per hyperparameter -> 100 combinations in total.
alphas = np.linspace(start=0.0001, stop=0.5, num=10)
l1_ratios = np.linspace(start=0.0001, stop=0.5, num=10)

make_hyperparameters_search(alphas=alphas, l1_ratios=l1_ratios)

# Show the metrics of whichever model ended up stored on disk.
check_estimator()

ElasticNet(alpha=np.float64(0.0001), l1_ratio=np.float64(0.0001),
           random_state=12345):
  MSE: 0.40021745821413146
  MAE: 0.48480048551721366
  R2: 0.34518725328239785


In [14]:

def train_estimator(alphas, l1_ratios, n_splits=5, verbose=1):
    """Grid-search an ElasticNet with cross-validation and keep the best model.

    A ``GridSearchCV`` over ``alphas`` x ``l1_ratios`` is fitted on the
    training split. Its refitted best model is evaluated on the test split
    and replaces the model stored on disk only when there is no stored model
    or when its test R2 is strictly higher than the stored model's score on
    the same test split.

    Parameters
    ----------
    alphas : sequence of float
        Candidate values for ``ElasticNet(alpha=...)``.
    l1_ratios : sequence of float
        Candidate values for ``ElasticNet(l1_ratio=...)``.
    n_splits : int
        Forwarded to ``GridSearchCV(cv=...)``.
    verbose : int
        The test metrics are printed when this is greater than 0.
    """
    from sklearn.linear_model import ElasticNet
    from sklearn.model_selection import GridSearchCV

    x, y = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(x, y)

    # -------------------------------------------------------------------------
    # Hyperparameter search with cross-validation
    #
    search = GridSearchCV(
        estimator=ElasticNet(
            random_state=12345,
        ),
        param_grid={
            "alpha": alphas,
            "l1_ratio": l1_ratios,
        },
        cv=n_splits,
        refit=True,
        verbose=0,
        return_train_score=False,
    )
    # -------------------------------------------------------------------------

    search.fit(x_train, y_train)

    # refit=True: the best parameter combination is refitted and exposed here.
    estimator = search.best_estimator_

    mse, mae, r2 = eval_metrics(y_test, y_pred=estimator.predict(x_test))
    if verbose > 0:
        report(estimator, mse, mae, r2)

    best_estimator = load_best_estimator()

    # `r2` already is this model's test score (a regressor's score() is R2 on
    # the given data), so it is reused instead of predicting a second time.
    # The file is written only when the candidate wins; otherwise the stored
    # model is left as it is rather than being re-pickled unchanged.
    if best_estimator is None or r2 > best_estimator.score(x_test, y_test):
        save_best_estimator(estimator)
    
    

In [15]:
import numpy as np

# Run the grid-search version of train_estimator over 10 x 10 candidate
# values of alpha and l1_ratio, printing the metrics of the selected model.
train_estimator(
    alphas=np.linspace(0.0001, 0.5, 10),
    l1_ratios=np.linspace(0.0001, 0.5, 10),
    n_splits=5,
    verbose=1,
)



# Print the metrics of the model that is stored on disk after the search.
check_estimator()


ElasticNet(alpha=np.float64(0.0001), l1_ratio=np.float64(0.0001),
           random_state=12345):
  MSE: 0.40021745821413146
  MAE: 0.48480048551721355
  R2: 0.34518725328239785
ElasticNet(alpha=np.float64(0.0001), l1_ratio=np.float64(0.0001),
           random_state=12345):
  MSE: 0.40021745821413146
  MAE: 0.48480048551721366
  R2: 0.34518725328239785


In [None]:
# Final cell: make sure the estimator ends up saved at the repository root.
import os
import pickle
from pathlib import Path

# Try to load an already-saved estimator (priority: root, then homework/).
candidates = [Path('estimator.pickle'), Path('homework') / 'estimator.pickle']
found = next((candidate for candidate in candidates if candidate.exists()), None)

estimator = None
if found is not None:
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with found.open('rb') as fh:
        estimator = pickle.load(fh)
    print(f'Loaded estimator from {found}')

# Nothing stored: train a simple ElasticNet (same setup as in the notebook).
if estimator is None:
    print('No estimator found; training a new one.')
    import pandas as pd
    from sklearn.linear_model import ElasticNet
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    from sklearn.model_selection import train_test_split

    url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
    wine = pd.read_csv(url, sep=';')
    X = wine.drop(columns=['quality'])
    y = wine['quality']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0
    )

    estimator = ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=12345)
    estimator.fit(X_train, y_train)

    y_pred = estimator.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f'Trained estimator - MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}')

# Locate the repository root: the directory that contains 'homework'.
cwd = Path.cwd()
if (cwd / 'homework').exists():
    repo_root = cwd
elif cwd.name == 'homework':
    # Running from inside 'homework': the root is one level up.
    repo_root = cwd.parent
else:
    # Fallback: nearest ancestor that has a 'homework' folder, else the cwd.
    repo_root = next(
        (ancestor for ancestor in cwd.parents if (ancestor / 'homework').exists()),
        cwd,
    )

save_path = repo_root / 'estimator.pickle'
with save_path.open('wb') as out:
    pickle.dump(estimator, out)
print('Saved estimator to:', save_path)