In [6]:
%%capture
%load_ext autoreload
%autoreload 2

%pip install -r ../../requirements.txt

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics
import xgboost as xg
from xgboost import XGBRegressor


def one_hot_encode(
    df: pd.DataFrame,
    column: str,
    prefix: str,
) -> pd.DataFrame:

    df_ = df.copy()
    df_encoded = pd.get_dummies(df_[column], dtype="int")
    for col in df_encoded:
        df_[f"{prefix}_{col}"] = df_encoded[col]

    return df_.drop(columns=[column])

In [33]:
vuelos = pd.read_parquet("../../resources/flights_clean.parquet")
ventas = pd.read_parquet("../../resources/sales_clean.parquet")

---

In [34]:
# Merge a ambos dataframes
vuelos_ventas = pd.merge(ventas, vuelos, on="Flight_ID")
vuelos_ventas.drop(columns=["index", "Aeronave"], inplace=True)
vuelos_ventas.drop(columns=["HourDuration"], inplace=True)

In [35]:
vuelos_ventas = vuelos_ventas.groupby("Flight_ID").agg(
    {
        "TotalSales": "sum",
        "Bookings": "mean",
        "Passengers": "mean",
        "Origin_Type": "first",
        "Destination_Type": "first",
        "STD": "first",
        "STA": "first",
        "DepartureStation": "first",
        "ArrivalStation": "first",
        "Semana": "first",
        "Mes": "first",
        "Capacity": "first",
    }
)
vuelos_ventas["STD"] = pd.to_datetime(vuelos_ventas["STD"])


vuelos_ventas["STA"] = pd.to_datetime(vuelos_ventas["STA"])


vuelos_ventas["Duracion"] = vuelos_ventas["STA"] - vuelos_ventas["STD"]


vuelos_ventas["Duracion"] = vuelos_ventas["Duracion"].dt.total_seconds() / 60


vuelos_ventas["Hora"] = vuelos_ventas["STD"].dt.hour


vuelos_ventas["Dia"] = vuelos_ventas["STD"].dt.day


# Cambiar tipos de destinos a one-hot encoding


destination_type = vuelos_ventas["Destination_Type"].map(
    {
        "Ciudad Principal": "destino_ciudadprincipal",
        "Playa": "destino_playa",
        "MX Amigos y Familia": "destino_amigosfamilia",
        "Ciudad Fronteriza": "destino_ciudadfronteriza",
        "Ecoturismo": "destino_ecoturismo",
    }
)


destination_type = pd.get_dummies(destination_type, dtype="int")


vuelos_ventas = (
    pd.concat([vuelos_ventas, destination_type], axis=1)
    if "Destination_Type" in vuelos_ventas.columns
    else vuelos_ventas
)


vuelos_ventas = (
    vuelos_ventas.drop(columns=["Destination_Type"])
    if "Destination_Type" in vuelos_ventas.columns
    else vuelos_ventas
)


origen_type = vuelos_ventas["Origin_Type"].map(
    {
        "Ciudad Principal": "origen_ciudadprincipal",
        "Playa": "origen_playa",
        "MX Amigos y Familia": "origen_amigosfamilia",
        "Ciudad Fronteriza": "origen_ciudadfronteriza",
        "Ecoturismo": "origen_ecoturismo",
    }
)


origen_type = pd.get_dummies(origen_type, dtype="int")


vuelos_ventas = (
    pd.concat([vuelos_ventas, origen_type], axis=1)
    if "Origin_Type" in vuelos_ventas.columns
    else vuelos_ventas
)


vuelos_ventas = (
    vuelos_ventas.drop(columns=["Origin_Type"])
    if "Origin_Type" in vuelos_ventas.columns
    else vuelos_ventas
)


vuelos_ventas["Book_Pass"] = vuelos_ventas["Bookings"] / vuelos_ventas["Passengers"]


vuelos_ventas.drop(columns=["STD", "STA"], inplace=True)


vuelos_ventas = one_hot_encode(vuelos_ventas, "DepartureStation", "departure")


vuelos_ventas = one_hot_encode(vuelos_ventas, "ArrivalStation", "arrival")

In [36]:
vuelos_ventas.columns

Index(['TotalSales', 'Bookings', 'Passengers', 'Semana', 'Mes', 'Capacity',
       'Duracion', 'Hora', 'Dia', 'destino_amigosfamilia',
       'destino_ciudadfronteriza', 'destino_ciudadprincipal',
       'destino_ecoturismo', 'destino_playa', 'origen_amigosfamilia',
       'origen_ciudadfronteriza', 'origen_ciudadprincipal',
       'origen_ecoturismo', 'origen_playa', 'Book_Pass', 'departure_AB',
       'departure_AD', 'departure_AE', 'departure_AF', 'departure_AI',
       'departure_AJ', 'departure_AK', 'departure_AL', 'departure_AM',
       'departure_AO', 'departure_AP', 'departure_AQ', 'departure_AR',
       'departure_AS', 'departure_AT', 'departure_AU', 'departure_AV',
       'departure_AW', 'departure_AX', 'departure_AY', 'departure_AZ',
       'departure_BA', 'departure_BB', 'departure_BC', 'departure_BD',
       'departure_BE', 'departure_BF', 'departure_BG', 'departure_BH',
       'departure_BJ', 'departure_BK', 'departure_BL', 'departure_BM',
       'departure_BN', 'departur

In [37]:
# Hacemos el modelo de regesion
X = vuelos_ventas.drop(columns=["TotalSales"])
y = vuelos_ventas["TotalSales"]

In [38]:
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r111 = XGBRegressor(n_estimators=10, seed=123)
xgb_r = XGBRegressor(learning_rate=0.3, n_estimators=250)
xgb_r.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r.predict(x_test)

[0]	validation_0-rmse:2423.67590
[1]	validation_0-rmse:2178.39212
[2]	validation_0-rmse:1970.45573
[3]	validation_0-rmse:1851.13258
[4]	validation_0-rmse:1769.02827
[5]	validation_0-rmse:1712.93911
[6]	validation_0-rmse:1679.49640
[7]	validation_0-rmse:1649.09794
[8]	validation_0-rmse:1627.49642
[9]	validation_0-rmse:1609.91789
[10]	validation_0-rmse:1602.62368
[11]	validation_0-rmse:1590.98191
[12]	validation_0-rmse:1585.64248
[13]	validation_0-rmse:1579.38273
[14]	validation_0-rmse:1573.03351
[15]	validation_0-rmse:1570.45722
[16]	validation_0-rmse:1565.82374
[17]	validation_0-rmse:1562.77591
[18]	validation_0-rmse:1556.40204
[19]	validation_0-rmse:1553.59869
[20]	validation_0-rmse:1550.78350
[21]	validation_0-rmse:1549.75763
[22]	validation_0-rmse:1546.34343
[23]	validation_0-rmse:1541.91109
[24]	validation_0-rmse:1539.13983
[25]	validation_0-rmse:1538.59625
[26]	validation_0-rmse:1535.04921
[27]	validation_0-rmse:1534.75765
[28]	validation_0-rmse:1532.79057
[29]	validation_0-rmse:1

In [39]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.7099103438567037
Mean Absolute Error: 967.0118727446586
Mean Squared Error: 2330409.519359125
Root Mean Squared Error: 1526.567888879864


In [40]:
vuelos_ventas.to_parquet("../../resources/flights_sales_pred.parquet")

## **Modelo cantidad**

In [16]:
# Merge a ambos dataframes
vuelos_ventas = pd.merge(ventas, vuelos, on="Flight_ID")
vuelos_ventas.drop(columns=["index", "Aeronave"], inplace=True)
vuelos_ventas.head()

Unnamed: 0,Flight_ID,ProductType,ProductName,Quantity,TotalSales,DepartureStation,ArrivalStation,Destination_Type,Origin_Type,STD,STA,Capacity,Passengers,Bookings,Mes,Semana,DiaSemana,Hora,HourDuration
0,a05290288259526edd3601160b10e1de,Botanas,Carne Seca Habanero,1,55.0,AW,AO,Ciudad Principal,Ciudad Principal,2023-08-28 19:20:00,2023-08-28 20:45:00,186,131.0,100.0,8,35,1,19,1.416667
1,08f6f97437df8db101b050f1110be656,Licores,Jw Red Label,2,240.0,AO,AU,Ecoturismo,Ciudad Principal,2023-05-15 10:35:00,2023-05-15 12:55:00,186,185.0,128.0,5,20,1,10,2.333333
2,4e09c949826a77207868412baeff6d30,Licores,Jack And Coke,8,576.0,BN,AK,Playa,MX Amigos y Familia,2023-09-24 18:05:00,2023-09-24 21:25:00,186,186.0,133.0,9,38,7,18,3.333333
3,4e09c949826a77207868412baeff6d30,Licores,Jack And Coke,8,576.0,BN,AK,Playa,MX Amigos y Familia,2023-09-24 18:05:00,2023-09-24 21:25:00,186,186.0,133.0,9,38,7,18,3.333333
4,4e09c949826a77207868412baeff6d30,Licores,Jack And Coke,8,576.0,BH,BN,MX Amigos y Familia,Playa,2023-09-24 14:30:00,2023-09-24 17:35:00,186,185.0,118.0,9,38,7,14,3.083333


In [17]:
vuelos_ventas["STD"] = pd.to_datetime(vuelos_ventas["STD"])
vuelos_ventas["STA"] = pd.to_datetime(vuelos_ventas["STA"])
vuelos_ventas["Duracion"] = vuelos_ventas["STA"] - vuelos_ventas["STD"]
vuelos_ventas["Duracion"] = vuelos_ventas["Duracion"].dt.total_seconds() / 60
vuelos_ventas["Hora"] = vuelos_ventas["STD"].dt.hour
vuelos_ventas["Dia"] = vuelos_ventas["STD"].dt.day
# Cambiar tipos de destinos a one-hot encoding
destination_type = vuelos_ventas["Destination_Type"].map(
    {
        "Ciudad Principal": "destino_ciudadprincipal",
        "Playa": "destino_playa",
        "MX Amigos y Familia": "destino_amigosfamilia",
        "Ciudad Fronteriza": "destino_ciudadfronteriza",
        "Ecoturismo": "destino_ecoturismo",
    }
)
destination_type = pd.get_dummies(destination_type, dtype="int")
vuelos_ventas = (
    pd.concat([vuelos_ventas, destination_type], axis=1)
    if "Destination_Type" in vuelos_ventas.columns
    else vuelos_ventas
)
vuelos_ventas = (
    vuelos_ventas.drop(columns=["Destination_Type"])
    if "Destination_Type" in vuelos_ventas.columns
    else vuelos_ventas
)
origen_type = vuelos_ventas["Origin_Type"].map(
    {
        "Ciudad Principal": "origen_ciudadprincipal",
        "Playa": "origen_playa",
        "MX Amigos y Familia": "origen_amigosfamilia",
        "Ciudad Fronteriza": "origen_ciudadfronteriza",
        "Ecoturismo": "origen_ecoturismo",
    }
)
origen_type = pd.get_dummies(origen_type, dtype="int")
vuelos_ventas = (
    pd.concat([vuelos_ventas, origen_type], axis=1)
    if "Origin_Type" in vuelos_ventas.columns
    else vuelos_ventas
)
vuelos_ventas = (
    vuelos_ventas.drop(columns=["Origin_Type"])
    if "Origin_Type" in vuelos_ventas.columns
    else vuelos_ventas
)
vuelos_ventas["Book_Pass"] = vuelos_ventas["Bookings"] / vuelos_ventas["Passengers"]
vuelos_ventas.drop(columns=["STD", "STA"], inplace=True)
vuelos_ventas = one_hot_encode(vuelos_ventas, "DepartureStation", "departure")
vuelos_ventas = one_hot_encode(vuelos_ventas, "ArrivalStation", "arrival")

In [18]:
vuelos_ventas.drop(columns=["Flight_ID", "ProductType", "TotalSales"], inplace=True)

In [19]:
product_dict = {}
for i in vuelos_ventas["ProductName"].unique():
    product_dict[i] = len(product_dict)
vuelos_ventas["ProductName"] = vuelos_ventas["ProductName"].map(product_dict)
product_dict_inv = {val: key for key, val in product_dict.items()}
product_dict_inv

{0: 'Carne Seca Habanero',
 1: 'Jw Red Label ',
 2: 'Jack And Coke',
 3: 'Jw Red Label',
 4: 'Ron Bacardi',
 5: 'Baileys',
 6: 'Corajillo',
 7: 'Muffin Integral',
 8: 'Tequila 7 Leguas Reposado',
 9: 'Arandano Mango Mix',
 10: 'Quaker Granola',
 11: 'Tequila 7 Leguas Blanco',
 12: 'Sol Clamato',
 13: 'Quaker Avena Frutos Rojos',
 14: 'Go Nuts',
 15: 'Arandano',
 16: 'Nutty Berry Mix',
 17: 'Frutos Secos Enchilados',
 18: 'Te Relax',
 19: 'Dip De Queso',
 20: 'Te Frutos Rojos',
 21: 'Ultra Seltzer Frambuesa',
 22: 'Corajillo Baileys ',
 23: 'Vino Tinto Cria Cuervos',
 24: 'Te Manzanilla Jengibre',
 25: 'Baileys ',
 26: 'Nueces De Arbol Mix',
 27: 'Carne Seca Original',
 28: 'Luxury Nut Mix',
 29: 'Galleta De Chispas De Chocolate',
 30: 'Tostitos Nachos Con Dip',
 31: 'Protein Adventure',
 32: 'Vino Blanco Cria Cuervos ',
 33: 'Galleta De Chocolate',
 34: 'Topochico Seltzer Fresa-Guayaba',
 35: 'Te Vainilla',
 36: 'Cafe De Olla',
 37: 'Galleta De Arandano Relleno De Q/Crema',
 38: 'Cafe 

---

In [20]:
def parse_model_name(index: int):
    """
    index se refiere al índice de `product_dict_inv`
    """
    return "_".join(product_dict_inv[index].lower().replace("/", "_").split())

In [21]:
import pickle

for i in range(1, 97):
    try:
        model = pickle.load(open(f"models/Modelo{i}.pkl", "rb"))
        name = "_".join(product_dict_inv[i].lower().replace("/", "_").split())
        pickle.dump(model, open(f"renamed/Modelo-{name}.pkl", "wb"))
    except FileNotFoundError as fe:
        print(f"Modelo{i} no existe")
        continue

Modelo88 no existe
Modelo92 no existe


In [22]:
product_dict_inv[88], product_dict_inv[92]

('Agua Natural 600 Ml', 'Sabritas Originales')

---

In [23]:
# Encontrar los productos mas vendidos
product_sales = vuelos_ventas.groupby("ProductName")["Quantity"].sum()
product_sales = product_sales.sort_values(ascending=False)
top10 = product_sales.index[:10]
print([product_dict_inv[i] for i in top10])

['Coca Cola Regular', 'Agua Natural 600 Ml', 'Sabritas Originales', 'Ruffles Queso', 'Doritos Nacho', 'Panini Clasico', 'Fritos Limon Y Sal', 'Sabritas Flamin Hot', 'Sprite', 'Cheetos']


## **Coca**

In [24]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[0]]

In [25]:
vuelos_ventas.groupby("ProductName")["Quantity"].median()[9]

1.0

In [26]:
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]

In [27]:
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r2 = XGBRegressor(n_estimators=10, seed=123)
xgb_r1 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r1.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r1.predict(x_test)

[0]	validation_0-rmse:5.12990
[1]	validation_0-rmse:4.75852
[2]	validation_0-rmse:4.53123
[3]	validation_0-rmse:4.45371
[4]	validation_0-rmse:4.40641
[5]	validation_0-rmse:4.36719
[6]	validation_0-rmse:4.34600
[7]	validation_0-rmse:4.32918
[8]	validation_0-rmse:4.31454
[9]	validation_0-rmse:4.30733
[10]	validation_0-rmse:4.30069
[11]	validation_0-rmse:4.28060
[12]	validation_0-rmse:4.27492
[13]	validation_0-rmse:4.27219
[14]	validation_0-rmse:4.26846
[15]	validation_0-rmse:4.26452
[16]	validation_0-rmse:4.26249
[17]	validation_0-rmse:4.26143
[18]	validation_0-rmse:4.25583
[19]	validation_0-rmse:4.24756
[20]	validation_0-rmse:4.24729
[21]	validation_0-rmse:4.24745
[22]	validation_0-rmse:4.24774
[23]	validation_0-rmse:4.24567
[24]	validation_0-rmse:4.24407
[25]	validation_0-rmse:4.24264
[26]	validation_0-rmse:4.24242
[27]	validation_0-rmse:4.24199
[28]	validation_0-rmse:4.24187
[29]	validation_0-rmse:4.23897
[30]	validation_0-rmse:4.23811
[31]	validation_0-rmse:4.23968
[32]	validation_0-

In [28]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print(
    "Root Mean Squared Error:",
    np.sqrt(metrics.mean_squared_error(y_test, y_pred)),
)

R2 Score: 0.4771023970857141
Mean Absolute Error: 3.1268051003684874
Mean Squared Error: 19.470950334198804
Root Mean Squared Error: 4.412589980294884


## **Agua**

In [29]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[1]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r3 = XGBRegressor(n_estimators=10, seed=123)
xgb_r2 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r2.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r2.predict(x_test)

[0]	validation_0-rmse:4.21718
[1]	validation_0-rmse:3.97285
[2]	validation_0-rmse:3.84824
[3]	validation_0-rmse:3.78885
[4]	validation_0-rmse:3.74258
[5]	validation_0-rmse:3.72111
[6]	validation_0-rmse:3.70416
[7]	validation_0-rmse:3.68604
[8]	validation_0-rmse:3.66328
[9]	validation_0-rmse:3.65638
[10]	validation_0-rmse:3.64983
[11]	validation_0-rmse:3.63578
[12]	validation_0-rmse:3.62249
[13]	validation_0-rmse:3.61325
[14]	validation_0-rmse:3.61271
[15]	validation_0-rmse:3.60981
[16]	validation_0-rmse:3.60512
[17]	validation_0-rmse:3.60146
[18]	validation_0-rmse:3.58780
[19]	validation_0-rmse:3.58762
[20]	validation_0-rmse:3.58631
[21]	validation_0-rmse:3.58265
[22]	validation_0-rmse:3.57882
[23]	validation_0-rmse:3.57486
[24]	validation_0-rmse:3.57319
[25]	validation_0-rmse:3.57220
[26]	validation_0-rmse:3.57230
[27]	validation_0-rmse:3.56941
[28]	validation_0-rmse:3.57209
[29]	validation_0-rmse:3.56791
[30]	validation_0-rmse:3.56845
[31]	validation_0-rmse:3.56771
[32]	validation_0-

KeyboardInterrupt: 

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.4365865655890262
Mean Absolute Error: 2.6145307317859143
Mean Squared Error: 13.794136460462491
Root Mean Squared Error: 3.7140458344590326


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


## **Sabritas Originales**

In [None]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[2]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r4 = XGBRegressor(n_estimators=10, seed=123)
xgb_r3 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r3.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r3.predict(x_test)

[0]	validation_0-rmse:2.91804
[1]	validation_0-rmse:2.75313
[2]	validation_0-rmse:2.67716
[3]	validation_0-rmse:2.63141
[4]	validation_0-rmse:2.60938
[5]	validation_0-rmse:2.59443
[6]	validation_0-rmse:2.58884
[7]	validation_0-rmse:2.58248
[8]	validation_0-rmse:2.57777
[9]	validation_0-rmse:2.57351
[10]	validation_0-rmse:2.57328
[11]	validation_0-rmse:2.57013
[12]	validation_0-rmse:2.56850
[13]	validation_0-rmse:2.56779
[14]	validation_0-rmse:2.56734
[15]	validation_0-rmse:2.56630
[16]	validation_0-rmse:2.56713
[17]	validation_0-rmse:2.56775
[18]	validation_0-rmse:2.56782
[19]	validation_0-rmse:2.56895
[20]	validation_0-rmse:2.56828
[21]	validation_0-rmse:2.56505
[22]	validation_0-rmse:2.56335
[23]	validation_0-rmse:2.56416
[24]	validation_0-rmse:2.56410
[25]	validation_0-rmse:2.56403
[26]	validation_0-rmse:2.56509
[27]	validation_0-rmse:2.56528
[28]	validation_0-rmse:2.56511
[29]	validation_0-rmse:2.56535
[30]	validation_0-rmse:2.56691
[31]	validation_0-rmse:2.56656
[32]	validation_0-

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.3314282488274055
Mean Absolute Error: 1.9254828933585648
Mean Squared Error: 7.2951961908425185
Root Mean Squared Error: 2.700962086154213


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


## **Ruffles Queso**

In [None]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[3]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r5 = XGBRegressor(n_estimators=10, seed=123)
xgb_r4 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r4.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r4.predict(x_test)

[0]	validation_0-rmse:2.52083
[1]	validation_0-rmse:2.36275
[2]	validation_0-rmse:2.30511
[3]	validation_0-rmse:2.27700
[4]	validation_0-rmse:2.25893
[5]	validation_0-rmse:2.24515
[6]	validation_0-rmse:2.23995
[7]	validation_0-rmse:2.23067
[8]	validation_0-rmse:2.22466
[9]	validation_0-rmse:2.21904
[10]	validation_0-rmse:2.21252
[11]	validation_0-rmse:2.21176
[12]	validation_0-rmse:2.21150
[13]	validation_0-rmse:2.21062
[14]	validation_0-rmse:2.21018
[15]	validation_0-rmse:2.20850
[16]	validation_0-rmse:2.20734
[17]	validation_0-rmse:2.20538
[18]	validation_0-rmse:2.20591
[19]	validation_0-rmse:2.20734
[20]	validation_0-rmse:2.20644
[21]	validation_0-rmse:2.20530
[22]	validation_0-rmse:2.20591
[23]	validation_0-rmse:2.20508
[24]	validation_0-rmse:2.20549
[25]	validation_0-rmse:2.20555
[26]	validation_0-rmse:2.20559
[27]	validation_0-rmse:2.20608
[28]	validation_0-rmse:2.20669
[29]	validation_0-rmse:2.20703
[30]	validation_0-rmse:2.20701
[31]	validation_0-rmse:2.20817
[32]	validation_0-

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.3468752555202571
Mean Absolute Error: 1.6364877067455268
Mean Squared Error: 5.375963540035936
Root Mean Squared Error: 2.318612416950262


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


## **Doritos Nacho**

In [None]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[4]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r6 = XGBRegressor(n_estimators=10, seed=123)
xgb_r5 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r5.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r5.predict(x_test)

[0]	validation_0-rmse:2.38185
[1]	validation_0-rmse:2.28740
[2]	validation_0-rmse:2.23465
[3]	validation_0-rmse:2.20553
[4]	validation_0-rmse:2.18864
[5]	validation_0-rmse:2.17644
[6]	validation_0-rmse:2.16529
[7]	validation_0-rmse:2.15717
[8]	validation_0-rmse:2.15440
[9]	validation_0-rmse:2.15325
[10]	validation_0-rmse:2.14803
[11]	validation_0-rmse:2.14558
[12]	validation_0-rmse:2.14528
[13]	validation_0-rmse:2.14391
[14]	validation_0-rmse:2.13974
[15]	validation_0-rmse:2.14087
[16]	validation_0-rmse:2.14157
[17]	validation_0-rmse:2.14131
[18]	validation_0-rmse:2.14235
[19]	validation_0-rmse:2.14124
[20]	validation_0-rmse:2.14165
[21]	validation_0-rmse:2.14085
[22]	validation_0-rmse:2.14157
[23]	validation_0-rmse:2.14083
[24]	validation_0-rmse:2.14118
[25]	validation_0-rmse:2.14080
[26]	validation_0-rmse:2.13837
[27]	validation_0-rmse:2.13954
[28]	validation_0-rmse:2.13967
[29]	validation_0-rmse:2.13979
[30]	validation_0-rmse:2.13964
[31]	validation_0-rmse:2.13949
[32]	validation_0-

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.2696174940641838
Mean Absolute Error: 1.5986242804714084
Mean Squared Error: 5.120051293919524
Root Mean Squared Error: 2.262753034230542


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


## **Panini Original**

In [None]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[5]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r7 = XGBRegressor(n_estimators=10, seed=123)
xgb_r6 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r6.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r6.predict(x_test)

[0]	validation_0-rmse:1.87736
[1]	validation_0-rmse:1.80173
[2]	validation_0-rmse:1.76571
[3]	validation_0-rmse:1.74993
[4]	validation_0-rmse:1.74083
[5]	validation_0-rmse:1.73133
[6]	validation_0-rmse:1.72662
[7]	validation_0-rmse:1.72525
[8]	validation_0-rmse:1.72412
[9]	validation_0-rmse:1.72198
[10]	validation_0-rmse:1.72189
[11]	validation_0-rmse:1.72162
[12]	validation_0-rmse:1.72061
[13]	validation_0-rmse:1.71767
[14]	validation_0-rmse:1.71747
[15]	validation_0-rmse:1.71759
[16]	validation_0-rmse:1.71777
[17]	validation_0-rmse:1.71767
[18]	validation_0-rmse:1.71729
[19]	validation_0-rmse:1.71746
[20]	validation_0-rmse:1.71754
[21]	validation_0-rmse:1.71737
[22]	validation_0-rmse:1.71699
[23]	validation_0-rmse:1.71695
[24]	validation_0-rmse:1.71656
[25]	validation_0-rmse:1.71776
[26]	validation_0-rmse:1.71964
[27]	validation_0-rmse:1.71917
[28]	validation_0-rmse:1.72029
[29]	validation_0-rmse:1.72062
[30]	validation_0-rmse:1.72064
[31]	validation_0-rmse:1.72089
[32]	validation_0-

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.24042592172159072
Mean Absolute Error: 1.3151072898881557
Mean Squared Error: 3.2973229894701706
Root Mean Squared Error: 1.8158532400692988


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


## **Fritos Limon y Sal**

In [None]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[6]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r8 = XGBRegressor(n_estimators=10, seed=123)
xgb_r7 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r7.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r7.predict(x_test)

[0]	validation_0-rmse:1.95380
[1]	validation_0-rmse:1.87954
[2]	validation_0-rmse:1.84686
[3]	validation_0-rmse:1.83500
[4]	validation_0-rmse:1.82512
[5]	validation_0-rmse:1.81856
[6]	validation_0-rmse:1.81435
[7]	validation_0-rmse:1.81261
[8]	validation_0-rmse:1.81277
[9]	validation_0-rmse:1.81104
[10]	validation_0-rmse:1.80985
[11]	validation_0-rmse:1.80858
[12]	validation_0-rmse:1.80794
[13]	validation_0-rmse:1.80979
[14]	validation_0-rmse:1.80933
[15]	validation_0-rmse:1.80889
[16]	validation_0-rmse:1.80999
[17]	validation_0-rmse:1.80841
[18]	validation_0-rmse:1.80874
[19]	validation_0-rmse:1.80938
[20]	validation_0-rmse:1.81002
[21]	validation_0-rmse:1.81158
[22]	validation_0-rmse:1.81201
[23]	validation_0-rmse:1.81304
[24]	validation_0-rmse:1.81249
[25]	validation_0-rmse:1.81204
[26]	validation_0-rmse:1.81126
[27]	validation_0-rmse:1.81152
[28]	validation_0-rmse:1.81242
[29]	validation_0-rmse:1.81273
[30]	validation_0-rmse:1.81313
[31]	validation_0-rmse:1.81427
[32]	validation_0-

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.17595326188981086
Mean Absolute Error: 1.3605246169979748
Mean Squared Error: 3.7288391223934902
Root Mean Squared Error: 1.9310202283750137


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


## **Sabritas Flamin Hot**

In [None]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[7]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r9 = XGBRegressor(n_estimators=10, seed=123)
xgb_r8 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r8.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r8.predict(x_test)

[0]	validation_0-rmse:1.97328
[1]	validation_0-rmse:1.90007
[2]	validation_0-rmse:1.85671
[3]	validation_0-rmse:1.82693
[4]	validation_0-rmse:1.81367
[5]	validation_0-rmse:1.79999
[6]	validation_0-rmse:1.79272
[7]	validation_0-rmse:1.78938
[8]	validation_0-rmse:1.78763
[9]	validation_0-rmse:1.78743
[10]	validation_0-rmse:1.78649
[11]	validation_0-rmse:1.78653
[12]	validation_0-rmse:1.78330
[13]	validation_0-rmse:1.78178
[14]	validation_0-rmse:1.78181
[15]	validation_0-rmse:1.78346
[16]	validation_0-rmse:1.78294
[17]	validation_0-rmse:1.78348
[18]	validation_0-rmse:1.78416
[19]	validation_0-rmse:1.78280
[20]	validation_0-rmse:1.78388
[21]	validation_0-rmse:1.78312
[22]	validation_0-rmse:1.78457
[23]	validation_0-rmse:1.78467
[24]	validation_0-rmse:1.78486
[25]	validation_0-rmse:1.78395
[26]	validation_0-rmse:1.78284
[27]	validation_0-rmse:1.78353
[28]	validation_0-rmse:1.78237
[29]	validation_0-rmse:1.78428
[30]	validation_0-rmse:1.78494
[31]	validation_0-rmse:1.78571
[32]	validation_0-

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.21887666898772495
Mean Absolute Error: 1.3324718879996902
Mean Squared Error: 3.628609556047475
Root Mean Squared Error: 1.9048909564716494


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


## **Sprite**

In [None]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[8]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r10 = XGBRegressor(n_estimators=10, seed=123)
xgb_r9 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r9.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r9.predict(x_test)

[0]	validation_0-rmse:1.61823
[1]	validation_0-rmse:1.55892
[2]	validation_0-rmse:1.53629
[3]	validation_0-rmse:1.52659
[4]	validation_0-rmse:1.51945
[5]	validation_0-rmse:1.51438
[6]	validation_0-rmse:1.51335
[7]	validation_0-rmse:1.51249
[8]	validation_0-rmse:1.51103
[9]	validation_0-rmse:1.51037
[10]	validation_0-rmse:1.50994
[11]	validation_0-rmse:1.51025
[12]	validation_0-rmse:1.50996
[13]	validation_0-rmse:1.51012
[14]	validation_0-rmse:1.50977
[15]	validation_0-rmse:1.50961
[16]	validation_0-rmse:1.50946
[17]	validation_0-rmse:1.50853
[18]	validation_0-rmse:1.50884
[19]	validation_0-rmse:1.50960
[20]	validation_0-rmse:1.51069
[21]	validation_0-rmse:1.51060
[22]	validation_0-rmse:1.51069
[23]	validation_0-rmse:1.51115
[24]	validation_0-rmse:1.51136
[25]	validation_0-rmse:1.51192
[26]	validation_0-rmse:1.51292
[27]	validation_0-rmse:1.51336
[28]	validation_0-rmse:1.51334
[29]	validation_0-rmse:1.51317
[30]	validation_0-rmse:1.51430
[31]	validation_0-rmse:1.51620
[32]	validation_0-

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.1458556586776849
Mean Absolute Error: 1.1675276020336685
Mean Squared Error: 2.6295457662892248
Root Mean Squared Error: 1.6215874217226849


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


## **Cheetos**

In [None]:
modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == top10[8]]
X = modelo2.drop(columns=["Quantity"])
y = modelo2["Quantity"]
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

eval_set = [(x_test, y_test)]

train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

# xgb_r10 = XGBRegressor(n_estimators=10, seed=123)
xgb_r10 = XGBRegressor(learning_rate=0.5, n_estimators=250)
xgb_r10.fit(x_train, y_train, eval_set=eval_set, verbose=True)
y_pred = xgb_r10.predict(x_test)

[0]	validation_0-rmse:1.61823
[1]	validation_0-rmse:1.55892
[2]	validation_0-rmse:1.53629
[3]	validation_0-rmse:1.52659
[4]	validation_0-rmse:1.51945
[5]	validation_0-rmse:1.51438
[6]	validation_0-rmse:1.51335
[7]	validation_0-rmse:1.51249
[8]	validation_0-rmse:1.51103
[9]	validation_0-rmse:1.51037
[10]	validation_0-rmse:1.50994
[11]	validation_0-rmse:1.51025
[12]	validation_0-rmse:1.50996
[13]	validation_0-rmse:1.51012
[14]	validation_0-rmse:1.50977
[15]	validation_0-rmse:1.50961
[16]	validation_0-rmse:1.50946
[17]	validation_0-rmse:1.50853
[18]	validation_0-rmse:1.50884
[19]	validation_0-rmse:1.50960
[20]	validation_0-rmse:1.51069
[21]	validation_0-rmse:1.51060
[22]	validation_0-rmse:1.51069
[23]	validation_0-rmse:1.51115
[24]	validation_0-rmse:1.51136
[25]	validation_0-rmse:1.51192
[26]	validation_0-rmse:1.51292
[27]	validation_0-rmse:1.51336
[28]	validation_0-rmse:1.51334
[29]	validation_0-rmse:1.51317
[30]	validation_0-rmse:1.51430
[31]	validation_0-rmse:1.51620
[32]	validation_0-

In [None]:
# Evaluamos el modelo
print("R2 Score:", metrics.r2_score(y_test, y_pred))
print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

R2 Score: 0.1458556586776849
Mean Absolute Error: 1.1675276020336685
Mean Squared Error: 2.6295457662892248
Root Mean Squared Error: 1.6215874217226849


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [None]:
# import pickle

# # Supongamos que tienes una lista de nombres de modelos
# nombres_modelos = [
#     "ModeloVentas",
#     "ModeloCoca",
#     "ModeloAgua",
#     "ModeloSabritas",
#     "ModeloRuffles",
#     "ModeloDoritos",
#     "ModeloPanini",
#     "ModeloFritos",
#     "ModeloFlamin",
#     "ModeloSprite",
#     "ModeloCheetos",
# ]

# # Supongamos también que tienes una lista de modelos ya entrenados
# modelos_entrenados = [
#     xgb_r,
#     xgb_r1,
#     xgb_r2,
#     xgb_r3,
#     xgb_r4,
#     xgb_r5,
#     xgb_r6,
#     xgb_r7,
#     xgb_r8,
#     xgb_r9,
#     xgb_r10,
# ]

# # Guardar cada modelo en un archivo pickle con su nombre correspondiente
# for nombre, modelo in zip(nombres_modelos, modelos_entrenados):
#     with open(f"{nombre}.pkl", "wb") as archivo:
#         pickle.dump(modelo, archivo)

In [None]:
# import pickle

# for i in range(93, 98):
#     modelo2 = vuelos_ventas[vuelos_ventas["ProductName"] == i]
#     X = modelo2.drop(columns=["Quantity"])
#     y = modelo2["Quantity"]
#     x_train, x_test, y_train, y_test = train_test_split(
#         X,
#         y,
#         test_size=0.3,
#         random_state=0,
#     )

#     eval_set = [(x_test, y_test)]

#     train_dmatrix = xg.DMatrix(data=x_train, label=y_train)
#     test_dmatrix = xg.DMatrix(data=x_test, label=y_test)

#     xgb_r = XGBRegressor(learning_rate=0.5, n_estimators=250)
#     xgb_r.fit(x_train, y_train, eval_set=eval_set, verbose=True)
#     y_pred = xgb_r.predict(x_test)

#     # Evaluamos el modelo
#     print("R2 Score:", metrics.r2_score(y_test, y_pred))
#     print("Mean Absolute Error:", metrics.mean_absolute_error(y_test, y_pred))
#     print("Mean Squared Error:", metrics.mean_squared_error(y_test, y_pred))
#     print(
#         "Root Mean Squared Error:", np.sqrt(metrics.mean_squared_error(y_test, y_pred))
#     )

#     with open(f"Modelo{i}.pkl", "wb") as archivo:
#         pickle.dump(xgb_r, archivo)

In [None]:
# Faltan 88 y 92

Unnamed: 0,ProductName,Quantity,Capacity,Passengers,Bookings,Semana,Mes,Duracion,Hora,Dia,...,arrival_BJ,arrival_BK,arrival_BL,arrival_BM,arrival_BN,arrival_BO,arrival_BP,arrival_BQ,arrival_BS,arrival_BT
