# **PREDECIR NÚMERO DE PASAJEROS**

In [54]:
import pandas as pd
df = pd.read_csv('red_data.csv')
df.drop(['Unnamed: 0'], axis=1)
df = df.dropna()
df.head()

Unnamed: 0.1,Unnamed: 0,Flight_ID,Agua Natural 600 Ml,Amstel Ultra,Arandano,Arandano Mango Mix,Arcoiris,Baileys,Baileys.1,Cafe 19 Cafe Clasico,...,Xx Ultra,Passengers,Destination_Type,Origin_Type,Capacity,Bookings,Route,Flight_Duration,Hora_Salida_Redondeado,Month
0,0,00004a718edba9d9ef878d08f02ae057,4,0,0,0,0,0,0,0,...,0,174.0,Ciudad Principal,MX Amigos y Familia,180.0,106.0,AL-AT,135.0,16.0,12.0
1,1,0000cd79c0c3a9c309df6064dcacaeea,2,0,0,0,0,0,0,0,...,1,186.0,MX Amigos y Familia,Playa,186.0,93.0,AK-AD,95.0,19.0,10.0
2,2,000163f0df9cbfc35c4c06645ec512f6,0,0,0,0,0,0,0,0,...,0,173.0,MX Amigos y Familia,Ciudad Principal,220.0,138.0,AW-BF,85.0,6.0,10.0
3,3,00017be73003a570dd426b155762769c,8,0,0,1,0,0,0,0,...,0,156.0,Playa,Ciudad Fronteriza,240.0,95.0,BM-BD,225.0,9.0,10.0
4,4,0001a43836c338f8d8650aefb11672c9,3,0,0,2,0,2,0,0,...,0,157.0,Ciudad Fronteriza,Playa,240.0,78.0,BD-BM,55.0,16.0,12.0


In [56]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Preparar los datos
X = df[['Destination_Type', 'Origin_Type', 'Capacity', 'Route', 'Hora_Salida_Redondeado', 'Month']]
y = df['Passengers']

# Definir las columnas categóricas
categorical_features = ['Destination_Type', 'Origin_Type', 'Route', 'Month']

# Crear el transformador de columnas con OneHotEncoder para las variables categóricas
column_transformer = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Crear el pipeline con el transformador y el modelo de Random Forest
pipeline = Pipeline(steps=[
    ('preprocessor', column_transformer),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenar el modelo
pipeline.fit(X_train, y_train)

# Predecir los valores para el conjunto de prueba
y_pred = pipeline.predict(X_test)

# Evaluar el modelo
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"MSE: {mse}, R^2: {r2}")

def predict_passengers(input_df):
    """
    Asume que input_df ya tiene las columnas necesarias para la predicción.
    """
    # Asegurarse de que el DataFrame tenga las columnas necesarias
    required_columns = ['Destination_Type', 'Origin_Type', 'Capacity', 'Route', 'Month', 'Hora_Salida_Redondeado']
    if not all(column in input_df.columns for column in required_columns):
        raise ValueError(f"El DataFrame debe contener las siguientes columnas: {required_columns}")

    # Hacer la predicción usando el pipeline
    predicted_passengers = pipeline.predict(input_df[required_columns])

    # Añadir los resultados al DataFrame original
    input_df['Passengers'] = predicted_passengers.round().astype(int)

    return input_df


MSE: 519.4682958012703, R^2: 0.5827999473059782


In [59]:
datos24 = pd.read_csv('sales_24.csv')
datos24.head()


Unnamed: 0,Flight_ID,Passengers,Destination_Type,Origin_Type,Capacity,Bookings,Route,Flight_Duration,Hora_Salida_Redondeado,Month
0,0000c5ba279c7225e9f6bac8490678e1,,Playa,Ciudad Principal,220,,AT-BD,100.0,15,3
1,000158c9b43de5c11cb46d96b0501f2e,,Ciudad Principal,MX Amigos y Familia,178,,BN-AW,100.0,18,4
2,0005bc7d23da6c5f32537dba8779ad1b,,Ciudad Principal,MX Amigos y Familia,186,,BQ-AW,125.0,14,4
3,0005ccfc27398499603bf18827ddb892,,Ciudad Principal,Ciudad Principal,220,,AT-AW,110.0,18,3
4,0005d2ee26b96255f2a042e01309b9e8,,Playa,Ciudad Principal,230,,AT-BE,75.0,11,4


Predecir para todos los datos de 2024 para ya tenerlo en la base de datos

In [60]:
# Predecir matriz
df_predicted = predict_passengers(datos24)

# Mostrar el DataFrame con las predicciones añadidas
df_predicted

Unnamed: 0,Flight_ID,Passengers,Destination_Type,Origin_Type,Capacity,Bookings,Route,Flight_Duration,Hora_Salida_Redondeado,Month
0,0000c5ba279c7225e9f6bac8490678e1,187,Playa,Ciudad Principal,220,,AT-BD,100.0,15,3
1,000158c9b43de5c11cb46d96b0501f2e,153,Ciudad Principal,MX Amigos y Familia,178,,BN-AW,100.0,18,4
2,0005bc7d23da6c5f32537dba8779ad1b,181,Ciudad Principal,MX Amigos y Familia,186,,BQ-AW,125.0,14,4
3,0005ccfc27398499603bf18827ddb892,210,Ciudad Principal,Ciudad Principal,220,,AT-AW,110.0,18,3
4,0005d2ee26b96255f2a042e01309b9e8,213,Playa,Ciudad Principal,230,,AT-BE,75.0,11,4
...,...,...,...,...,...,...,...,...,...,...
41649,fffcab2b9f2830d152d340dd23d4050e,187,Ciudad Principal,Ciudad Fronteriza,240,,BM-AO,290.0,1,2
41650,fffd2e563a1bad35b6fbee1cac5fa680,164,MX Amigos y Familia,Ciudad Principal,180,,AW-BF,80.0,16,1
41651,fffed485a13460817032edbb8eca9295,221,Ciudad Principal,Playa,230,,AY-BA,160.0,14,4
41652,ffff1e8f5485017f4c5a6a1919369156,181,Ciudad Principal,Ciudad Principal,186,,AT-AW,105.0,6,4


# **PREDECIR CANTIDAD DE PRODUCTOS COMPRADOS**

In [141]:
import pandas as pd
df_red = pd.read_csv('red_data.csv')
df_red = df_red.drop(['Unnamed: 0'], axis=1)
df_red = df_red.dropna()
df_red

Unnamed: 0,Flight_ID,Agua Natural 600 Ml,Amstel Ultra,Arandano,Arandano Mango Mix,Arcoiris,Baileys,Baileys.1,Cafe 19 Cafe Clasico,Cafe 19 Capuchino,...,Xx Ultra,Passengers,Destination_Type,Origin_Type,Capacity,Bookings,Route,Flight_Duration,Hora_Salida_Redondeado,Month
0,00004a718edba9d9ef878d08f02ae057,4,0,0,0,0,0,0,0,0,...,0,174.0,Ciudad Principal,MX Amigos y Familia,180.0,106.0,AL-AT,135.0,16.0,12.0
1,0000cd79c0c3a9c309df6064dcacaeea,2,0,0,0,0,0,0,0,1,...,1,186.0,MX Amigos y Familia,Playa,186.0,93.0,AK-AD,95.0,19.0,10.0
2,000163f0df9cbfc35c4c06645ec512f6,0,0,0,0,0,0,0,0,1,...,0,173.0,MX Amigos y Familia,Ciudad Principal,220.0,138.0,AW-BF,85.0,6.0,10.0
3,00017be73003a570dd426b155762769c,8,0,0,1,0,0,0,0,2,...,0,156.0,Playa,Ciudad Fronteriza,240.0,95.0,BM-BD,225.0,9.0,10.0
4,0001a43836c338f8d8650aefb11672c9,3,0,0,2,0,2,0,0,0,...,0,157.0,Ciudad Fronteriza,Playa,240.0,78.0,BD-BM,55.0,16.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108759,fffe47f84ecc55da94b2907a7317dd12,15,0,0,0,1,0,0,0,2,...,0,203.0,Ciudad Fronteriza,Playa,240.0,107.0,BH-BM,135.0,15.0,6.0
108760,fffe4a9c19ae2320e76f02939fdff957,4,4,0,0,0,0,1,0,0,...,0,220.0,Ciudad Principal,Ciudad Principal,220.0,160.0,AT-AW,105.0,16.0,7.0
108761,ffff138536f249f69340e0a8336f94a4,5,0,0,3,0,0,0,0,0,...,2,240.0,Playa,Ciudad Principal,240.0,101.0,AW-AK,200.0,14.0,6.0
108762,ffff782f2ae79e385a016c00fbd994b8,3,0,0,0,0,0,0,0,0,...,0,181.0,MX Amigos y Familia,Ciudad Principal,186.0,132.0,AT-AL,135.0,14.0,11.0


In [135]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

class FlightDataset(Dataset):
    def __init__(self, features, labels):
        self.features = np.array(features, dtype=np.float32)
        self.labels = np.array(labels, dtype=np.float32)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return torch.tensor(self.features[idx]), torch.tensor(self.labels[idx])

def preprocess_data(df):
    features = df[['Passengers', 'Destination_Type', 'Origin_Type', 'Capacity', 'Bookings', 'Route', 'Flight_Duration', 'Hora_Salida_Redondeado', 'Month']].copy()
    labels = df.iloc[:, 1:98].copy()  # Excluyendo Flight_ID y asumiendo que estos son los productos

    # Codificación y escala
    le = LabelEncoder()
    for col in ['Destination_Type', 'Origin_Type', 'Route', 'Month']:
        features[col] = le.fit_transform(features[col])
    scaler = StandardScaler()
    features = scaler.fit_transform(features)

    # Verificar NaNs o infs
    features[np.isnan(features)] = 0
    features[np.isinf(features)] = 0
    labels[np.isnan(labels)] = 0
    labels[np.isinf(labels)] = 0

    return features, labels

class ProductPredictionNN(nn.Module):
    def __init__(self):
        super(ProductPredictionNN, self).__init__()
        self.fc1 = nn.Linear(9, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 256)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(256, 128)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(128, 97)
        self.softplus = nn.Softplus()

        # Inicialización He
        nn.init.kaiming_normal_(self.fc1.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.fc2.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.fc3.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.fc4.weight)

    def forward(self, x):
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        x = self.relu3(self.fc3(x))
        x = self.fc4(x)
        x = self.softplus(x)
        return torch.round(x)

# Procesamiento de datos, inicialización del modelo, y configuración del entrenamiento
features, labels = preprocess_data(df_red)  # Asume df_red ya está cargado
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
train_dataset = FlightDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
model = ProductPredictionNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Tasa de aprendizaje ajustable

def train_model(num_epochs):
    model.train()
    for epoch in range(num_epochs):
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(torch.float32), targets.to(torch.float32)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            if torch.isnan(loss):
                print("NaN loss detected")
                continue
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

train_model(10)


Epoch 1, Loss: 3.1119801998138428
Epoch 2, Loss: 3.4017064571380615
Epoch 3, Loss: 3.383220672607422
Epoch 4, Loss: 3.0814077854156494
Epoch 5, Loss: 1.9985780715942383
Epoch 6, Loss: 3.2132954597473145
Epoch 7, Loss: 3.947387218475342
Epoch 8, Loss: 4.367934703826904
Epoch 9, Loss: 4.124422550201416
Epoch 10, Loss: 2.762531042098999


In [137]:
def evaluate_model(model, test_loader):
    model.eval()  # Poner el modelo en modo de evaluación
    predictions = []
    with torch.no_grad():  # No necesitamos calcular gradientes en la evaluación
        for inputs, _ in test_loader:
            inputs = inputs.to(torch.float32)
            outputs = model(inputs)
            predictions.extend(outputs.cpu().numpy())
    return np.array(predictions)

# Cargar el dataset de prueba
test_dataset = FlightDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Evaluar el modelo
model_predictions = evaluate_model(model, test_loader)

model_predictions

# Si deseas comparar con los valores reales:
actual_values = y_test[:len(model_predictions)]  # Asegúrate de que las dimensiones coincidan
print("Valores predecidos:")
actual_values.head()

Valores predecidos:


Unnamed: 0,Agua Natural 600 Ml,Amstel Ultra,Arandano,Arandano Mango Mix,Arcoiris,Baileys,Baileys.1,Cafe 19 Cafe Clasico,Cafe 19 Capuchino,Cafe 19 Chiapas,...,Topochico Seltzer Fresa-Guayaba,Topochico Seltzer Mango,Tostitos,Tostitos Nachos Con Dip,Ultra Seltzer Frambuesa,Vino Blanco Cria Cuervos,Vino Tinto Cria Cuervos,Vino Tinto Sangre De Toro,Xx Lager,Xx Ultra
106574,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
64113,1,2,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
81933,3,0,0,1,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
22361,6,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
35587,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [138]:
def predict_new_data(model, new_data):
    # Convertir los datos a un tensor de PyTorch
    new_data_tensor = torch.tensor(new_data, dtype=torch.float32)

    # Poner el modelo en modo de evaluación
    model.eval()

    # Asegurarse de que no se calculen gradientes
    with torch.no_grad():
        # Hacer la predicción
        predictions = model(new_data_tensor)

    # Convertir las predicciones a numpy array si es necesario
    predictions = predictions.numpy()

    # Redondear las predicciones si el modelo original lo hace
    rounded_predictions = np.round(predictions)

    return rounded_predictions

# Ejemplo de cómo usar la función predict_new_data
# Suponiendo que new_data_processed es tu array de datos nuevos que ya está preprocesado
# new_data_processed debe ser un array 2D de NumPy (o una lista de listas) donde cada sublista representa una observación
new_data_processed = np.array([[1.58865092, -0.66975557, -0.6450756 ,  1.23765838,  2.5288787 ,0.24067213, -0.41327962,  1.17004819, -0.19944974]])  # Ejemplo con datos ficticios

# Hacer la predicción
predictions = predict_new_data(model, new_data_processed)
print(predictions)


[[0. 0. 2. 1. 1. 1. 0. 1. 2. 1. 2. 1. 1. 1. 1. 0. 1. 2. 1. 0. 2. 1. 2. 1.
  1. 1. 0. 0. 2. 3. 0. 0. 2. 2. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 2.
  3. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 3. 2. 0. 0. 0. 1. 1. 2. 1. 1. 3. 0. 0.
  0. 0. 0. 2. 0. 1. 2. 0. 0. 1. 2. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 3. 1. 1.
  0.]]


Preparar datos para las predicciones de 2024

In [142]:
new_columns = actual_values.columns

new_data = pd.DataFrame(columns=new_columns)

# Separar la primera columna de df
first_column = datos24.iloc[:, :1]

# Resto de columnas después de la primera
remaining_columns = datos24.iloc[:, 1:]

# Concatenar la primera columna, las nuevas columnas, y el resto de las columnas originales
merge = pd.concat([first_column, new_data, remaining_columns], axis=1)
merge.head()

Unnamed: 0,Flight_ID,Agua Natural 600 Ml,Amstel Ultra,Arandano,Arandano Mango Mix,Arcoiris,Baileys,Baileys.1,Cafe 19 Cafe Clasico,Cafe 19 Capuchino,...,Xx Ultra,Passengers,Destination_Type,Origin_Type,Capacity,Bookings,Route,Flight_Duration,Hora_Salida_Redondeado,Month
0,0000c5ba279c7225e9f6bac8490678e1,,,,,,,,,,...,,187,Playa,Ciudad Principal,220,,AT-BD,100.0,15,3
1,000158c9b43de5c11cb46d96b0501f2e,,,,,,,,,,...,,153,Ciudad Principal,MX Amigos y Familia,178,,BN-AW,100.0,18,4
2,0005bc7d23da6c5f32537dba8779ad1b,,,,,,,,,,...,,181,Ciudad Principal,MX Amigos y Familia,186,,BQ-AW,125.0,14,4
3,0005ccfc27398499603bf18827ddb892,,,,,,,,,,...,,210,Ciudad Principal,Ciudad Principal,220,,AT-AW,110.0,18,3
4,0005d2ee26b96255f2a042e01309b9e8,,,,,,,,,,...,,213,Playa,Ciudad Principal,230,,AT-BE,75.0,11,4


In [144]:
df_filled = merge.fillna(0)
df_filled

Unnamed: 0,Flight_ID,Agua Natural 600 Ml,Amstel Ultra,Arandano,Arandano Mango Mix,Arcoiris,Baileys,Baileys.1,Cafe 19 Cafe Clasico,Cafe 19 Capuchino,...,Xx Ultra,Passengers,Destination_Type,Origin_Type,Capacity,Bookings,Route,Flight_Duration,Hora_Salida_Redondeado,Month
0,0000c5ba279c7225e9f6bac8490678e1,0,0,0,0,0,0,0,0,0,...,0,187,Playa,Ciudad Principal,220,0.0,AT-BD,100.0,15,3
1,000158c9b43de5c11cb46d96b0501f2e,0,0,0,0,0,0,0,0,0,...,0,153,Ciudad Principal,MX Amigos y Familia,178,0.0,BN-AW,100.0,18,4
2,0005bc7d23da6c5f32537dba8779ad1b,0,0,0,0,0,0,0,0,0,...,0,181,Ciudad Principal,MX Amigos y Familia,186,0.0,BQ-AW,125.0,14,4
3,0005ccfc27398499603bf18827ddb892,0,0,0,0,0,0,0,0,0,...,0,210,Ciudad Principal,Ciudad Principal,220,0.0,AT-AW,110.0,18,3
4,0005d2ee26b96255f2a042e01309b9e8,0,0,0,0,0,0,0,0,0,...,0,213,Playa,Ciudad Principal,230,0.0,AT-BE,75.0,11,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41649,fffcab2b9f2830d152d340dd23d4050e,0,0,0,0,0,0,0,0,0,...,0,187,Ciudad Principal,Ciudad Fronteriza,240,0.0,BM-AO,290.0,1,2
41650,fffd2e563a1bad35b6fbee1cac5fa680,0,0,0,0,0,0,0,0,0,...,0,164,MX Amigos y Familia,Ciudad Principal,180,0.0,AW-BF,80.0,16,1
41651,fffed485a13460817032edbb8eca9295,0,0,0,0,0,0,0,0,0,...,0,221,Ciudad Principal,Playa,230,0.0,AY-BA,160.0,14,4
41652,ffff1e8f5485017f4c5a6a1919369156,0,0,0,0,0,0,0,0,0,...,0,181,Ciudad Principal,Ciudad Principal,186,0.0,AT-AW,105.0,6,4


In [170]:
# Procesamiento de datos, inicialización del modelo, y configuración del entrenamiento
features, labels = preprocess_data(df_filled)  # Asume df_red ya está cargado
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
train_dataset = FlightDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
model = ProductPredictionNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Tasa de aprendizaje ajustable

def train_model(num_epochs):
    model.train()
    for epoch in range(num_epochs):
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(torch.float32), targets.to(torch.float32)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            if torch.isnan(loss):
                print("NaN loss detected")
                continue
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

train_model(1)

Epoch 1, Loss: 1.422680377960205


In [173]:
x_todo = np.concatenate([X_train, X_test])

In [176]:
x = df_filled[['Flight_ID']]
x

Unnamed: 0,Flight_ID
0,0000c5ba279c7225e9f6bac8490678e1
1,000158c9b43de5c11cb46d96b0501f2e
2,0005bc7d23da6c5f32537dba8779ad1b
3,0005ccfc27398499603bf18827ddb892
4,0005d2ee26b96255f2a042e01309b9e8
...,...
41649,fffcab2b9f2830d152d340dd23d4050e
41650,fffd2e563a1bad35b6fbee1cac5fa680
41651,fffed485a13460817032edbb8eca9295
41652,ffff1e8f5485017f4c5a6a1919369156


In [178]:
list_of_arrays = list(x_todo)

# Crear un DataFrame a partir de la lista de arrays
x['Array'] = list_of_arrays
x

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['Array'] = list_of_arrays


Unnamed: 0,Flight_ID,Array
0,0000c5ba279c7225e9f6bac8490678e1,"[1.2008177500214985, -1.3974351404739251, 0.85..."
1,000158c9b43de5c11cb46d96b0501f2e,"[-0.22554177259618122, -0.6528966548917392, 1...."
2,0005bc7d23da6c5f32537dba8779ad1b,"[-0.9762573108160126, -0.6528966548917392, 1.6..."
3,0005ccfc27398499603bf18827ddb892,"[0.2999591041577008, 1.5807188018548186, 0.856..."
4,0005d2ee26b96255f2a042e01309b9e8,"[-0.5258279878841138, 1.5807188018548186, -0.6..."
...,...,...
41649,fffcab2b9f2830d152d340dd23d4050e,"[0.2999591041577008, 0.0916418306904467, 0.856..."
41650,fffd2e563a1bad35b6fbee1cac5fa680,"[1.6137112960424056, -0.6528966548917392, 0.10..."
41651,fffed485a13460817032edbb8eca9295,"[1.9139975113303382, -1.3974351404739251, -0.6..."
41652,ffff1e8f5485017f4c5a6a1919369156,"[0.03720866578075978, -1.3974351404739251, 0.8..."


In [227]:
# Ejemplo de cómo usar la función predict_new_data
# Suponiendo que new_data_processed es tu array de datos nuevos que ya está preprocesado
# new_data_processed debe ser un array 2D de NumPy (o una lista de listas) donde cada sublista representa una observación

ID = "35b62c4a09ba0a69e0c9b9a006d83f18" #  '4d95c80c854b637e6fdf951e85f1a05b', '2385ceefd5123f0cdf95994f9978d216', '35b62c4a09ba0a69e0c9b9a006d83f18','c073f59d3bea0a295cc8e466265c28c1', 'b5b6665fdc1048a63fd7e2f224a51b6c'

vuelo = x[x["Flight_ID"]==ID]

new_data_processed = (vuelo.iloc[:, 1:])  # Ejemplo con datos ficticios

new_data_processed

Unnamed: 0,Array
8812,"[-0.7135068724390716, 0.8361803162726327, 1.60..."


In [228]:
# Hacer la predicción
predictions = predict_new_data(model, new_data_processed.iloc[0, 0])
print(predictions)


[0. 1. 4. 3. 1. 1. 0. 1. 0. 1. 0. 1. 0. 2. 2. 0. 1. 2. 2. 1. 3. 2. 1. 0.
 0. 0. 0. 1. 0. 1. 2. 0. 1. 1. 3. 0. 0. 5. 3. 2. 1. 1. 0. 1. 1. 1. 1. 2.
 0. 1. 0. 1. 0. 0. 0. 2. 0. 0. 3. 1. 1. 2. 2. 2. 1. 0. 0. 3. 0. 5. 4. 0.
 0. 0. 1. 2. 0. 0. 2. 2. 1. 1. 1. 0. 1. 3. 0. 3. 0. 0. 4. 0. 0. 0. 2. 0.
 3.]


# MONTECARLO

In [231]:
fila_filtrada = df_filled[df_filled['Flight_ID'] == ID]
valor_passengers = fila_filtrada['Passengers'].iloc[0]
valor_capacity = fila_filtrada['Capacity'].iloc[0]

In [232]:
fila_filtrada

Unnamed: 0,Flight_ID,Agua Natural 600 Ml,Amstel Ultra,Arandano,Arandano Mango Mix,Arcoiris,Baileys,Baileys.1,Cafe 19 Cafe Clasico,Cafe 19 Capuchino,...,Xx Ultra,Passengers,Destination_Type,Origin_Type,Capacity,Bookings,Route,Flight_Duration,Hora_Salida_Redondeado,Month
8812,35b62c4a09ba0a69e0c9b9a006d83f18,0,0,0,0,0,0,0,0,0,...,0,226,Ciudad Fronteriza,Ecoturismo,240,0.0,BB-BM,135.0,7,1


In [233]:
df = pd.DataFrame({
    "Flight_ID": ID,
    "ProductName": actual_values.columns,
    "Quantity": predictions[:len(actual_values.columns)],
    "Capacity": valor_capacity,
    "Passengers": valor_passengers
})

# Ordena el dataset por ProductName
df.sort_values(by='ProductName', inplace=True)

In [222]:
df['SalesProbPerProd'] = df['Quantity'] / df['Passengers'].replace(0, pd.NA)

# Mostrar el DataFrame actualizado
df

Unnamed: 0,Flight_ID,ProductName,Quantity,Capacity,Passengers,SalesProbPerProd,CumulativeProbability
0,4d95c80c854b637e6fdf951e85f1a05b,Agua Natural 600 Ml,0.0,240,226,0.000000,0.000000
1,4d95c80c854b637e6fdf951e85f1a05b,Amstel Ultra,1.0,240,226,0.004425,0.004425
2,4d95c80c854b637e6fdf951e85f1a05b,Arandano,2.0,240,226,0.008850,0.013274
3,4d95c80c854b637e6fdf951e85f1a05b,Arandano Mango Mix,1.0,240,226,0.004425,0.017699
4,4d95c80c854b637e6fdf951e85f1a05b,Arcoiris,1.0,240,226,0.004425,0.022124
...,...,...,...,...,...,...,...
92,4d95c80c854b637e6fdf951e85f1a05b,Vino Blanco Cria Cuervos,1.0,240,226,0.004425,0.309735
93,4d95c80c854b637e6fdf951e85f1a05b,Vino Tinto Cria Cuervos,0.0,240,226,0.000000,0.309735
94,4d95c80c854b637e6fdf951e85f1a05b,Vino Tinto Sangre De Toro,1.0,240,226,0.004425,0.314159
95,4d95c80c854b637e6fdf951e85f1a05b,Xx Lager,1.0,240,226,0.004425,0.318584


In [223]:
df['CumulativeProbability'] = df['SalesProbPerProd'].cumsum()

In [224]:
df

Unnamed: 0,Flight_ID,ProductName,Quantity,Capacity,Passengers,SalesProbPerProd,CumulativeProbability
0,4d95c80c854b637e6fdf951e85f1a05b,Agua Natural 600 Ml,0.0,240,226,0.000000,0.000000
1,4d95c80c854b637e6fdf951e85f1a05b,Amstel Ultra,1.0,240,226,0.004425,0.004425
2,4d95c80c854b637e6fdf951e85f1a05b,Arandano,2.0,240,226,0.008850,0.013274
3,4d95c80c854b637e6fdf951e85f1a05b,Arandano Mango Mix,1.0,240,226,0.004425,0.017699
4,4d95c80c854b637e6fdf951e85f1a05b,Arcoiris,1.0,240,226,0.004425,0.022124
...,...,...,...,...,...,...,...
92,4d95c80c854b637e6fdf951e85f1a05b,Vino Blanco Cria Cuervos,1.0,240,226,0.004425,0.309735
93,4d95c80c854b637e6fdf951e85f1a05b,Vino Tinto Cria Cuervos,0.0,240,226,0.000000,0.309735
94,4d95c80c854b637e6fdf951e85f1a05b,Vino Tinto Sangre De Toro,1.0,240,226,0.004425,0.314159
95,4d95c80c854b637e6fdf951e85f1a05b,Xx Lager,1.0,240,226,0.004425,0.318584


In [226]:
# SIMULACION MONTECARLO

import numpy as np
import pandas as pd

num_simulations = 1000
products = df['ProductName'].unique()
sales_results = {product: [] for product in products}

for _ in range(num_simulations):
    numero_aleatorio = np.random.uniform(0, df['CumulativeProbability'].max())

    for index, row in df.iterrows():
        if numero_aleatorio <= row['CumulativeProbability']:
            sales_results[row['ProductName']].append(row['Quantity'])
            break

# Determinar el valor de compra más frecuentemente registrado para cada producto
most_frequent_sales = {}

for product, quantities in sales_results.items():
    if quantities:  # Verificar que haya ventas registradas
        most_frequent = pd.Series(quantities).value_counts().idxmax()
        most_frequent_sales[product] = most_frequent
    else:
        most_frequent_sales[product] = 0  # No hubo ventas en las simulaciones para este producto

# Mostrar los resultados con formato alineado
max_length = max(len(product) for product in most_frequent_sales.keys())
print(f"{'Producto'.ljust(max_length)} | Número más frecuente de ventas en {num_simulations} simulaciones")
print('-' * (max_length + 54))

for product, frequent_sales in most_frequent_sales.items():
    print(f"{product.ljust(max_length)} | {frequent_sales}")

Producto                               | Número más frecuente de ventas en 1000 simulaciones
--------------------------------------------------------------------------------------------
Agua Natural 600 Ml                    | 0
Amstel Ultra                           | 1.0
Arandano                               | 2.0
Arandano Mango Mix                     | 1.0
Arcoiris                               | 1.0
Baileys                                | 1.0
Baileys                                | 0
Cafe 19 Cafe Clasico                   | 1.0
Cafe 19 Capuchino                      | 0
Cafe 19 Chiapas                        | 2.0
Cafe Costa                             | 0
Cafe De Olla                           | 2.0
Carne Seca Habanero                    | 0
Carne Seca Original                    | 1.0
Cerveza Charter                        | 1.0
Cheetos                                | 0
Cheetos Flamin Hot                     | 1.0
Chokis                                 | 0
Ciel Mineralizada 