In [2]:
import pandas as pd

# Load one processed chunk of the dataset from its parquet file.
df = pd.read_parquet("../data_subsets/final_dataset_chunks/chunk_0.parquet")

# Columns that are not used as model inputs; everything else is a feature.
non_feature_columns = ["totalFare", "legId"]
X = df.drop(columns=non_feature_columns)

# Print the remaining input columns, numbered from 1.
print("🧠 Columnas de entrada (features):")
for position, feature_name in enumerate(X.columns, start=1):
    print(f"{position:2d}. {feature_name}")


🧠 Columnas de entrada (features):
 1. segment_num
 2. airline_code
 3. aircraft_type
 4. duration_seconds
 5. distance_miles
 6. departure_hour
 7. departure_weekday
 8. cabin
 9. arr_airport
10. dep_airport
11. startingAirport
12. destinationAirport
13. fareBasisCode
14. isBasicEconomy
15. isRefundable
16. isNonStop
17. baseFare
18. seatsRemaining
19. elapsedDays
20. days_in_advance
21. totalTravelDistance


In [26]:
import numpy as np
import pandas as pd
import joblib
from tensorflow.keras.models import load_model

# -------------------------------
# 1️⃣ LOAD MODEL AND SCALER
# -------------------------------
model_path = "../models/flight_price_predictor_global_eval.h5"
scaler_path = "../models/standard_scaler.pkl"

# Factor applied to the raw network output to obtain a price.
# NOTE(review): presumably the training target was divided by 1000 — confirm
# against the training code before trusting absolute values.
PRICE_SCALE = 1000.0

# compile=False: this cell only runs inference, so the optimizer/loss/metrics
# configuration stored alongside the weights does not need to be restored.
model = load_model(model_path, compile=False)
# joblib.load unpickles the file; only load artifacts produced by this project.
scaler = joblib.load(scaler_path)

print("✅ Modelo y scaler cargados correctamente.")

# -------------------------------
# 2️⃣ INPUT DATA (21 FEATURES)
# -------------------------------

# Example values keyed by feature name. Keeping name and value side by side
# means they cannot drift apart the way two parallel lists can. Insertion
# order of this dict is the column order handed to the scaler.
features = {
    "segment_num": 1,
    "airline_code": 3,
    "aircraft_type": 12,
    "duration_seconds": 7200,
    "distance_miles": 1350,
    "departure_hour": 14,
    "departure_weekday": 1,
    "cabin": 0,
    "arr_airport": 24,
    "dep_airport": 10,
    "startingAirport": 20,
    "destinationAirport": 24,
    "fareBasisCode": 5,
    "isBasicEconomy": 0,
    "isRefundable": 1,
    "isNonStop": 1,
    "baseFare": 185.0,
    "seatsRemaining": 3,
    "elapsedDays": 1,
    "days_in_advance": 21,
    "totalTravelDistance": 580.0,
}

# Same names the cell exposed before, now derived from the single mapping.
columns = list(features)
values = [list(features.values())]

# Single-row input frame
input_df = pd.DataFrame(values, columns=columns)
print("🧠 Entrada preparada:")
display(input_df)

# -------------------------------
# 3️⃣ SCALE AND PREDICT
# -------------------------------
input_scaled = scaler.transform(input_df)
# verbose=0 suppresses the Keras progress bar, which is only noise for one row.
# [0][0] picks the first value of the first (only) prediction row.
predicted_scaled = model.predict(input_scaled, verbose=0)[0][0]
predicted_price = predicted_scaled * PRICE_SCALE  # undo the target scaling

# -------------------------------
# 4️⃣ RESULT
# -------------------------------
# NOTE(review): the label prints "€" — confirm the currency of the training data.
print(f"💸 Precio estimado: {predicted_price:.2f} €")




✅ Modelo y scaler cargados correctamente.
🧠 Entrada preparada:


Unnamed: 0,segment_num,airline_code,aircraft_type,duration_seconds,distance_miles,departure_hour,departure_weekday,cabin,arr_airport,dep_airport,...,destinationAirport,fareBasisCode,isBasicEconomy,isRefundable,isNonStop,baseFare,seatsRemaining,elapsedDays,days_in_advance,totalTravelDistance
0,1,3,12,7200,1350,14,1,0,24,10,...,24,5,0,1,1,185.0,3,1,21,580.0




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step
💸 Precio estimado: 365.13 €


In [47]:
# -------------------------------------
# 1. IMPORTAR LIBRERÍAS Y CARGAR MODELO
# -------------------------------------
import numpy as np
import pandas as pd
import joblib
from tensorflow.keras.models import load_model

# Paths to the serialized model and scaler
model_path = "../models/flight_price_predictor_v4_clean_final.h5"
scaler_path = "../models/standard_scaler.pkl"

# Factor applied to the raw network output to obtain a price.
# NOTE(review): presumably the training target was divided by 1000 — confirm
# against the training code before trusting absolute values.
PRICE_SCALE = 1000

# compile=False: this cell only runs inference, so the optimizer/loss/metrics
# configuration stored alongside the weights does not need to be restored.
model = load_model(model_path, compile=False)
# joblib.load unpickles the file; only load artifacts produced by this project.
# NOTE(review): confirm this scaler file is the one fitted for this model version.
scaler = joblib.load(scaler_path)

print("✅ Modelo y scaler cargados correctamente.")

# -------------------------------------
# 2. DEFINE COLUMNS AND INPUT VALUES
# -------------------------------------
# ✅ Change the values here to predict a different case.
# Each value sits next to its feature name so an edit cannot silently land in
# the wrong position. Insertion order is the column order given to the scaler.
feature_values = {
    "segment_num": 0,
    "airline_code": 10,
    "aircraft_type": 17,
    "duration_seconds": 3000,
    "distance_miles": 350,
    "departure_hour": 14,
    "departure_weekday": 4,
    "cabin": 12,
    "arr_airport": 24,
    "dep_airport": 10,
    "startingAirport": 24,
    "destinationAirport": 24,
    "fareBasisCode": 7,
    "isBasicEconomy": 0,
    "isRefundable": 1,
    "isNonStop": 1,
    "baseFare": 350,
    "seatsRemaining": 4,
    "elapsedDays": 5,
    "days_in_advance": 30,
    "totalTravelDistance": 350,
}

# Same names the cell exposed before, now derived from the single mapping.
columns = list(feature_values)
input_data = np.array([list(feature_values.values())])

# Wrap in a DataFrame so the values carry their column names
df_input = pd.DataFrame(input_data, columns=columns)
print("📥 Entrada preparada:")
display(df_input)

# -------------------------------------
# 3. SCALE AND PREDICT
# -------------------------------------
input_scaled = scaler.transform(df_input)
# verbose=0 suppresses the Keras progress bar, which is only noise for one row.
# [0][0] picks the first value of the first (only) prediction row.
predicted_scaled = model.predict(input_scaled, verbose=0)[0][0]
predicted_price = predicted_scaled * PRICE_SCALE  # undo the target scaling

print(f"📈 Precio estimado: {predicted_price:.2f} €")




✅ Modelo y scaler cargados correctamente.
📥 Entrada preparada:


Unnamed: 0,segment_num,airline_code,aircraft_type,duration_seconds,distance_miles,departure_hour,departure_weekday,cabin,arr_airport,dep_airport,...,destinationAirport,fareBasisCode,isBasicEconomy,isRefundable,isNonStop,baseFare,seatsRemaining,elapsedDays,days_in_advance,totalTravelDistance
0,0,10,17,3000,350,14,4,12,24,10,...,24,7,0,1,1,350,4,5,30,350


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
📈 Precio estimado: 201.87 €


