In [98]:
import pandas as pd
df_posiciones = pd.read_csv("posiciones.csv", index_col=0)
df_vuelos = pd.read_csv("vuelos.csv", index_col=0)

# Ordenar los dataframes por Timestamp (kafka) y luego por ICAO
df_posiciones = df_posiciones.sort_values(["Timestamp (kafka)", "ICAO"])
df_vuelos = df_vuelos.sort_values(["Timestamp (kafka)", "ICAO"])

# Hacer el merge asof para asignar el Callsign a cada posición
df = pd.merge_asof(df_posiciones, df_vuelos, on="Timestamp (kafka)", by="ICAO", direction="nearest")

df


Unnamed: 0,Timestamp (kafka),ICAO,Flight status,lat,lon,Callsign
0,1733011200114,34108F,on-ground,40.501983,-3.567529,HIELO26_
1,1733011200114,49D2EE,airborne,41.854977,-4.535118,TVP7306_
2,1733011200114,502D5E,airborne,41.102088,-5.942293,BTI2YM__
3,1733011200121,34510A,airborne,39.280380,-7.076151,IBE0169_
4,1733011200122,347519,airborne,40.519821,-6.373169,IBE0123_
...,...,...,...,...,...,...
92516,1733016190658,342348,on-ground,40.537025,-3.558977,TXLU04__
92517,1733016190658,342349,on-ground,40.497793,-3.557854,TXLU05__
92518,1733016190658,34234B,on-ground,40.469967,-3.530154,TXLU07__
92519,1733016190658,34234E,on-ground,40.453010,-3.572180,TXLU10__


In [99]:
from preprocess.decoder import Decoder

df["Timestamp (date)"] = df['Timestamp (kafka)'].apply(Decoder.kafkaToDate)
df["Timestamp (date)"]

0       2024-12-01 01:00:00.114
1       2024-12-01 01:00:00.114
2       2024-12-01 01:00:00.114
3       2024-12-01 01:00:00.121
4       2024-12-01 01:00:00.122
                  ...          
92516   2024-12-01 02:23:10.658
92517   2024-12-01 02:23:10.658
92518   2024-12-01 02:23:10.658
92519   2024-12-01 02:23:10.658
92520   2024-12-01 02:23:10.658
Name: Timestamp (date), Length: 92521, dtype: datetime64[ns]

In [101]:
df_chunk = df.sort_values(by=['Callsign', 'Timestamp (date)'])

In [102]:
df_chunk

Unnamed: 0,Timestamp (kafka),ICAO,Flight status,lat,lon,Callsign,Timestamp (date)
11,1733011200320,344115,on-ground,41.151666,-3.604436,7777XPA1,2024-12-01 01:00:00.320
23,1733011200930,344115,on-ground,41.151672,-3.604431,7777XPA1,2024-12-01 01:00:00.930
34,1733011201338,344115,on-ground,41.151672,-3.604431,7777XPA1,2024-12-01 01:00:01.338
65,1733011202449,344115,on-ground,41.151666,-3.604436,7777XPA1,2024-12-01 01:00:02.449
75,1733011203060,344115,on-ground,41.151666,-3.604436,7777XPA1,2024-12-01 01:00:03.060
...,...,...,...,...,...,...,...
49884,1733013692321,4CA264,on-ground,40.460758,-3.569016,,2024-12-01 01:41:32.321
49971,1733013697380,4CA264,on-ground,40.460761,-3.569027,,2024-12-01 01:41:37.380
50073,1733013702342,4CA264,on-ground,40.460761,-3.569027,,2024-12-01 01:41:42.342
50280,1733013712269,4CA264,on-ground,40.460761,-3.569027,,2024-12-01 01:41:52.269


In [103]:
# Agrupar por Callsign
grouped = df_chunk.groupby('Callsign')['Flight status']

# Mostrar valores únicos de 'Flight status' para cada Callsign
for callsign, group in grouped:
    print(f"Callsign: {callsign}")
    print("Valores únicos en 'Flight status':", group.unique())
    print("-" * 50)


Callsign: 7777XPA1
Valores únicos en 'Flight status': ['on-ground']
--------------------------------------------------
Callsign: AEA023__
Valores únicos en 'Flight status': ['on-ground' 'airborne']
--------------------------------------------------
Callsign: ANE3016_
Valores únicos en 'Flight status': ['airborne']
--------------------------------------------------
Callsign: ANE82FC_
Valores únicos en 'Flight status': ['on-ground' 'airborne']
--------------------------------------------------
Callsign: AVA047__
Valores únicos en 'Flight status': ['on-ground' 'airborne']
--------------------------------------------------
Callsign: AZG997__
Valores únicos en 'Flight status': ['airborne' 'on-ground']
--------------------------------------------------
Callsign: BIX231H_
Valores únicos en 'Flight status': ['airborne']
--------------------------------------------------
Callsign: BTI2YM__
Valores únicos en 'Flight status': ['airborne']
--------------------------------------------------
Callsig

In [110]:
# Diccionario para almacenar tiempos de espera
wait_times = {}
# Iterar sobre cada vuelo identificado por Callsign
for callsign, group in df_chunk.groupby('Callsign'):
    # Verificar si el vuelo tiene tanto 'on-ground' como 'airborne'
    if not set(['on-ground', 'airborne']).issubset(group['Flight status'].unique()):
        continue

    # Obtener los mensajes 'on-ground' y 'airborne'
    ground_messages = group[group['Flight status'] == 'on-ground']
    airborne_messages = group[group['Flight status'] == 'airborne']

    if ground_messages.empty or airborne_messages.empty:
        continue

    # Obtener el primer mensaje 'on-ground' y el primer mensaje 'airborne'
    first_ground_message = ground_messages.iloc[0]
    print(first_ground_message)
    first_airborne_message = airborne_messages.iloc[0]
    print(first_airborne_message)
    
    last_airborne_message = airborne_messages.iloc[-1]
    if last_airborne_message['Timestamp (date)'] < first_ground_message['Timestamp (date)']:
        print(f"🔹 {callsign} tiene el mensaje 'airborne' antes del mensaje 'on-ground'. Ignorando este vuelo para el cálculo.")
        continue

    # Calcular el tiempo de espera entre el primer mensaje 'on-ground' y el primer mensaje 'airborne'
    wait_time = (first_airborne_message['Timestamp (date)'] - first_ground_message['Timestamp (date)']).total_seconds()
    print(wait_time)
    # Almacenar el tiempo de espera
    wait_times[callsign] = wait_time

# Mostrar los resultados
for callsign, wait_time in wait_times.items():
    print(f" El tiempo de espera para el vuelo {callsign} es {wait_time:.2f} segundos.")

Timestamp (kafka)                 1733013289644
ICAO                                     347604
Flight status                         on-ground
lat                                   40.462158
lon                                   -3.570088
Callsign                               AEA023__
Timestamp (date)     2024-12-01 01:34:49.644000
Name: 41910, dtype: object
Timestamp (kafka)                 1733014251492
ICAO                                     347604
Flight status                          airborne
lat                                   40.498445
lon                                   -3.574614
Callsign                               AEA023__
Timestamp (date)     2024-12-01 01:50:51.492000
Name: 60453, dtype: object
961.848
Timestamp (kafka)                 1733011200622
ICAO                                     343650
Flight status                         on-ground
lat                                   40.499107
lon                                   -3.591324
Callsign                  