In [1]:
import pandas as pd
import numpy as np
import pickle
import tensorflow as tf
from sklearn.metrics.pairwise import cosine_similarity
import os
import prepare_flightlog as fl


In [2]:
base_dir = os.path.dirname(os.getcwd())
flightlogs_dir = os.path.join(base_dir, "data", "flightlogs")
files = [f for f in os.listdir(flightlogs_dir)
         if f.endswith(".igc") or f.endswith(".IGC")]
file_path = os.path.join(flightlogs_dir, files[0])
df = fl.prepare_data(file_path)
df_new = df[['latitude', 'longitude',
                'gps_altitude_m', 'distance_m', 'speed_km/s',
                'climb_m', 'climb_m(delta)', 'climb_rate_m/s',
                'glide_ratio', 'bearing', 'delta_bearing',
                'elapsed_time', 'delta_time', 'temp',
                'pressure', 'humidity', 'dew_point',
                'wind_speed', 'wind_deg']]
df_new.head()

Unnamed: 0,latitude,longitude,gps_altitude_m,distance_m,speed_km/s,climb_m,climb_m(delta),climb_rate_m/s,glide_ratio,bearing,delta_bearing,elapsed_time,delta_time,temp,pressure,humidity,dew_point,wind_speed,wind_deg
0,40.031567,32.3282,1112,9.33672,33.612191,0.0,,0.0,0.0,322,19.0,19.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0
1,40.031617,32.32815,1112,7.002538,25.209136,1.0,-5.0,1.0,7.002538,322,0.0,20.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0
2,40.031683,32.3281,1115,8.544485,30.760145,1.0,-4.0,1.0,8.544485,330,8.0,21.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0
3,40.031733,32.32805,1115,7.002533,25.20912,0.0,-4.0,0.0,0.0,322,8.0,22.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0
4,40.031783,32.328,1115,7.002531,25.209113,-1.0,-5.0,-1.0,7.002531,322,0.0,23.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0


In [3]:
df_new.describe()

Unnamed: 0,latitude,longitude,gps_altitude_m,distance_m,speed_km/s,climb_m,climb_m(delta),climb_rate_m/s,glide_ratio,bearing,delta_bearing,elapsed_time,delta_time,temp,pressure,humidity,dew_point,wind_speed,wind_deg
count,21408.0,21408.0,21408.0,21408.0,21408.0,21408.0,21407.0,21408.0,21408.0,21408.0,21408.0,21408.0,21408.0,21408.0,21408.0,21408.0,21408.0,21408.0,21408.0
mean,39.664984,32.965726,2152.724916,12.971721,46.648673,0.005512,0.110898,0.006041,6.792881,142.498552,8.99603,10727.409099,1.000794,27.739503,1007.19119,28.072029,7.583076,3.189573,296.513266
std,0.256278,0.583069,538.66325,4.995859,16.413621,2.152427,36.710276,2.150867,5.349895,76.447798,10.232374,6186.151049,0.116188,1.438497,1.079077,2.095369,1.170185,0.887649,23.890935
min,39.20215,32.315817,1010.0,0.0,0.0,-12.0,-90.0,-7.0,0.0,0.0,0.0,19.0,1.0,25.16,1006.0,24.0,5.26,1.63,210.0
25%,39.434546,32.393788,1822.0,9.361822,33.70256,-2.0,-31.0,-2.0,2.861726,96.0,2.0,5370.75,1.0,26.48,1006.0,26.0,6.79,2.6,287.0
50%,39.652058,32.787458,2152.0,13.266139,47.7581,0.0,1.0,0.0,5.835394,129.0,6.0,10722.5,1.0,28.07,1007.0,28.0,8.04,3.58,301.0
75%,39.900458,33.533704,2593.0,16.714214,60.171172,2.0,27.0,2.0,9.373873,186.0,13.0,16091.25,1.0,29.05,1008.0,29.0,8.44,3.94,314.0
max,40.043317,34.097983,3469.0,311.80977,89.784184,8.0,95.0,8.0,25.984148,353.0,180.0,21443.0,18.0,30.81,1009.0,32.0,9.63,5.14,328.0


In [4]:
# --- 1. Load saved scaler and Transformer model ---
scaler_path = os.path.join('models', 'scaler.pkl')
model_path  = os.path.join('models', 'transformer_tunned_model.keras')

with open(scaler_path, 'rb') as f:
    scaler = pickle.load(f)

model = tf.keras.models.load_model(model_path)
print(f"Loaded Transformer model from '{model_path}'")


# Define feature columns (everything except the target)
feature_cols = [c for c in df_new.columns if c != 'climb_rate_m/s']


Loaded Transformer model from 'models/transformer_tunned_model.keras'


In [5]:

# --- 3. Scale features ---
X_new = df_new[feature_cols].values.astype(np.float32)
X_new_scaled = scaler.transform(X_new)

# --- 4. Create sliding‐window dataset for inference ---
T = 10        # same window length as training
batch_size = 32

predict_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=X_new_scaled,
    targets=None,
    sequence_length=T,
    sequence_stride=1,
    shuffle=False,
    batch_size=batch_size
)

In [6]:
# --- 5. Run predictions ---
preds = model.predict(predict_ds).flatten()

# --- 6. Align predictions with original data indices or timestamps ---
# If your DataFrame has a timestamp column, you can use that instead.
# number of windows = N – T + 1
preds = model.predict(predict_ds).flatten()
# Align preds to df_new by taking index[T-1:]
indices = df_new.index[T-1:]
# Just in case, truncate to the same length:
indices = indices[: len(preds)]

indices = df_new.index[T-1 : T-1 + len(preds)]
df_new.loc[indices, 'predicted_climb_rate_m/s'] = preds

[1m669/669[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m669/669[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new.loc[indices, 'predicted_climb_rate_m/s'] = preds


In [7]:
df_new = df_new[df_new['predicted_climb_rate_m/s'].notna()]
df_new.head()

Unnamed: 0,latitude,longitude,gps_altitude_m,distance_m,speed_km/s,climb_m,climb_m(delta),climb_rate_m/s,glide_ratio,bearing,delta_bearing,elapsed_time,delta_time,temp,pressure,humidity,dew_point,wind_speed,wind_deg,predicted_climb_rate_m/s
10,40.03225,32.327767,1111,13.031988,46.915158,-1.0,-11.0,-1.0,13.031988,6,14.0,29.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0,1.393995
11,40.032383,32.3278,1109,15.075605,54.272178,-2.0,-13.0,-2.0,7.537802,10,4.0,30.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0,1.396086
12,40.0325,32.327867,1109,14.148772,50.93558,1.0,-12.0,1.0,14.148772,23,13.0,31.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0,1.233947
13,40.032617,32.327917,1109,13.638989,49.10036,1.0,-10.0,1.0,13.638989,18,5.0,32.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0,1.160494
14,40.032733,32.327983,1110,14.148765,50.935553,0.0,-10.0,0.0,0.0,23,5.0,33.0,1.0,26.09,1009.0,32.0,8.12,1.63,287.0,1.105236


In [9]:
y_true = df_new['climb_rate_m/s'].values
y_pred = df_new['predicted_climb_rate_m/s'].values

# 2) Pearson korelasyon (−1…1 → map 0…1)
pearson_r = np.corrcoef(y_true, y_pred)[0,1]
pearson_sim = (pearson_r + 1) / 2
pearson_percent = pearson_sim * 100
print(f"Pearson similarity: {pearson_percent:.2f}%")

Pearson similarity: 77.46%


In [12]:
# Orijinal y_true, y_pred
y_true_centered = y_true - np.mean(y_true)
y_pred_centered = y_pred - np.mean(y_pred)

# Normalize
y_true_n = y_true_centered / np.linalg.norm(y_true_centered)
y_pred_n = y_pred_centered / np.linalg.norm(y_pred_centered)

cos_sim2 = cosine_similarity(
    y_true_n.reshape(1,-1),
    y_pred_n.reshape(1,-1)
)[0,0]
print(f"Centered Cosine: {cos_sim2*100:.2f}%")

Centered Cosine: 54.91%
