In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import db_connection as db

In [13]:
df = db.read_db("flights_cleaned")
df.columns.tolist()
df.head()

Unnamed: 0,latitude,longitude,gps_altitude_m,distance_m,speed_km/s,climb_m,climb_m(delta),climb_rate_m/s,glide_ratio,bearing,delta_bearing,elapsed_time,delta_time,temp,pressure,humidity,dew_point,wind_speed,wind_deg
0,36.980983,29.314417,2083,7.54583,27.164986,-2.0,0.0,-2.0,3.772915,11,3.0,17.0,1.0,25.62,1008.0,35.0,9.04,0.66,75.0
1,36.98105,29.31445,2082,7.971536,28.69753,-3.0,0.0,-3.0,2.657179,21,10.0,18.0,1.0,25.62,1008.0,35.0,9.04,0.66,75.0
2,36.98115,29.3145,2080,11.957302,43.046289,-2.0,0.0,-2.0,5.978651,21,0.0,19.0,1.0,25.62,1008.0,35.0,9.04,0.66,75.0
3,36.981217,29.314567,2079,9.485179,34.146645,-1.0,-14.0,-1.0,9.485179,38,17.0,20.0,1.0,25.62,1008.0,35.0,9.04,0.66,75.0
4,36.981283,29.314633,2078,9.485176,34.146634,0.0,-14.0,0.0,0.0,38,0.0,21.0,1.0,25.62,1008.0,35.0,9.04,0.66,75.0


In [None]:
# 2. Prepare raw input & target arrays (float32)
feature_cols = [
    'gps_altitude_m', 'distance_m', 'speed_km/s', #'climb_m', 
    'glide_ratio', 'bearing', 'delta_bearing',
    'temp', 'pressure', 'humidity', 'dew_point',
    'wind_speed', 'wind_deg'
]
X_raw = df[feature_cols].values.astype(np.float32)
y_raw = df['climb_rate_m/s'].values.astype(np.float32)

# 3. Train-test split on raw data
split_frac = 0.8
split_idx = int(len(X_raw) * split_frac)

X_train_raw = X_raw[:split_idx]
y_train_raw = y_raw[:split_idx]
X_test_raw  = X_raw[split_idx:]
y_test_raw  = y_raw[split_idx:]

# 4. Scale features
scaler = StandardScaler()
scaler.fit(X_train_raw)

X_train_scaled = scaler.transform(X_train_raw)
X_test_scaled  = scaler.transform(X_test_raw)


In [15]:
# 5. Create tf.data datasets with sliding windows
T = 10
batch_size = 32

train_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=X_train_scaled,
    targets=y_train_raw,
    sequence_length=T,
    sequence_stride=1,
    shuffle=False,
    batch_size=batch_size
)

test_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    data=X_test_scaled,
    targets=y_test_raw,
    sequence_length=T,
    sequence_stride=1,
    shuffle=False,
    batch_size=batch_size
)

# 6. Positional encoding
def get_positional_encoding(sequence_length, d_model):
    pos = np.arange(sequence_length)[:, np.newaxis]
    i   = np.arange(d_model)[np.newaxis, :]
    angle_rates = 1 / np.power(10000, (2*(i//2))/d_model)
    angle_rads  = pos * angle_rates
    sines = np.sin(angle_rads[:, 0::2])
    cosines = np.cos(angle_rads[:, 1::2])
    return tf.cast(np.concatenate([sines, cosines], axis=-1), tf.float32)

# 7. Build Transformer regression model
n_features = X_raw.shape[1]
d_model = 64
inputs = layers.Input(shape=(T, n_features))
x = layers.Dense(d_model)(inputs)
pos_encoding = get_positional_encoding(T, d_model)
x = x + pos_encoding

# Single encoder block
attn = layers.MultiHeadAttention(num_heads=4, key_dim=d_model)(x, x)
x = layers.LayerNormalization(epsilon=1e-6)(x + attn)
ffn = models.Sequential([
    layers.Dense(d_model*2, activation='relu'),
    layers.Dense(d_model),
])
x_ffn = ffn(x)
x = layers.LayerNormalization(epsilon=1e-6)(x + x_ffn)

x = layers.GlobalAveragePooling1D()(x)
outputs = layers.Dense(1)(x)

model = models.Model(inputs, outputs)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()


In [16]:
# 8. Train
history = model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=15
)


Epoch 1/15
[1m37923/37923[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 6ms/step - loss: 0.1029 - mae: 0.1597 - val_loss: 0.0274 - val_mae: 0.1267
Epoch 2/15
[1m37923/37923[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 7ms/step - loss: 0.0069 - mae: 0.0461 - val_loss: 0.0113 - val_mae: 0.0778
Epoch 3/15
[1m37923/37923[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 7ms/step - loss: 0.0059 - mae: 0.0427 - val_loss: 0.0149 - val_mae: 0.0959
Epoch 4/15
[1m37923/37923[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 7ms/step - loss: 0.0031 - mae: 0.0309 - val_loss: 0.0045 - val_mae: 0.0455
Epoch 5/15
[1m10856/37923[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m2:39[0m 6ms/step - loss: 0.0026 - mae: 0.0269

KeyboardInterrupt: 

In [None]:

# 9. Evaluate on test set
y_pred = model.predict(test_ds).flatten()
y_true = np.concatenate([y for _, y in test_ds], axis=0)

mae  = np.mean(np.abs(y_true - y_pred))
rmse = np.sqrt(np.mean((y_true - y_pred)**2))
print(f'Test MAE:  {mae:.4f}')
print(f'Test RMSE: {rmse:.4f}')


In [None]:
# 10. Plot error histogram
errors = y_true - y_pred
plt.figure()
plt.hist(errors, bins=50)
plt.title('Prediction Error Histogram')
plt.xlabel('Error (m/s)')
plt.ylabel('Count')
plt.show()


In [None]:
# 8.5 Save the trained model
model.save('transformer_climb_model')