In [118]:
import pandas as pd
from geopy.distance import geodesic
import math

def calculate_bearing(lat1, lon1, lat2, lon2):
    lat1 = math.radians(lat1)
    lon1 = math.radians(lon1)
    lat2 = math.radians(lat2)
    lon2 = math.radians(lon2)

    delta_lon = lon2 - lon1

    x = math.sin(delta_lon) * math.cos(lat2)
    y = math.cos(lat1) * math.sin(lat2) - (math.sin(lat1) * math.cos(lat2) * math.cos(delta_lon))

    initial_bearing = math.atan2(x, y)
    initial_bearing = math.degrees(initial_bearing)
    compass_bearing = (initial_bearing + 360) % 360

    return compass_bearing


def calculate_distance(row):
    return geodesic((row["previous_latitude"], row["previous_longitude"]), (row["latitude"], row["longitude"])).meters


df = pd.read_csv("flight_data.csv")
df.head()

df["date"] = pd.to_datetime(df["date"] + " " + df["time"])

flight_loginterval = (df["date"].iloc[1] - df["date"].iloc[0]).total_seconds()

df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34763 entries, 0 to 34762
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   date                 34763 non-null  datetime64[ns]
 1   pilot                30878 non-null  object        
 2   time                 34763 non-null  object        
 3   latitude             34763 non-null  float64       
 4   longitude            34763 non-null  float64       
 5   gps_altitude_m       34763 non-null  int64         
 6   pressure_altitude_m  34763 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(2), object(2)
memory usage: 1.9+ MB


Unnamed: 0,date,pilot,time,latitude,longitude,gps_altitude_m,pressure_altitude_m
0,2013-10-26 08:47:00,,08:47:00,36.539,29.169517,1643,1703
1,2013-10-26 08:47:01,,08:47:01,36.538983,29.169433,1641,1702
2,2013-10-26 08:47:02,,08:47:02,36.538967,29.16935,1638,1702
3,2013-10-26 08:47:03,,08:47:03,36.538933,29.169283,1638,1701
4,2013-10-26 08:47:04,,08:47:04,36.5389,29.1692,1637,1701


In [119]:
df["elapsed_time"] = (df["date"] - df["date"].iloc[0]).dt.total_seconds()
df["previous_latitude"] = df["latitude"].shift(1)
df["previous_longitude"] = df["longitude"].shift(1)
df.drop(df.index[0], inplace=True)
df["climb_m"] = df["gps_altitude_m"].diff()
df["climb_rate_m/s"] = df["climb_m"] / df["date"].diff().dt.total_seconds()
df["distance_m"] = df.apply(calculate_distance, axis=1)
# df["total_distance_m"] = df["distance_m"].cumsum()
df["speed_km/s"] = ((df["distance_m"]/1000) / (df["date"].diff().dt.total_seconds()/3600))
# df["distance_from_start_m"] = df.apply(lambda row: geodesic((df["latitude"].iloc[0], df["longitude"].iloc[0]), (row["previous_latitude"], row["previous_longitude"])).meters, axis=1)
df["bearing"] = df.apply(lambda row: calculate_bearing(
    row["previous_latitude"], row["previous_longitude"],
    row["latitude"], row["longitude"]
), axis=1)
df["glide_ratio"] = df.apply(
    lambda row: row["distance_m"] / abs(row["climb_m"]) if row["climb_m"] != 0 else 0,
    axis=1
)
df.fillna(0, inplace=True)
df.drop(["pilot", "time", "previous_latitude", "previous_longitude", "climb_m", "distance_m"], axis=1, inplace=True)
df.to_csv("flight_data_processed.csv", index=False)
df.head()

Unnamed: 0,date,latitude,longitude,gps_altitude_m,pressure_altitude_m,elapsed_time,climb_rate_m/s,speed_km/s,bearing,glide_ratio
1,2013-10-26 08:47:01,36.538983,29.169433,1641,1702,1.0,0.0,0.0,256.021716,0.0
2,2013-10-26 08:47:02,36.538967,29.16935,1638,1702,2.0,-3.0,27.67666,256.021719,2.562654
3,2013-10-26 08:47:03,36.538933,29.169283,1638,1701,3.0,0.0,25.282214,238.105418,0.0
4,2013-10-26 08:47:04,36.5389,29.1692,1637,1701,4.0,-1.0,29.98318,243.533545,8.328661
5,2013-10-26 08:47:05,36.53885,29.169133,1638,1702,5.0,1.0,29.340168,226.970745,8.150047
