In [54]:
import pandas as pd
from geopy.distance import geodesic
from scipy.stats import zscore
import math
import os

In [55]:
def calculate_bearing(row):
    """Return the initial compass bearing from the previous fix to the current one.

    ``row`` must be indexable by ``previous_latitude``, ``previous_longitude``,
    ``latitude`` and ``longitude``; the values are passed to ``math.radians``,
    i.e. they are interpreted as degrees. The result is an ``int`` in the range
    0..359 (the fractional part is truncated, not rounded), where 0 is north
    and 90 is east.
    """
    start_lat, start_lon, end_lat, end_lon = (
        math.radians(row[key])
        for key in ("previous_latitude", "previous_longitude",
                    "latitude", "longitude"))

    lon_diff = end_lon - start_lon
    cos_end_lat = math.cos(end_lat)

    # East and north components of the direction of travel.
    east = math.sin(lon_diff) * cos_end_lat
    north = (math.cos(start_lat) * math.sin(end_lat)
             - (math.sin(start_lat) * cos_end_lat * math.cos(lon_diff)))

    # atan2 yields -180..180 degrees; shift and wrap it into 0..360.
    heading_deg = math.degrees(math.atan2(east, north))
    return int((heading_deg + 360) % 360)


def calculate_distance(row):
    """Return the geodesic distance, in metres, between a row's two fixes.

    ``row`` must be indexable by ``previous_latitude``, ``previous_longitude``,
    ``latitude`` and ``longitude``.
    """
    previous_fix = (row["previous_latitude"], row["previous_longitude"])
    current_fix = (row["latitude"], row["longitude"])
    return geodesic(previous_fix, current_fix).meters


def prepare_data(df):
    """Derive per-fix flight metrics and return only the "thermal" rows.

    The frame is treated as one ordered track: its first row supplies the
    take-off position, and every other row is compared with the row directly
    before it. No trimming of pre-take-off rows is performed.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``latitude``, ``longitude``, ``gps_altitude_m`` and
        ``datetime``. ``datetime`` may hold strings or timestamps; it is
        converted with ``pd.to_datetime``. The frame passed in is not
        modified. It should contain at least two rows.

    Returns
    -------
    pandas.DataFrame
        A new frame that omits the first input row, has every missing value
        replaced by 0, is restricted to rows whose ``zone`` is ``"thermal"``,
        and carries these additional columns:

        * ``previous_latitude`` / ``previous_longitude`` - position of the
          preceding row.
        * ``distance_from_takeoff_m`` - geodesic metres to the first input row.
        * ``distance_m`` - geodesic metres from the preceding row.
        * ``speed_km/s`` - kilometres divided by hours, i.e. km/h; the column
          name is kept unchanged so existing consumers keep working.
        * ``climb_m`` / ``climb_m(delta)`` - ``gps_altitude_m`` change versus
          the preceding row / versus 20 rows earlier.
        * ``climb_rate_m/s`` - ``climb_m`` divided by the seconds since the
          preceding row.
        * ``bearing`` - result of ``calculate_bearing``.
        * ``delta_bearing`` - absolute bearing change, folded into 0..180.
        * ``glide_ratio`` - ``distance_m / abs(climb_m)``, 0 when ``climb_m``
          is 0.
        * ``elapsed_time`` - seconds since the first retained row.
        * ``zone`` - ``"thermal"`` when ``climb_m(delta)`` or
          ``climb_rate_m/s`` is positive.
    """
    # Work on a copy: adding columns to and dropping a row from the caller's
    # frame made a second call on the same frame produce different results.
    df = df.copy()
    # read_csv leaves timestamps as strings; the .diff()/.dt arithmetic below
    # needs real datetimes. This is a no-op for already converted columns.
    df["datetime"] = pd.to_datetime(df["datetime"])

    df["previous_latitude"] = df["latitude"].shift(1)
    df["previous_longitude"] = df["longitude"].shift(1)

    # Look the take-off position up once instead of once per row.
    takeoff = (df["latitude"].iloc[0], df["longitude"].iloc[0])
    df["distance_from_takeoff_m"] = df.apply(
        lambda row: geodesic(
            (row["latitude"], row["longitude"]), takeoff).meters, axis=1)

    # The first row has no predecessor. Drop it by position: a label-based
    # drop would remove every row sharing that label on a non-unique index.
    df = df.iloc[1:].copy()

    df["distance_m"] = df.apply(calculate_distance, axis=1)

    # NOTE(review): a row with the same timestamp as its predecessor gives a
    # zero divisor here, so speed / climb rate become inf (or NaN); only the
    # NaN case is replaced by the fillna(0) below.
    seconds = df["datetime"].diff().dt.total_seconds()
    df["speed_km/s"] = (df["distance_m"] / 1000) / (seconds / 3600)
    df["climb_m"] = df["gps_altitude_m"].diff()
    df["climb_m(delta)"] = df["gps_altitude_m"].diff(20)
    df["climb_rate_m/s"] = df["climb_m"] / seconds
    df["bearing"] = df.apply(calculate_bearing, axis=1)
    # Fold the signed bearing change into -180..180 before taking |.|.
    df["delta_bearing"] = ((df["bearing"].diff() + 180) % 360 - 180).abs()

    # Level flight (climb_m == 0) gets a ratio of 0 instead of a division by
    # zero; an undefined climb stays NaN here and becomes 0 in fillna below.
    climb = df["climb_m"]
    df["glide_ratio"] = (df["distance_m"] / climb.abs()).where(climb != 0, 0)

    # Applies to every column, including the ones that came with the input.
    df = df.fillna(0)

    df["elapsed_time"] = (
        df["datetime"] - df["datetime"].iloc[0]).dt.total_seconds()

    is_thermal = (df["climb_m(delta)"] > 0) | (df["climb_rate_m/s"] > 0)
    df["zone"] = "standart"
    df.loc[is_thermal, "zone"] = "thermal"
    return df[is_thermal]


In [56]:
# Load the flight data from data/flight_data.csv. low_memory=False makes
# pandas infer each column's dtype from the whole file rather than per chunk.
# Note that no date parsing is requested, so "datetime" is read as strings
# (object dtype, as the info() output shows).
df = pd.read_csv(os.path.join("data", "flight_data.csv"), low_memory=False)
# Print column dtypes and memory usage, then preview the first rows.
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2344002 entries, 0 to 2344001
Data columns (total 13 columns):
 #   Column               Dtype  
---  ------               -----  
 0   filename             object 
 1   datetime             object 
 2   pilot                object 
 3   latitude             float64
 4   longitude            float64
 5   gps_altitude_m       int64  
 6   pressure_altitude_m  int64  
 7   temp                 float64
 8   pressure             float64
 9   humidity             float64
 10  dew_point            float64
 11  wind_speed           float64
 12  wind_deg             float64
dtypes: float64(8), int64(2), object(3)
memory usage: 232.5+ MB


Unnamed: 0,filename,datetime,pilot,latitude,longitude,gps_altitude_m,pressure_altitude_m,temp,pressure,humidity,dew_point,wind_speed,wind_deg
0,file_69489_2020-07-03-XCT-XXX-02.igc,2020-07-03 07:42:07,,36.980733,29.31435,2087,2130,25.62,1008.0,35.0,9.04,0.66,75.0
1,file_69489_2020-07-03-XCT-XXX-02.igc,2020-07-03 07:42:08,,36.980733,29.31435,2087,2130,25.62,1008.0,35.0,9.04,0.66,75.0
2,file_69489_2020-07-03-XCT-XXX-02.igc,2020-07-03 07:42:09,,36.980733,29.31435,2087,2130,25.62,1008.0,35.0,9.04,0.66,75.0
3,file_69489_2020-07-03-XCT-XXX-02.igc,2020-07-03 07:42:10,,36.980733,29.31435,2087,2130,25.62,1008.0,35.0,9.04,0.66,75.0
4,file_69489_2020-07-03-XCT-XXX-02.igc,2020-07-03 07:42:11,,36.980733,29.31435,2087,2130,25.62,1008.0,35.0,9.04,0.66,75.0


In [57]:
# Summary statistics of the numeric columns before any filtering.
df.describe()

Unnamed: 0,latitude,longitude,gps_altitude_m,pressure_altitude_m,temp,pressure,humidity,dew_point,wind_speed,wind_deg
count,2344002.0,2344002.0,2344002.0,2344002.0,2344002.0,2344002.0,2344002.0,2344002.0,2344002.0,2344002.0
mean,38.43023,31.96094,2175.27,2623.137,30.78743,1007.535,26.87358,8.988506,3.196908,213.1267
std,1.048362,2.631969,2018.055,725.6296,3.560259,2.609563,7.290759,3.105485,1.352502,120.0057
min,36.50395,24.63248,-7347.0,0.0,18.38,999.0,6.0,-5.28,0.0,0.0
25%,37.46465,30.03208,1128.0,2120.0,28.4,1006.0,22.0,7.0,2.19,109.0
50%,38.38428,32.00002,2274.0,2623.0,30.51,1007.0,27.0,9.28,3.22,241.0
75%,39.18852,32.67773,2859.0,3107.0,33.13,1009.0,32.0,11.11,4.06,317.0
max,42.72443,41.0491,16199.0,9999.0,41.11,1018.0,58.0,22.25,8.7,360.0


In [58]:
# Earlier approach, currently disabled: replace an altitude value of 6000 or
# more with the value from the other altitude column.
# df["gps_altitude_m"] = df.apply(lambda row: row["gps_altitude_m"] if row["gps_altitude_m"] < 6000 else row["pressure_altitude_m"], axis=1)
# df["pressure_altitude_m"] = df.apply(lambda row: row["pressure_altitude_m"] if row["pressure_altitude_m"] < 6000 else row["gps_altitude_m"], axis=1)
# Standardise both altitude columns over all rows of the loaded data.
df["gpszscore"] = zscore(df["gps_altitude_m"])
df["presurezscore"] = zscore(df["pressure_altitude_m"])
# Keep only rows whose GPS altitude is within 3 standard deviations of the mean.
# NOTE(review): "presurezscore" is computed but not used by this filter, so
# pressure-altitude outliers stay in the data - confirm this is intended.
# NOTE(review): this cell rebinds df; running it again recomputes the z-scores
# on the already filtered rows and filters a second time.
df = df[df["gpszscore"].abs() < 3]
# Summary statistics after the GPS-altitude outlier filter.
df.describe()

Unnamed: 0,latitude,longitude,gps_altitude_m,pressure_altitude_m,temp,pressure,humidity,dew_point,wind_speed,wind_deg,gpszscore,presurezscore
count,2274436.0,2274436.0,2274436.0,2274436.0,2274436.0,2274436.0,2274436.0,2274436.0,2274436.0,2274436.0,2274436.0,2274436.0
mean,38.40892,31.9333,1896.002,2633.565,30.87671,1007.431,26.75036,8.988623,3.155061,211.228,-0.138385,0.01437084
std,1.051894,2.66611,1222.744,727.9487,3.53894,2.526291,7.318322,3.092988,1.339419,119.605,0.6059026,1.003196
min,36.50395,24.63248,-3602.0,0.0,18.38,999.0,6.0,-5.28,0.0,0.0,-2.862792,-3.614982
25%,37.4376,29.99637,1028.0,2126.0,28.45,1006.0,22.0,7.03,2.14,95.0,-0.5685032,-0.685112
50%,38.351,31.76914,2235.0,2635.0,30.6,1007.0,27.0,9.28,3.16,240.0,0.02959773,0.01634798
75%,39.15744,32.63468,2809.0,3123.0,33.22,1009.0,32.0,11.11,4.01,315.0,0.3140301,0.6888675
max,42.72443,41.0491,4776.0,9999.0,41.11,1018.0,58.0,22.25,8.7,360.0,1.288731,10.16478
