In [1]:
# Importing standard libraries
import os
import sys
import math
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from geopy.distance import geodesic
import logging
import geopandas as gpd
import osmnx as ox
from shapely.geometry import Point, Polygon, MultiPolygon
# Importing existing functions from 'circling' module
from circling import compute_heading_transition, _calc_bearing, compute_overall_heading, detect_overall_circling, detect_circling_behavior
import geopandas as gpd
# Importing custom IGC parser
from parser import igc2df  # Ensure this function correctly parses IGC files into DataFrames
import rasterio
from rasterio.features import rasterize
from rasterio.transform import from_origin, rowcol
from pathlib import Path
import tqdm 
from joblib import Parallel, delayed
import ee
from pyproj import Transformer
from pilot_behavior_functions import compute_time_weighted_average, extract_tas_field_positions, parse_b_records, get_true_airspeed_stats_for_engine_runs_from_row, get_all_engine_start_speeds, get_first_engine_agl, get_agl_all_events, count_pct_engine_starts_below_1000, LANDCOVER_CLASSES, calc_speed_mean_post_event_window, calc_speed_mean_std_pre_event_window, convert_to_datetime, define_time_window, load_flight_data, determine_steady_descent_during_pre_event_window, calculate_descent_rate, calculate_distance_traveled, detect_climb_attempt, detect_multiple_start_stop, clean_time_entries, get_first_engine_event, get_terrain_label_at_first_engine_event_gee, parse_int_list, parse_float_list, parse_engine_run_info, get_dem_elevation_gee
import re
from typing import Dict, List, Tuple, Any, Optional
from ee_helpers import ensure_ee_initialized, LANDCOVER_DATASET


Window Start: 2024-06-24 23:00:00 Window End: 2024-06-24 23:04:00


In [2]:
# Configure logging
logging.basicConfig(
    filename='flight_processing.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)


In [3]:
glider_types_df = pd.read_csv("glider-types - capstone.csv")

In [6]:
# Define the path to your CSV file
csv_file_path1 = 'transformed data 700 600 150/Flt-times-updated.586395.csv'
csv_file_path2 = 'transformed data 700 600 150/Flt-times-updated.586473_wgc.csv'
# Load the updated CSV
try:
    df1 = pd.read_csv(csv_file_path1)
    df2 = pd.read_csv(csv_file_path2)
    df =  pd.concat([df1, df2], ignore_index=True)
    logging.info("CSV file loaded and merged successfully.")
except Exception as e:
    logging.error(f"Error loading CSV file: {e}")
    sys.exit(1)


2025-04-15 22:26:38,866 - INFO - CSV file loaded and merged successfully.


# List all files in the "TAS" folder.
tas_files = set(os.listdir("TAS"))

# Filter the DataFrame to keep only rows where the File is in the TAS folder.
df= df[df["File"].isin(tas_files)]


print(df.shape)

In [7]:
df['Gtype'] = df['Gtype'].astype(str).str.strip()



In [8]:
# Unique replacements dictionary (only new, non-repeating keys)
unique_replacements = {
    "15m_N369QT_JS-3": "JS3",
    "18mN369QTJS-3": "JS3",
    "JS3-18A860": "JS3",
    "808C": "DG-808 C",
    "808CComp": "DG-808 C",
    "808C Comp": "DG-808 C",
    "AMSFlightCaratA": "Carat",
    "Antares20E": "Antares 20E",
    "ApisM15m": "APIS",
    "Arcus": "ARCUS",
    "ArcusM": "ARCUS M",
    "Arcus M": "ARCUS M",
    "ARCUSM": "ARCUS M",
    "ARCUSM2pilot": "ARCUS M",
    "ArcusM2pilot": "ARCUS M",
    "ARCUST": "ARCUS T",
    "ArcusT": "ARCUS T",
    "as33": "AS 33ES",
    "AS 33 Es 18m": "AS 33ES",
    "AS 33Es 18m": "AS 33ES",
    "AS33-18": "AS 33ES",
    "AS33Es": "AS 33ES",
    "AS33ES": "AS 33ES",
    "18mAS33@ventus": "AS 33ES",
    "AS33Es18m": "AS 33ES",
    "ASW24E":"ASW-24E",
    "ASG 29E 15m": "ASW 27-18 E (ASG-29E)",
    "ASG 29E 18m": "ASW 27-18 E (ASG-29E)",
    "ASG 29ES": "ASW 27-18 E (ASG-29E)",
    "ASG 32 Mi": "ASG 32 MI",
    "ASG-29": "ASW 27-18 E (ASG-29E)",
    "ASG-32Mi": "ASG 32 MI",
    "ASG29": "ASW 27-18 E (ASG-29E)",
    "ASG2918": "ASW 27-18 E (ASG-29E)",
    "ASG29E": "ASW 27-18 E (ASG-29E)",
    "ASG29E.": "ASW 27-18 E (ASG-29E)",
    "ASG29E15m": "ASW 27-18 E (ASG-29E)",
    "ASG29E18m": "ASW 27-18 E (ASG-29E)",
    "ASG29Es": "ASW 27-18 E (ASG-29E)",
    "NixusAsh30polar": "ASH30 NIXUS",
    "ASG31Mi": "ASG31mi",
    "ASG32Mi": "ASG 32 MI",
    "ASG 32Mi": "ASG 32 MI",
    "ASG32mi": "ASG 32 MI",
    "ASH 31 MI-Wet": "ASH 31 MI",
    "ASH 31 MI18": "ASH 31 MI",
    "ASH 31 MI21m": "ASH 31 MI",
    "ASH 31/18m": "ASH 31 MI",
    "ASH 31 mi": "ASH 31 MI",
    "ASH 31/21m": "ASH 31 MI",
    "ASH-31": "ASH 31 MI",
    "ASH 31 Mi": "ASH 31 MI",
    "ASH31/18m": "ASH 31 MI",
    "ash-26e": "ASH 26 E",
    "ASH25mi": "ASH25M",
    "ASH26E": "ASH 26 E",
    "ASH 26E": "ASH 26 E",
    "ASH30Mi": "ASH 30 MI",
    "ASH31/21M": "ASH 31 MI",
    "ASH31/21m": "ASH 31 MI",
    "ASH31/21m-Wet": "ASH 31 MI",
    "ASH3118": "ASH 31 MI",
    "ASH3118Mjune": "ASH 31 MI",
    "ASH31  18M  june": "ASH 31 MI",
    "ASH31": "ASH 31 MI",
    "ASH3121": "ASH 31 MI",
    "ASH31mi": "ASH 31 MI",
    "ASH 31/21m-Wet": "ASH 31 MI",
    "ASH31MI": "ASH 31 MI",
    "ASH31Mi": "ASH 31 MI",
    "ASH31mi18": "ASH 31 MI",
    "ASH31Mi18m": "ASH 31 MI",
    "ASH31-18": "ASH 31 MI",
    "ASH31Mi21m": "ASH 31 MI",
    "ASH31mi21m": "ASH 31 MI",
    "AstirCS": "ASTIR CS",
    "ASW 27-18 E (ASG-29E)15m": "ASW 27-18 E (ASG-29E)",
    "ASG-29E-18": "ASW 27-18 E (ASG-29E)",
    "ASW 27-18 E (ASG-29E)18": "ASW 27-18 E (ASG-29E)",
    "ASW 27-18 E (ASG-29E)18M": "ASW 27-18 E (ASG-29E)",
    "ASW 27-18 E (ASG-29E)18m": "ASW 27-18 E (ASG-29E)",
    "ASW 27-18 E (ASG-29E)m": "ASW 27-18 E (ASG-29E)",
    "ASW 27-18 E (ASG-29E)s": "ASW 27-18 E (ASG-29E)",
    "ASG29ES": "ASW 27-18 E (ASG-29E)",
    "ASW-27b": "ASW 27",
    "ASW 27B": "ASW 27",
    "ASW-27FES": "ASW-27",
    "ASW-27Min5Pct": "ASW-27",
    "ASW15": "ASW-15",
    "ASW19": "ASW-19",
    "ASW20": "ASW20F",
    "ASW22BL": "ASW-22BL",
    "ASW24": "ASW-24",
    "ASW27": "ASW-27",
    "ASW27-18E": "ASW 27-18 E (ASG-29E)",
    "ASW2718": "ASW 27-18 E (ASG-29E)",
    'ASG29/18m': "ASW 27-18 E (ASG-29E)",
    'ASG2915m': "ASW 27-18 E (ASG-29E)",
    'ASG2918M': "ASW 27-18 E (ASG-29E)",
    'ASG29Es/18m': "ASW 27-18 E (ASG-29E)",
    "ASW27B": "ASW-27",
    "ASW27b": "ASW 27",
    "ASW27FES": "ASW 27",
    "ASW27Min5Pct": "ASW-27",
    "ASW28": "ASW-28",
    "CARAT": "Carat",
    "CaratA": "Carat",
    "Carat A": "Carat",
    "CARATA": "Carat",
    "DISCUS2A": "DISCUS 2A",
    "D2CT": "DISCUS-2CT",
    "Discus 2cT": "DISCUS-2CT",
    "Discus 2CT": "DISCUS-2CT",
    "DG-1000T": "DG-1000T",
    "DG-400/17m": "DG-400",
    "DG-400 17": "DG-400",
    "DG-40017": "DG-400",
    "DG-400M/17/17m": "DG-400M/17",
    "DG-800B/18m": "DG-800B",
    "DG-800B15": "DG-800B",
    "DG-800BB": "DG-800B",
    "DG-800BB15": "DG-800B",
    "DG-800BS/18m": "DG-800B",
    "DG800B": "DG-800B",
    'DG800B15': "DG-800B",
    "Dg-808": "DG-800B",
    "DG-808 C15": "DG-808 C",
    "DG-808 C18": "DG-808 C",
    "DG-808B": "DG-808 C",
    "DG-808C": "DG-808 C",
    "DG-808C18": "DG-808 C",
    "DG1000/20m": "DG-1000M",
    "DG1000t": "DG-1000T",
    "DG1001M": "DG-100",
    "DG400": "DG-400",
    "DG400/17m": "DG-400",
    "DG800": "DG-800",
    "dG800.15": "DG-800",
    "DG800/18m": "DG-800",
    "DG80015": "DG-800",
    "DG800S/18m": "DG-800S",
    "DG808B": "DG-808 C",
    "800B15": "DG-800B",
    "DG808B/18m": "DG-808 C",
    "DG800B": "DG-808 C",
    "DG800B15":"DG-808 C",
    "DG808C15":"DG-808 C",
    "DG808C18":"DG-808 C",
    "DG808c": "DG-808 C",
    "DG808C": "DG-808 C",
    "DG808S": "DG-808 S",
    "Diana-2": "SZD-56-3 DIANA-2 FES",
    "Diana2": "SZD-56-3 DIANA-2 FES",
    "Discu2": "DISCUS-2B",
    "Discus": "DISCUS",
    "DISCUS-2BA": "DISCUS-2B",
    "DISCUS-2Bb": "DISCUS-2B",
    "DISCUS-2BB": "DISCUS-2B",
    "DISCUS-2BC": "DISCUS-2B",
    "DISCUS-2BCT": "DISCUS-2B",
    "DISCUS-2Bct": "DISCUS-2B",
    "DISCUS-2BT/18m": "DISCUS-2B",
    "DISCUS-2C": "DISCUS-2CT",
    "Discus2": "DISCUS-2B",
    "DISCUS2": "DISCUS-2B",
    "Discus2b": "DISCUS-2B",
    "Discus2B": "DISCUS-2B",
    "Discus2c": "DISCUS 2C",
    "DISCUS2C": "DISCUS 2C",
    "Discus 2c": "DISCUS 2C",
    "DISCUS2CT": "DISCUS-2CT",
    "Discus2ct": "DISCUS-2CT",
    "Discus2CT": "DISCUS-2CT",
    "Discus2cT": "DISCUS-2CT",
    "Discus2T/18m": "DISCUS 2T",
    "DiscusbT": "DISCUS-2B",
    "DISCUSbT": "DISCUS BT",
    "DiscusCS": "DISCUS CS",
    "Discus CS": "DISCUS CS",
    "Discus 2": "DISCUS 2A",
    "DiscusIIa": "DISCUS 2A",
    "DUODISCU": "DUO DISCUS",
    "DuoDiscus": "DUO DISCUS",
    "DUODISCUS": "DUO DISCUS",
    "Duo Discus": "DUO DISCUS",
    "DUODiscus": "DUO DISCUS",
    "DuoDiscusT": "DUO DISCUS T",
    "DuoDiscusXL": "DUO DISCUS",
    "DuoDiscusXLT": "DUO DISCUS T",
    "EB29/25.3m": "EB29",
    "EB 29/28.3m": "EB29",
    "EB29/28.3m": "EB29",
    "EB 29/25.3m": "EB29",
    "EB 29R": "EB29R",
    "EB29RGBD": "EB29R",
    "Genesis2": "GENESIS 2",
    "GLASFLUGEL 304 MSS": "GLASFLUGEL 304 MS",
    "GLASFLUGEL 304S JETJ": "GLASFLUGEL 304S JET",
    "Glasflugel304MS": "GLASFLUGEL 304 MS",
    "HpH 304MS": "GLASFLUGEL 304 MS",
    "HpH 304M": "GLASFLUGEL 304 MS",
    "Hph304C": "GLASFLUGEL 304C",
    "HpH304M": "GLASFLUGEL 304 MS",
    "HpH304MS": "GLASFLUGEL 304 MS",
    "HpH304S": "GLASFLUGEL 304S JET",
    "Hph304S": "GLASFLUGEL 304S JET",
    "Hph304 S": "GLASFLUGEL 304S JET",
    "HpH304SJ": "GLASFLUGEL 304S JET",
    "HpH 304SJ": "GLASFLUGEL 304S JET",
    "JonkersJS3": "JS3",
    "18m N369QT JS-3": "JS3",
    "18m_N369QT_JS-3": "JS3",
    "JS-1": "JS1-C",
    "JS-1-18m": "JS1-C",
    "JS-1-21m": "JS1-C",
    "JS-3-15m": "JS3",
    "JS-3-18m": "JS3",
    "JS3-18m": "JS3",
    "JS1-C-21m": "JS1-C",
    "JS3jet": "JS3",
    "JS-3-18m jet": "JS3",
    "JS-3-18mjet": "JS3",
    "JS-3-18m_MM": "JS3",
    "JS-3-18mAbm": "JS3",
    "JS3-18A860": "JS3",
    "JS-5-24m": "JS-5",
    "JS5": "JS-5",
    "LAK-17A/18m": "LAK-17A",
    "LAK-17bMini": "LAK-17B FES MINI",
    "Lak-17bMini.": "LAK-17B FES MINI",
    "Lak-17b Mini.": "LAK-17B FES MINI",
    "LAK17/18m": "LAK-17B FES",
    "LAK17b FES": "LAK-17B FES",
    "LAK-13.5m FES": "LAK-17B FES",
    "LAK-13.5mFES": "LAK-17B FES",
    "LAK17AT": "LAK-17AT",
    "LAK17AT/18m": "LAK-17AT",
    "LAK17B-18FES": "LAK-17B FES",
    "LAK17BFES": "LAK-17B FES",
    "LAK17bFES": "LAK-17B FES",
    "LS-8": "LS8",
    "LS 10-s-18": "LS 10-ST",
    "LS10-s-18": "LS 10-ST",
    "LS10-st-18": "LS 10-ST",
    "LS 10-st-18": "LS 10-ST",
    "Ls10st": "LS 10-ST",
    "ls3a": "LS3-A",
    "LS4": "LS 4A",
    "LS_4": "LS 4A",
    "Nimbus3DM": "NIMBUS 3",
    "PIK20D": "PIK-20",
    "Pik_20e": "PIK-20E",
    "PW5": "PW-5",
    "PW5-Smyk": "PW-5",
    "PW5_Smyk": "PW-5",
    "S-10-VT": "S10-V",
    "SGS1-26": "SGS 1-26A",
    "SILENT 2 TARGAelec.": "SILENT 2 TARGA",
    "SILENT-IN_P": "SILENT-IN",
    "Silent2": "SILENT 2 TARGA",
    "Silent2elec.": "SILENT 2 ELECTRO",
    "Silent2 elec.": "SILENT 2 ELECTRO",
    "Silent_2": "SILENT 2 TARGA",
    "SILENT_IN": "SILENT-IN",
    "STANDARDCIRRUS": "STANDARD CIRRUS G/81",
    "Schweizer1.26":"SGS 1-26E",
    "SZD-55": "SZD-55-1",
    "TAURUS": "Taurus",
    "HpHTwinshark": "Twinshark",
    "304TSTwinSharkMTS": "Twinshark",
    "TEST": "TST-10 M",
    "TSt": "TST-10 M",
    "TST-10AtlasM": "TST-10 M",
    "TST10Atlas": "TST-10 M",
    "TST10M35L": "TST-10 M",
    "TST14M": "TST-10 M",
    "Unknown": "UNKNOWN",
    "V2CM": "VENTUS 2CM",
    "V2cxa": "VENTUS 2CX",
    "V2cxa16M": "VENTUS 2CX",
    "V2cxa15M": "VENTUS 2CX",
    "Ventus 2cxt-18": "VENTUS 2CXT",
    "V2cXT": "VENTUS 2CXT",
    "V2CXT": "VENTUS 2CXT",
    "V2cxt18": "VENTUS 2CXT",
    "V3F18": "VENTUS 3F",
    "Ventus 3F": "VENTUS 3F",
    "VENTUS 2CT": "VENTUS 2CT",
    "VENTUS 2CX15M": "VENTUS 2CX",
    "VENTUS 3F-15": "VENTUS 3F",
    "Ventus 3F 15m": "VENTUS 3F",
    "VENTUS 3F-15M": "VENTUS 3F",
    "VENTUS 3F-18": "VENTUS 3F",
    "VENTUS 3F15m": "VENTUS 3F",
    "VENTUS 3F18M": "VENTUS 3F",
    "Ventus 3FES": "VENTUS 3F",
    "Ventus-3FES": "VENTUS 3F",
    "Ventus 3": "VENTUS 3F",
    "Ventus 3 15m_V2": "VENTUS 3F",
    "Ventus 3F 18m": "VENTUS 3F",
    "Ventus2/15m": "VENTUS 2A",
    "Ventus2/18m": "VENTUS 2C",
    "Ventus2b": "VENTUS 2B",
    "Ventus2Bx": "VENTUS 2 BX",
    "Ventus2C": "VENTUS 2C",
    "Ventus2c": "VENTUS-2C",
    "Ventus2CM": "VENTUS 2CM",
    "VENTUS2cm": "VENTUS 2C",
    "Ventus2cM18m": "VENTUS 2CM",
    "VENTUS2CT": "VENTUS 2CT",
    "Ventus2Cx18": "VENTUS 2CX",
    "Ventus2CXFES": "VENTUS 2CX",
    "Ventus2cx": "VENTUS 2CX",
    "Ventus2cxM": "VENTUS 2CXM",
    "Ventus2CXM": "VENTUS 2CXM",
    "VENTUS2CX": "VENTUS 2CXM",
    "Ventus2Cxt": "VENTUS 2CXT",
    "Ventus2cxt": "VENTUS 2CXT",
    "Ventus2cxT": "VENTUS 2CXT",
    "VeNTUS2CxT": "VENTUS 2CXT",
    "Ventus2cxt-18": "VENTUS 2CXT",
    "Ventus 2cxt": "VENTUS 2CXT",
    "Ventus2cxt15": "VENTUS 2CXT",
    "Ventus2cxt18": "VENTUS 2CXT",
    "VENTUS2cxT18": "VENTUS 2CXT",
    "Ventus315m_V2": "VENTUS 3M",
    "Ventus3F": "VENTUS 3F",
    "Ventus3F18m": "VENTUS 3F",
    "Ventus3F15M": "VENTUS 3F",
    "Ventus3F15m": "VENTUS 3F",
    "Ventus3F-15": "VENTUS 3F",
    'Ventus3F-18': "VENTUS 3F",
    'Ventus3F18M': "VENTUS 3F",
    "Ventus3FES": "VENTUS 3F",
    "Ventus3M": "VENTUS 3M",
    "Ventus3M032021": "VENTUS 3M",
    "Ventus3M18m": "VENTUS 3M",
    "Ventus3M18M": "VENTUS 3M",
    "Ventus3M2021": "VENTUS 3M",
    "Ventus3M2021DLR": "VENTUS 3M",
    "Ventus3MDLR": "VENTUS 3M",
    "Ventus 3M 032021": "VENTUS 3M",
    "Ventus 3M 2021": "VENTUS 3M",
    "Ventus 3M 2021 DLR": "VENTUS 3M",
    "Ventus3t": "VENTUS 3T",
    "Ventus3T18_W": "VENTUS 3T",
    "Ventus 3T 18_W": "VENTUS 3T",
    "Ventus3T": "VENTUS 3T",
    "VENTUSB": "VENTUS B/16.6",
    "VentusbT16.6m": "VENTUS BT",
    "VentusC/17m": "VENTUS C",
    "VENTUSCT": "VENTUS CT",
    "VETNUSCM": "VENTUS CM",
    "VENTUSCM": "VENTUS CM"
}

# Apply the unique replacements to the 'Gtype' column:
df['Gtype'] = df['Gtype'].replace(unique_replacements, regex=False)


In [9]:
len(df['Gtype'].unique())

117

In [10]:
df['Gtype'].unique()

array(['VENTUS 3F', 'LAK-17B FES', 'VENTUS 3M', 'UNKNOWN', 'DG-800',
       'ASH 31 MI', 'TST-10 M', 'DISCUS-2B', 'VENTUS 2CXM', 'ASW-27',
       'ASW 27-18 E (ASG-29E)', 'VENTUS15', 'ARCUS', 'ASH 26 E', 'U',
       'SZD-55-1', 'ASG 32 MI', 'VENTUS 2CXT', 'JS1-C', 'PIK-20',
       'ARCUS M', 'SILENT-IN', 'SZD-56-3 DIANA-2 FES', 'AS 33ES',
       'DISCUS-2CT', 'JS3', 'DG-400', 'VENTUS 2CT', 'ASH 30 MI', '18M',
       'DG-808 C', 'DISCUS CS', 'DG-800B', 'Ventus2', 'DISCUS 2C',
       'Antares 20E', 'ARCUS T', 'LAK-17AT', 'GLASFLUGEL 304 MS',
       'VENTUS 2B', 'ASW-27B', 'Carat', 'VENTUS 2CM', 'Taurus',
       'GENESIS 2', 'Nimeta', 'ASW 27', 'GLASFLUGEL 304C', 'EB29',
       'LS8-18', 'DISCUS', 'VENTUS 2A', '21M', 'LS-4', 'VENTUS 2C',
       'DISCUS 2T', 'SILENT 2 TARGA', 'DUO DISCUS T', 'ASH25M',
       'VENTUS CT', 'ASW20F', 'VENTUS-2C', 'STANDARD CIRRUS G/81',
       'Uentus2b', 'VENTUS 2CX', 'VENTUS2/15', 'ASW-24E', 'DG-808 S',
       'AC-5M', 'DUO DISCUS', 'LS 4A', 'VENTUS B/16.6'

In [11]:
#df = df.head(500)

In [12]:
pure_gliders = pd.read_csv('glider-types - Pure Gliders.csv')

pure_gliders = pure_gliders['Manufacture']

df = df[~df['Gtype'].isin(pure_gliders)]
df = df[df['Gtype']!='Unknown']
# Optionally, reset the index


In [13]:
len(df['Gtype'].unique())

59

In [14]:
exclude = ['18M', '21', '21M', 'ANTARES18S', 'Default', 'Motorglider', 'nan', 'U', 'Uentus2b', 
           'VENTUS15', 'Ventus2', 'VENTUS2/15', 'ventus2a', 'Ventus3', 'VentusN139P']
df = df[~df['Gtype'].isin(exclude)]


In [15]:
sorted_unique_gtypes = sorted(df['Gtype'].unique(), key=str.lower)
sorted_unique_gtypes


['AC-5M',
 'Antares 20E',
 'ARCUS M',
 'AS 33ES',
 'ASG 32 MI',
 'ASH 26 E',
 'ASH 30 MI',
 'ASH 31 MI',
 'ASH25M',
 'ASH30 NIXUS',
 'ASW 27-18 E (ASG-29E)',
 'ASW-24E',
 'Carat',
 'DG-1000M',
 'DG-1000T',
 'DG-400',
 'DG-800B',
 'DG-808 C',
 'DISCUS-2CT',
 'DUO DISCUS T',
 'EB29',
 'EB29R',
 'JS-5',
 'LAK-17AT',
 'LAK-17B FES MINI',
 'LS 10-ST',
 'LS-10',
 'N808EE',
 'Nimeta',
 'PIK20E',
 'SILENT 2 ELECTRO',
 'SILENT-IN',
 'SZD-56-3 DIANA-2 FES',
 'Taurus',
 'Twinshark',
 'VENTUS 2CM',
 'VENTUS 2CT',
 'VENTUS 2CX',
 'VENTUS 2CXM',
 'VENTUS 3F',
 'VENTUS 3M',
 'VENTUS 3T',
 'VENTUS CM',
 'VENTUS CT']

In [16]:
# Create a folder called "dropped" in the current directory if it doesn't exist
dropped_folder = "dropped"
os.makedirs(dropped_folder, exist_ok=True)

# --- Step 1: Drop rows where 'Sensor Info' is NaN ---
# Identify the rows to drop (for later saving)
dropped_sensor_info = df[df['Sensor Info'].isna()].copy()
# Drop them from df
df = df.dropna(subset=['Sensor Info'])

# --- Step 2: Drop rows where ALL three engine run start time columns are NaN ---
engine_cols = ["ENL_Engine_Run_Start_Times", "MOP_Engine_Run_Start_Times", "RPM_Engine_Run_Start_Times"]

# One-line drop: this drops rows if all specified columns are NaN.
# But first, save the rows that will be dropped.
mask_all_null = df[engine_cols].isna().all(axis=1)
dropped_engine_runs = df[mask_all_null].copy()
df = df.dropna(subset=engine_cols, how='all')

# --- Step 3: Save the dropped rows into CSV files inside the "dropped" folder ---
dropped_sensor_info.to_csv(os.path.join(dropped_folder, "dropped_sensor_info.csv"), index=False)
dropped_engine_runs.to_csv(os.path.join(dropped_folder, "dropped_engine_runs.csv"), index=False)


In [17]:
sensor_cols = ["ENL_Engine_Run_Start_Times", "MOP_Engine_Run_Start_Times", "RPM_Engine_Run_Start_Times"]

for col in sensor_cols:
    if col in df.columns:
        df[col] = df[col].apply(
            lambda x: (
                ",".join(
                    ("" if float(item) == 0 else str(int(float(item)))
                      for item in str(x).split(",") if item.strip())
                )
            ) if pd.notnull(x) else x
        )


In [18]:
# Assume glider_types_df is already defined (e.g., loaded from your CSV)
# and get_first_engine_event is defined as per our previous discussion.

# Apply get_first_engine_event to each row, storing the result in a temporary DataFrame.
event_df = df.apply(
    lambda row: get_first_engine_event(
        row,
        sensor_types=["ENL", "MOP", "RPM"],
        glider_types_df=glider_types_df
    ),
    axis=1
)

# Join the new event columns back into your original DataFrame.
df = df.join(event_df)



2025-04-15 22:26:47,228 - INFO - Row 2020-08-18-CNI-20Y-01.IGC: Not a Self-Launch glider, using earliest engine event time 165705.
2025-04-15 22:26:47,228 - INFO - Row 2020-08-18-CNI-20Y-01.IGC: first_event_time set to 165705 from sensor(s): ENL
2025-04-15 22:26:47,229 - INFO - Row 46RGJUM1.IGC: Not a Self-Launch glider, using earliest engine event time 193703.
2025-04-15 22:26:47,229 - INFO - Row 46RGJUM1.IGC: first_event_time set to 193703 from sensor(s): ENL
2025-04-15 22:26:47,232 - INFO - Row 2018-06-29-NKL-10Q-01.IGC: Not a Self-Launch glider, using earliest engine event time 184738.
2025-04-15 22:26:47,232 - INFO - Row 2018-06-29-NKL-10Q-01.IGC: first_event_time set to 184738 from sensor(s): ENL
2025-04-15 22:26:47,233 - INFO - Row 2018-06-09-CNI-20E-01.IGC: Not a Self-Launch glider, using earliest engine event time 202616.
2025-04-15 22:26:47,233 - INFO - Row 2018-06-09-CNI-20E-01.IGC: first_event_time set to 202616 from sensor(s): ENL
2025-04-15 22:26:47,235 - INFO - Row 2023-

In [19]:
# Convert first_event_time to datetime
df['first_event_datetime'] = df.apply(convert_to_datetime, axis=1)
df['flight_start_datetime'] = df['Start Time'].apply(lambda x: datetime.strptime(str(x), '%H%M%S'))

# Now define the 5-minute window using both columns
df[['window_start', 'window_end']] = df.apply(
    lambda row: pd.Series(define_time_window(row['first_event_datetime'], row['flight_start_datetime'], window_minutes=5)),
    axis=1
)




In [20]:
S2H = 3600          # Seconds to Hours
M2F = 3.28084       # Meters to Feet
K2M = 0.621371

In [21]:
def process_single_flight_non_speed(idx, row):
    """
    Processes a flight row to compute all non-speed engine run metrics.
    This version does NOT compute the TAS statistics.
    It returns a dictionary with:
      - engine_run_agls
      - altitude_rate_pre_event_window (ft/s)
      - distance_traveled_during_window (miles)
      - is_circling_during_pre_event_window
      - terrain_elev_at_engine_run (ft)
      - terrain_label_at_engine_run
      - Engine_Start_Above_1000ft
      - engine_run_times (s)
      - height_gain_loss (ft)
    Speed columns will be left blank.
    """
    ensure_ee_initialized()
    results = {'index': idx}
    results.update({
        'engine_run_agls': "",
        'altitude_rate_pre_event_window (ft/s)': "",
        'distance_traveled_during_window (miles)': "",
        'is_circling_during_pre_event_window': "",
        'terrain_elev_at_engine_run (ft)': "",
        'terrain_label_at_engine_run': "",
        'Engine_Start_Above_1000ft': "",
        'engine_run_times (s)': "",
        'height_gain_loss (ft)': "",
        # Leave speed columns blank for now.
        'avg_speed_while_engine_running (knots)': "",
        'min_speed_while_engine_running (knots)': "",
        'max_speed_while_engine_running (knots)': ""
    })
    
    try:
        flight_file = row['File']
        flight_id = Path(flight_file).stem
        df_flight = load_flight_data(flight_id, row.get('Date (MM/DD/YYYY)', ''))
        if df_flight is None or df_flight.empty:
            return results
        flight_start_dt = row['flight_start_datetime']
        
        # Determine which engine run start time column to use.
        if pd.notnull(row.get('ENL_Engine_Run_Start_Times', "")) and str(row['ENL_Engine_Run_Start_Times']).strip() != "":
            run_times_str = row['ENL_Engine_Run_Start_Times']
            alt_col = 'ENL_Engine_Run_Altitudes_AGL'
        elif pd.notnull(row.get('MOP_Engine_Run_Start_Times', "")) and str(row['MOP_Engine_Run_Start_Times']).strip() != "":
            run_times_str = row['MOP_Engine_Run_Start_Times']
            alt_col = 'MOP_Engine_Run_Altitudes_AGL'
        elif pd.notnull(row.get('RPM_Engine_Run_Start_Times', "")) and str(row['RPM_Engine_Run_Start_Times']).strip() != "":
            run_times_str = row['RPM_Engine_Run_Start_Times']
            alt_col = 'RPM_Engine_Run_Altitudes_AGL'
        else:
            run_times_str = ""
            alt_col = None
        if run_times_str == "":
            return results
        
        run_times_list = [rt.strip() for rt in run_times_str.split(",") if rt.strip()]
        if alt_col and pd.notnull(row.get(alt_col, "")):
            altitude_values_list = [val.strip() for val in str(row.get(alt_col, "")).split(",") if val.strip()]
        else:
            altitude_values_list = []
        
        engine_run_agls = []
        run_alt_rate = []
        run_distance = []
        run_circling = []
        engine_run_terrain_elevs = []
        engine_run_terrain_labels = []
        engine_run_above_1000 = []
        
        for i, rt in enumerate(run_times_list):
            try:
                event_dt = datetime.strptime(row['Date (MM/DD/YYYY)'] + ' ' + rt, '%m/%d/%Y %H%M%S')
            except Exception as e:
                logging.warning(f"Error parsing engine run datetime for flight '{flight_file}', token {rt}: {e}")
                continue
            window_start, window_end = define_time_window(event_dt, flight_start_dt, window_minutes=5)
            mask = (df_flight['timestamp'] >= window_start) & (df_flight['timestamp'] <= window_end)
            df_window = df_flight.loc[mask]
            if not df_window.empty:
                try:
                    alt_rate = calculate_descent_rate(df_window, window_start, window_end)
                except Exception as e:
                    logging.warning(f"Flight '{flight_file}', engine run {i}: error computing altitude rate: {e}")
                    alt_rate = pd.NA
                run_alt_rate.append(str(alt_rate) if pd.notna(alt_rate) else "")
                
                try:
                    dist = calculate_distance_traveled(df_window, window_start, window_end)
                except Exception as e:
                    logging.warning(f"Flight '{flight_file}', engine run {i}: error computing distance traveled: {e}")
                    dist = pd.NA
                run_distance.append(str(dist) if pd.notna(dist) else "")
                
                try:
                    circling = detect_circling_behavior(df_window, window_start, window_end)
                except Exception as e:
                    logging.warning(f"Flight '{flight_file}', engine run {i}: error detecting circling: {e}")
                    circling = ""
                run_circling.append(str(circling))
                
                df_flight['time_diff'] = (df_flight['timestamp'] - event_dt).abs()
                try:
                    event_row = df_flight.loc[df_flight['time_diff'].idxmin()]
                except Exception as e:
                    logging.warning(f"Flight '{flight_file}', engine run {i}: error finding closest event row: {e}")
                    event_row = None
            else:
                run_alt_rate.append("")
                run_distance.append("")
                run_circling.append("")
                event_row = None
            
            if i < len(altitude_values_list):
                eng_agl = altitude_values_list[i]
            else:
                eng_agl = pd.NA
            engine_run_agls.append(str(eng_agl))
            
            try:
                if pd.notna(eng_agl) and str(eng_agl).strip() != "":
                    engine_run_above_1000.append("True" if float(eng_agl) > 1000 else "False")
                else:
                    engine_run_above_1000.append("")
            except Exception as e:
                logging.warning(f"Flight '{flight_file}', engine run {i}: error computing Engine_Start_Above_1000ft: {e}")
                engine_run_above_1000.append("")
            
            if event_row is not None:
                try:
                    event_lat = event_row['latitude']
                    event_lon = event_row['longitude']
                    terrain_elev_m = get_dem_elevation_gee(event_lat, event_lon)
                    if terrain_elev_m is not None and isinstance(terrain_elev_m, (int, float)) and not np.isnan(terrain_elev_m):
                        terrain_elev_ft = terrain_elev_m * M2F
                    else:
                        terrain_elev_ft = pd.NA
                except Exception as e:
                    logging.warning(f"Flight '{flight_file}', engine run {i}: error retrieving terrain elevation: {e}")
                    terrain_elev_ft = pd.NA
                engine_run_terrain_elevs.append(str(terrain_elev_ft) if pd.notna(terrain_elev_ft) else "")
                
                try:
                    terrain_label = get_terrain_label_at_first_engine_event_gee(event_lat, event_lon)
                    engine_run_terrain_labels.append(terrain_label if terrain_label else "")
                except Exception as e:
                    logging.warning(f"Flight '{flight_file}', engine run {i}: error retrieving terrain label: {e}")
                    engine_run_terrain_labels.append("")
            else:
                engine_run_terrain_elevs.append("")
                engine_run_terrain_labels.append("")
        
        # Store non-speed per-engine-run metrics as comma-separated strings.
        results['engine_run_agls'] = ",".join(engine_run_agls)
        results['altitude_rate_pre_event_window (ft/s)'] = ",".join(run_alt_rate)
        results['distance_traveled_during_window (miles)'] = ",".join(run_distance)
        results['is_circling_during_pre_event_window'] = ",".join(run_circling)
        results['terrain_elev_at_engine_run (ft)'] = ",".join(engine_run_terrain_elevs)
        results['terrain_label_at_engine_run'] = ",".join(engine_run_terrain_labels)
        results['Engine_Start_Above_1000ft'] = ",".join(engine_run_above_1000)
        
        # Process engine run times and height gain/loss.
        sensor_info = row.get("Sensor Info", "")
        event_sensor = row.get("event_sensor", "")
        engine_run_times, height_gains = parse_engine_run_info(sensor_info, sensor=event_sensor)
        results["engine_run_times (s)"] = ",".join(str(x) for x in engine_run_times)
        results["height_gain_loss (ft)"] = height_gains
        
    except Exception as e:
        logging.warning(f"General error processing flight {idx}: {e}")
    
    return results

In [22]:
logging.basicConfig(level=logging.WARNING, format='%(levelname)s: %(message)s')
logger = logging.getLogger()

In [23]:
ee.Authenticate()

True

In [24]:
import csv

In [25]:
# =============================================================================
# Stage 1 Main Block: Process Non-Speed Columns
# =============================================================================
if __name__ == '__main__':
    logging.info("Starting Stage 1: Processing non-speed columns...")
    
    
    # Ensure required columns exist; for example, "flight_start_datetime" must have been computed earlier.
    # (Add your pre-processing step here if needed.)
    
    # Process each flight in parallel, computing non-speed columns.
    flight_rows = list(df.iterrows())
    results_list = Parallel(n_jobs=-1, verbose=5)(
        delayed(process_single_flight_non_speed)(idx, row) for idx, row in flight_rows
    )
    results_df = pd.DataFrame(results_list)
    
    # Make sure "index" is present in both DataFrames.
    if "index" not in df.columns:
        df = df.reset_index()
    if "index" not in results_df.columns:
        results_df = results_df.reset_index()
    
    # List of non-speed new columns.
    new_columns = [
        "engine_run_agls",
        "altitude_rate_pre_event_window (ft/s)",
        "distance_traveled_during_window (miles)",
        "is_circling_during_pre_event_window",
        "terrain_elev_at_engine_run (ft)",
        "terrain_label_at_engine_run",
        "Engine_Start_Above_1000ft",
        "engine_run_times (s)",
        "height_gain_loss (ft)"
    ]
    missing_cols = [col for col in new_columns if col not in results_df.columns]
    if missing_cols:
        logging.warning(f"Missing non-speed columns in results_df: {missing_cols}")
    
    # Merge non-speed computed results back into the main DataFrame.
    df = df.merge(results_df, on="index", how="outer")
    
    # Save intermediate DataFrame without speed columns.
    #df.to_csv("flights_final_without_speed.csv", sep="\t", index=False, quoting=csv.QUOTE_ALL)
    logging.info("✅ Stage 1 complete: Non-speed columns saved as flights_final_without_speed.csv")
    
    # =============================================================================
    # Stage 2: Process Speed Columns Separately
    # =============================================================================
    logging.info("Starting Stage 2: Processing speed columns...")
    
    # Prepare lists to collect speed values.
    avg_speed_list = []
    min_speed_list = []
    max_speed_list = []
    
    # Iterate through each row in the now updated df.
    for idx, row in df.iterrows():
        try:
            # Now that "engine_run_times (s)" exists in df, we can compute TAS stats.
            tas_stats = get_true_airspeed_stats_for_engine_runs_from_row(row, debug=False)
            if tas_stats and len(tas_stats) > 0:
                avg_speeds = [f"{avg:.2f}" for (_, avg, _, _) in tas_stats]
                min_speeds = [f"{min_val:.2f}" for (_, _, min_val, _) in tas_stats]
                max_speeds = [f"{max_val:.2f}" for (_, _, _, max_val) in tas_stats]
                avg_speed_list.append(",".join(avg_speeds))
                min_speed_list.append(",".join(min_speeds))
                max_speed_list.append(",".join(max_speeds))
            else:
                avg_speed_list.append("")
                min_speed_list.append("")
                max_speed_list.append("")
        except Exception as e:
            logging.warning(f"Error processing speed for row {idx}: {e}")
            avg_speed_list.append("")
            min_speed_list.append("")
            max_speed_list.append("")
    
    # Insert the speed columns into df.
    df["avg_speed_while_engine_running (knots)"] = avg_speed_list
    df["min_speed_while_engine_running (knots)"] = min_speed_list
    df["max_speed_while_engine_running (knots)"] = max_speed_list
    df = df.dropna(subset=['first_event_time'])

    # Save the final DataFrame.
    df.to_csv("flights_final.csv", sep="\t", index=False, quoting=csv.QUOTE_ALL)
    logging.info("✅ Stage 2 complete: Final CSV with speed columns saved as flights_final.csv")

2025-04-15 22:26:54,184 - INFO - Starting Stage 1: Processing non-speed columns...
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
2025-04-15 22:26:59,717 - INFO - Earth Engine initialized successfully in worker.
2025-04-15 22:26:59,719 - INFO - Earth Engine initialized successfully in worker.
2025-04-15 22:26:59,720 - INFO - Attempting to load IGC file for Flight '25HC48C1': filtered/25HC48C1.IGC
2025-04-15 22:26:59,720 - INFO - Attempting to load IGC file for Flight '2020-08-18-CNI-20Y-01': filtered/2020-08-18-CNI-20Y-01.IGC
2025-04-15 22:26:59,792 - INFO - File '25HC48C1': IGC file loaded successfully.
2025-04-15 22:26:59,796 - INFO - Descent rate calculated: 0.07692307692307693 ft/s
2025-04-15 22:26:59,800 - INFO - Distance traveled calculated: 0.11260708510918248 miles
2025-04-15 22:26:59,866 - INFO - Earth Engine initialized successfully in worker.
2025-04-15 22:26:59,867 - INFO - Attempting to load IGC file for Flight '2018-06-29-NKL-10Q-01': filtere

DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 44
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 44
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 44, end = 48
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 44, end = 48
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46




DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 44, end = 48
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 44, end = 48
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS pos

2025-04-15 22:27:48,282 - INFO - ✅ Stage 2 complete: Final CSV with speed columns saved as flights_final.csv


DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 44, end = 48
DEBUG: Found TAS positions: start = 42, end = 46
DEBUG: Found TAS positions: start = 44, end = 48


In [26]:
df=df[df['min_speed_while_engine_running (knots)']!="0.00"]

In [27]:
df['Engine_Start_Above_1000ft'].value_counts()

Engine_Start_Above_1000ft
False                                                                                              105
True                                                                                                94
False,True                                                                                          60
False,False                                                                                         46
True,True                                                                                           17
True,False                                                                                           9
False,False,True                                                                                     8
False,True,True                                                                                      7
False,False,False                                                                                    6
True,True,True                                 

In [None]:
df.columns

In [None]:
Gtypes= set(df['Gtype'])

In [None]:
# Convert the set to a list and then to a DataFrame
df_gtypes = pd.DataFrame(list(Gtypes), columns=['Gtype'])

# Write the DataFrame to a CSV file (without the index)
#df_gtypes.to_csv('Gtypes.csv', index=False)

In [None]:
# If the column is numeric, just assign it.
df['avg_speed_while_engine_running (knots)'] = pd.to_numeric(
    df['avg_speed_while_engine_running (knots)'],
    errors='coerce'
)
