# Load in the raw balloon data files, combine, and sort.

Author: Brian Green (briangre@stanford.edu)

In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Register tqdm's pandas integration (e.g. `progress_apply`) while ignoring any
# warnings raised during registration; the previous warning filters are restored
# when the `with` block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    tqdm.pandas()

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Explicit dtype for every column read from the raw CSV files, so that pandas
# never has to infer a type. Identifier and timestamp columns are read as text,
# measurements as 64-bit floats, and flag/config columns as unsigned 64-bit
# integers.
dtypes = dict(
    flight_id=str,
    time=str,
    latitude=np.float64,
    longitude=np.float64,
    altitude=np.float64,
    temperature=np.float64,
    pressure=np.float64,
    earth_ir=np.float64,
    earth_ir_sensor_config=np.uint64,
    acs=np.uint64,
    propeller_on=np.uint64,
    velocity_u=np.float64,
    velocity_v=np.float64,
    omega=np.float64,
    acceleration=np.float64,
    solar_elevation=np.float64,
    solar_azimuth=np.float64,
    is_daytime=np.uint64,
)

In [4]:
# Raw Loon flight CSV files, listed in chronological order.
# `file_names` stays a NumPy array and `nfiles` is still defined so that any
# other cell relying on these names keeps working.
file_names = np.array(['../zenodo_data/loon-flights-2011-2014.csv',
                       '../zenodo_data/loon-flights-2015-2016.csv',
                       '../zenodo_data/loon-flights-2017Q1.csv',
                       '../zenodo_data/loon-flights-2017Q2.csv',
                       '../zenodo_data/loon-flights-2017Q3.csv',
                       '../zenodo_data/loon-flights-2017Q4.csv',
                       '../zenodo_data/loon-flights-2018Q1.csv',
                       '../zenodo_data/loon-flights-2018Q2.csv',
                       '../zenodo_data/loon-flights-2018Q3.csv',
                       '../zenodo_data/loon-flights-2018Q4.csv',
                       '../zenodo_data/loon-flights-2019Q1.csv',
                       '../zenodo_data/loon-flights-2019Q2.csv',
                       '../zenodo_data/loon-flights-2019Q3.csv',
                       '../zenodo_data/loon-flights-2019Q4.csv',
                       '../zenodo_data/loon-flights-2020Q1.csv',
                       '../zenodo_data/loon-flights-2020Q2.csv',
                       '../zenodo_data/loon-flights-2020Q3.csv',
                       '../zenodo_data/loon-flights-2020Q4.csv',
                       '../zenodo_data/loon-flights-2021Q1.csv',
                       '../zenodo_data/loon-flights-2021Q2.csv'])
nfiles = file_names.size

# Every path above starts with this prefix; stripping it leaves the short label
# (e.g. '2011-2014.csv') that is printed as a progress message.
name_prefix = '../zenodo_data/loon-flights-'

# The only columns kept from the raw files, in the order they should appear.
keep_cols = ['flight_id', 'time', 'latitude', 'longitude', 'altitude',
             'pressure', 'acs', 'propeller_on', 'velocity_u', 'velocity_v']

output_path = 'temp_data/1_flights_sorted_new.feather'

# Collect one frame per file and concatenate once after the loop. Concatenating
# onto a growing frame inside the loop re-copies every previously loaded row on
# each iteration.
flight_chunks = []
for file_name in file_names:
    print(file_name[len(name_prefix):])

    # Parse only the columns that are kept, instead of reading and converting
    # all of them and discarding most afterwards. `dtypes` is the column -> type
    # mapping defined in an earlier cell.
    # NOTE: `infer_datetime_format` is deprecated since pandas 2.0; it is kept
    # here for compatibility with older pandas versions.
    flights_temp = pd.read_csv(file_name, parse_dates=['time'], infer_datetime_format=True,
                               dtype={col: dtypes[col] for col in keep_cols},
                               usecols=keep_cols)
    # read_csv returns columns in file order, so enforce the intended order.
    flights_temp = flights_temp[keep_cols]

    # No per-file sort is needed: min/max do not depend on row order and the
    # combined frame is sorted in full below.
    print('Date range:')
    print(f'{flights_temp["time"].min()} to {flights_temp["time"].max()}')
    print(' ')
    flight_chunks.append(flights_temp)

flights = pd.concat(flight_chunks, ignore_index=True)
flights = flights.sort_values(by=['flight_id', 'time']).reset_index(drop=True)
print(f'Number of unique flights: {flights.flight_id.nunique()}')

# Remove the UTC timezone from flights to make the format easier to handle
flights['time'] = flights['time'].dt.tz_convert(None)

# Save to disk, creating the output directory first so that a missing folder
# does not throw away the (slow) load above.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
flights.to_feather(output_path)

2011-2014.csv
Date range:
2011-08-25 17:05:49+00:00 to 2014-12-31 23:59:56.476000+00:00
 
2015-2016.csv
Date range:
2015-01-01 00:00:01.889000+00:00 to 2016-12-31 23:59:57+00:00
 
2017Q1.csv
Date range:
2017-01-01 00:00:02+00:00 to 2017-03-31 23:59:58+00:00
 
2017Q2.csv
Date range:
2017-04-01 00:00:03+00:00 to 2017-06-30 23:59:59+00:00
 
2017Q3.csv
Date range:
2017-07-01 00:00:02+00:00 to 2017-09-30 23:59:59+00:00
 
2017Q4.csv
Date range:
2017-10-01 00:00:03+00:00 to 2017-12-31 23:59:59+00:00
 
2018Q1.csv
Date range:
2018-01-01 00:00:00+00:00 to 2018-03-31 23:59:59+00:00
 
2018Q2.csv
Date range:
2018-04-01 00:00:01+00:00 to 2018-06-30 23:59:55+00:00
 
2018Q3.csv
Date range:
2018-07-01 00:00:07+00:00 to 2018-09-30 23:59:57+00:00
 
2018Q4.csv
Date range:
2018-10-01 00:00:03+00:00 to 2018-12-31 23:59:59+00:00
 
2019Q1.csv
Date range:
2019-01-01 00:00:02+00:00 to 2019-03-31 23:59:59+00:00
 
2019Q2.csv
Date range:
2019-04-01 00:00:00+00:00 to 2019-06-30 23:59:57+00:00
 
2019Q3.csv
Date rang

In [4]:
# Display the combined table as a visual sanity check.
# NOTE(review): the saved output below this cell (18 columns, an 'Unnamed: 0'
# column, tz-aware times) does not match the 10-column, tz-naive frame built by
# the previous cell — it looks stale; re-run the notebook to refresh it.
flights

Unnamed: 0,flight_id,time,latitude,longitude,altitude,temperature,pressure,earth_ir,earth_ir_sensor_config,acs,propeller_on,velocity_u,velocity_v,omega,acceleration,solar_elevation,solar_azimuth,is_daytime
0,NR-384,2017-05-31 15:41:10+00:00,40.905778,-117.804139,1373.7,,859.13,414.74,2,0,0,,,0.0000,0.0000,35.110428,90.165696,1
1,NR-384,2017-05-31 15:42:10+00:00,40.905501,-117.801584,1616.2,,835.00,403.85,2,0,0,,,0.0000,0.0000,35.301181,90.331348,1
2,NR-384,2017-05-31 15:43:28+00:00,40.904456,-117.796707,1912.0,,805.65,398.27,2,0,0,,,0.0000,0.0000,35.550340,90.547824,1
3,NR-384,2017-05-31 15:44:43+00:00,40.903384,-117.794836,2210.0,,778.12,391.81,2,0,0,,,0.0000,0.0000,35.787790,90.754699,1
4,NR-384,2017-05-31 15:45:46+00:00,40.903281,-117.793374,2447.8,,756.14,383.17,2,0,0,,,0.0000,0.0000,35.987152,90.929463,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6995136,VG-023,2017-06-08 16:08:41+00:00,42.814225,-117.917287,1251.7,,869.43,,0,0,0,0.370,0.538,0.0019,0.0068,40.326197,95.183303,1
6995137,VG-023,2017-06-08 16:09:41+00:00,42.814972,-117.916946,1252.6,,869.62,,0,0,0,0.445,0.610,0.0023,0.0026,40.508880,95.368359,1
6995138,VG-023,2017-06-08 16:10:41+00:00,42.815328,-117.916318,1249.2,,869.79,,0,0,0,0.615,0.860,0.0031,0.0028,40.691750,95.553898,1
6995139,VG-023,2017-06-08 16:11:41+00:00,42.815888,-117.915676,1244.3,,870.04,,0,0,0,0.379,0.527,0.0025,0.0068,40.874551,95.740235,1
