In [None]:
# Configure Jupyter so figures appear in the notebook
%matplotlib inline

# Configure Jupyter to display the assigned value after an assignment
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'

import os
import gzip
import csv

import pandas as pd
import numpy as np

In [None]:
def generate_dirpaths(dirname='opensky-network.org'):
    """Walk a directory tree and report its leaf directories.

    dirname: string, root directory to walk

    yields: (dirpath, filenames) pairs, one for each directory at or under
            dirname that contains no subdirectories
    """
    leaf_dirs = (
        (path, files)
        for path, subdirs, files in os.walk(dirname)
        if not subdirs
    )
    yield from leaf_dirs

In [None]:
def download_datafiles(date, dirname='opensky-network.org/datasets/states'):
    """Print the wget command for each hourly archive of one day.

    Nothing is fetched here: the 24 commands (hours 00 through 23) are only
    printed, one per line.

    date: string 'YYYY-MM-DD' format
    dirname: string host and path prefix placed after 'https://'
    """
    hour_labels = ['%0.2d' % h for h in range(24)]

    for label in hour_labels:
        archive = 'states_%s-%s.csv.tar' % (date, label)
        url_parts = (dirname, date, label, archive)
        print('wget -r https://%s/%s/%s/%s' % url_parts)

In [None]:
date = '2018-02-05'
# download_datafiles(date)

In [None]:
import tarfile

In [None]:
filename = 'opensky-network.org/datasets/states/2017-07-24/00/states_2017-07-24-00.csv.tar'

In [None]:
tf = tarfile.open(filename)

In [None]:
tf.list()

In [None]:
# Peek at the first line of every archive member whose name starts with
# 'states'. Each such member is opened as a gzip stream in binary mode, so
# the line is printed as a bytes object.
for member in tf.getmembers():
    if member.name.startswith('states'):
        f = tf.extractfile(member)
        # Close the gzip stream as soon as the first line has been read.
        with gzip.open(f) as g:
            line = g.readline()
        print(line)

In [None]:
def open_tarfile(filename):
    """Open the first gzipped CSV inside a tar archive as a csv.reader.

    filename: string path to a tar archive

    returns: csv.reader over the decompressed text of the first member whose
             name ends with '.csv.gz', or None if no member matches
    """
    tf = tarfile.open(filename)
    for member in tf.getmembers():
        if member.name.endswith('.csv.gz'):
            f = tf.extractfile(member)
            # newline='' leaves line endings untouched so the csv module can
            # handle them itself; otherwise '\r\n' inside a quoted field
            # would be rewritten to '\n' before parsing.
            g = gzip.open(f, mode='rt', newline='')
            # The archive is not closed on this path: the returned reader
            # reads from it lazily as rows are consumed.
            return csv.reader(g)

    # Nothing to read, so release the archive handle before giving up.
    tf.close()
    return None

In [None]:
def open_tarfile_chunks(filename):
    """Open the first gzipped CSV inside a tar archive for chunked reading.

    filename: string path to a tar archive

    returns: the object produced by pd.read_csv with chunksize=10000 for the
             first member whose name ends with '.csv.gz', or None if no
             member matches
    """
    archive = tarfile.open(filename)
    candidates = (
        entry for entry in archive.getmembers()
        if entry.name.endswith('.csv.gz')
    )
    first_match = next(candidates, None)
    if first_match is None:
        return None

    stream = archive.extractfile(first_match)
    return pd.read_csv(stream, compression='gzip', chunksize=10000)

In [None]:
reader = open_tarfile_chunks(filename)
reader

In [None]:
chunk = next(reader)
chunk.head()

In [None]:
from math import radians, cos, sin, asin, sqrt

def haversine(*args):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)

    args: lat1, lon1, lat2, lon2 in decimal degrees; each may be a scalar
          or an array, and the four are broadcast against each other

    returns: distance in kilometers (scalar or array)

    From https://stackoverflow.com/questions/4913349/haversine-formula
    -in-python-bearing-and-distance-between-two-gps-points
    """
    # convert decimal degrees to radians, one argument at a time so that
    # scalars and arrays can be mixed freely
    lat1, lon1, lat2, lon2 = (np.deg2rad(value) for value in args)

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal
    # points), which would make arcsin return nan
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

In [None]:
reader = open_tarfile(filename)
header = next(reader)
header

## Get flights through a volume

In [None]:
# Reference point as a (latitude, longitude) tuple in decimal degrees, in the
# order haversine() takes them. Presumably Boston Logan airport, going by the
# name -- verify.
logan = 42.3656, -71.0096

In [None]:
# Map each aircraft id (icao24) to the list of timestamps at which a row
# placed it within 10 km of `logan` (haversine() returns kilometers).
res = {}

for i, line in enumerate(reader):
    # The first four fields of a row are read as time, icao24, lat, lon.
    time, icao24 = line[:2]
    try:
        lat = float(line[2])
        lon = float(line[3])
    except ValueError:
        # Position fields that do not parse as floats: skip the row.
        continue

    d = haversine(lat, lon, *logan)
    if d < 10:
        res.setdefault(icao24, []).append(time)

    # Hard cap on how many rows are scanned. Rows skipped above never reach
    # this check.
    if i > 10000000:
        break

In [None]:
len(res)

In [None]:
res

In [None]:
import redis

r = redis.Redis(
    host='localhost',
    port=6379)

In [None]:
r.set('foo', 'bar')

In [None]:
r.get('foo')

In [None]:
def process_file(reader, redis, nrows=1e9):
    """Store rows that fall within 10 km of `logan` in Redis hashes.

    For every qualifying row, the hash named 'icao24:<icao24>' gets a field
    keyed by the row's time, set only if that field does not exist yet.

    reader: iterable of rows (sequences of strings); the first four items
            are read as time, icao24, lat, lon
    redis: Redis client object providing hsetnx(name, key, value)
    nrows: scanning stops once the row index exceeds this value
    """
    for i, line in enumerate(reader):
        time, icao24 = line[:2]
        try:
            lat = float(line[2])
            lon = float(line[3])
        except ValueError:
            # Position fields that do not parse as floats: skip the row.
            continue

        d = haversine(lat, lon, *logan)
        if d < 10:
            key = 'icao24:' + icao24
            # Write through the client that was passed in, not the
            # notebook-level `r`.
            # NOTE(review): `line` is a list; newer redis-py releases may
            # refuse non-bytes/str/number values -- confirm and serialize
            # explicitly if needed.
            redis.hsetnx(key, time, line)

        if i > nrows:
            break

In [None]:
reader = open_tarfile(filename)
header = next(reader)

%time process_file(reader, r)

In [None]:
keys = r.keys()
keys

In [None]:
r.hgetall('icao24:a65c95')

In [None]:
def delete_all(r):
    """Delete every key that `r.keys()` reports.

    r: Redis client object providing keys() and delete(*names)

    returns: whatever r.delete returns (the number of keys removed in
             redis-py), or 0 when there are no keys
    """
    keys = r.keys()
    if not keys:
        # delete() needs at least one key name; calling it with none fails.
        return 0
    return r.delete(*keys)

In [None]:
r.keys()

In [None]:
from datetime import datetime
import pytz

In [None]:
# Naive datetime: no time zone is attached yet.
start = datetime(2017, 9, 3, 18, 0, 0)

# Attach the US/Eastern zone using pytz's localize().
eastern = pytz.timezone('US/Eastern')
loc_dt = eastern.localize(start)

# Render with the zone abbreviation (%Z) and UTC offset (%z) appended.
fmt = '%Y-%m-%d %H:%M:%S %Z%z'
loc_dt.strftime(fmt)

In [None]:
# `start` is naive, so timestamp() interprets it in the local time zone of
# the machine running the notebook; the result can differ from
# loc_dt.timestamp().
start.timestamp()

In [None]:
eastern = pytz.timezone('US/Eastern')

In [None]:
loc_dt = eastern.localize(start)
loc_dt

In [None]:
loc_dt.timestamp()

In [None]:
loc_dt.astimezone(pytz.utc)

In [None]:
loc_dt.astimezone(pytz.utc).timestamp()

In [None]:
import pandas as pd

In [None]:
df = pd.read_csv('data.csv', sep='|', skiprows=2)
df.columns

In [None]:
from io import StringIO

def clean_csv(filename):
    """Turn a pipe-bordered text table into clean '|'-separated text.

    Only lines that start with '|' are kept. All spaces are removed, and the
    single border pipe at each end of the line is dropped so that the result
    has no empty leading or trailing column.

    filename: string path to the text file

    returns: StringIO positioned at the start, ready for
             pd.read_csv(..., sep='|')
    """
    res = StringIO()
    with open(filename) as f:
        for line in f:
            if line.startswith('|'):
                # Remove the line ending and padding first; otherwise the
                # trailing border pipe is hidden behind the newline and
                # survives as an extra empty column.
                row = line.rstrip('\r\n').replace(' ', '')
                # Drop exactly one border pipe per side so that genuinely
                # empty first or last fields are preserved.
                if row.startswith('|'):
                    row = row[1:]
                if row.endswith('|'):
                    row = row[:-1]
                res.write(row + '\n')
    res.seek(0)
    return res

In [None]:
res = clean_csv('data2.csv')
df = pd.read_csv(res, sep='|')
df.columns

In [None]:
df.head()

In [None]:
df.velocity.describe()

In [None]:
df.callsign.unique()

In [None]:
df.describe()

In [None]:
for column in df.columns:
    print(column, df[column].dtype)

In [None]:
df.onground.value_counts()

In [None]:
df.time

In [None]:
grouped = df.groupby('icao24')
len(grouped)

In [None]:
for name, group in grouped:
    print(name, len(group))

In [None]:
grouped.get_group('4006b2')

In [None]:
df[df.baroaltitude<1000]