# [Awkward-array](https://awkward-array.org/quickstart.html)

In [1]:
import awkward as ak
import numpy as np

In [2]:
# A jagged dataset: an outer list of five entries, each a (possibly empty)
# list of records with a scalar "x" and a variable-length list "y".
dataset = [
    [{"x": 1, "y": [101]}],
    [{"x": 4, "y": [101, 202]}],
    [{"x": 9, "y": [103, 203, 303]}],
    [],
    [{"x": 33, "y": [104, 102, 3034]}],
]

# Wrap the nested Python structure in an Awkward Array.
array = ak.Array(dataset)


In [3]:
# A full slice over the outer axis returns all 5 entries unchanged.
array[:]

<Array [[{x: 1, y: [101], ... 102, 3034]}]] type='5 * var * {"x": int64, "y": va...'>

In [4]:
# Slicing the outer axis and the variable-length inner lists in full
# likewise leaves the array unchanged.
array[:,:]

<Array [[{x: 1, y: [101], ... 102, 3034]}]] type='5 * var * {"x": int64, "y": va...'>

In [5]:
# Project the 'y' field out of every record: one list of integers per record.
array[:,:,'y']

<Array [[[101]], [[101, ... [[104, 102, 3034]]] type='5 * var * var * int64'>

In [6]:
# Project the 'x' field: one integer per record; the empty entry stays empty.
array[:,:,'x']

<Array [[1], [4], [9], [], [33]] type='5 * var * int64'>

In [7]:
# First element (index 0) of each record's 'y' list.
array[:,:,'y',0]

<Array [[101], [101], [103], [], [104]] type='5 * var * int64'>

In [8]:
# Element-wise arithmetic on two arrays with the same jagged structure:
# first 'y' value plus the square root of 'x' (the result is float64).
array[:,:,'y',0]+np.sqrt(array[:,:,'x'])

<Array [[102], [103], [106], [], [110]] type='5 * var * float64'>

In [9]:
# entry at index 2 (the third entry), all of its records, the "y" field,
# last element of each "y" list
array[2,:,"y",-1]

<Array [303] type='1 * int64'>

In [10]:
# Scalar arithmetic is applied element by element to a jagged
# (variable-length) numerical array; the list structure is preserved.
ar = ak.Array([[1, 2, 3], [1, 2], [1, 2, 3, 9]])

print(f"ar => {ar}")
print(f"ar*2 => {ar * 2}")
print(f"ar+1 => {ar + 1}")
print(f"ar**2 => {ar ** 2}")

ar => [[1, 2, 3], [1, 2], [1, 2, 3, 9]]
ar*2 => [[2, 4, 6], [2, 4], [2, 4, 6, 18]]
ar+1 => [[2, 3, 4], [2, 3], [2, 3, 4, 10]]
ar**2 => [[1, 4, 9], [1, 4], [1, 4, 9, 81]]


# Play Data: Chicago Bike Routes

In [11]:


import urllib.request
import json

# GeoJSON file of Chicago bike routes, fetched over HTTP.
url = "https://raw.githubusercontent.com/Chicago/osd-bike-routes/master/data/Bikeroutes.geojson"

# The context manager closes the HTTP response as soon as the body is read,
# so the connection is not left open for the lifetime of the kernel.
with urllib.request.urlopen(url) as response:
    bikeroutes_json = response.read()

# Plain-Python (dict/list) view of the same data; the pure-Python benchmark
# further down iterates over this object.
bikeroutes_pyobj = json.loads(bikeroutes_json)

In [12]:
# Parse the raw JSON directly into Awkward form; the top-level JSON object
# becomes a single Record (see the repr below).
bikeroutes = ak.from_json(bikeroutes_json)
# Alternatively, build it from the already-parsed Python object:
# bikeroutes = ak.Record(bikeroutes_pyobj)
bikeroutes

<Record ... [-87.7, 42], [-87.7, 42]]]}}]} type='{"type": string, "crs": {"type"...'>

In [13]:
# Show the complete data type; the repr above truncates it.
ak.type(bikeroutes)

{"type": string, "crs": {"type": string, "properties": {"name": string}}, "features": var * {"type": string, "properties": {"STREET": string, "TYPE": string, "BIKEROUTE": string, "F_STREET": string, "T_STREET": option[string]}, "geometry": {"type": string, "coordinates": var * var * var * float64}}}

In [14]:
# Slicing through nested fields: several field names can be given in one
# index. Attribute access is an alternative spelling:
# bikeroutes.features.geometry.coordinates
bikeroutes["features", "geometry", "coordinates"]

<Array [[[[-87.8, 41.9], ... [-87.7, 42]]]] type='1061 * var * var * var * float64'>

In [15]:
# Convert a single feature (index 751) back into plain Python dicts and
# lists so its full content can be inspected.
ak.to_list(bikeroutes.features[751])

{'type': 'Feature',
 'properties': {'STREET': 'E 26TH ST',
  'TYPE': '1',
  'BIKEROUTE': 'EXISTING BIKE LANE',
  'F_STREET': 'S STATE ST',
  'T_STREET': 'S DR MARTIN LUTHER KING JR DR'},
 'geometry': {'type': 'MultiLineString',
  'coordinates': [[[-87.62685625163756, 41.845587148411795],
    [-87.62675996392576, 41.84558902593194],
    [-87.62637708895348, 41.845596494328554],
    [-87.62626461651281, 41.845598326696425],
    [-87.62618268489399, 41.84559966093136],
    [-87.6261438116618, 41.84560027230502],
    [-87.62613206507362, 41.845600474403334],
    [-87.6261027723024, 41.8456009526551],
    [-87.62579736038116, 41.84560626159298],
    [-87.62553890383363, 41.845610239979905],
    [-87.62532611036139, 41.845613593674],
    [-87.6247932635836, 41.84562202574476]],
   [[-87.62532611036139, 41.845613593674],
    [-87.6247932635836, 41.84562202574476]],
   [[-87.6247932635836, 41.84562202574476],
    [-87.62446484629729, 41.84562675013391],
    [-87.62444032614908, 41.845627092762

In [16]:
# Array math: the ellipsis keeps every leading dimension, and index 0 picks
# the first value of each innermost [lng, lat] pair, i.e. the longitude.
longitude = bikeroutes.features.geometry.coordinates[..., 0]
longitude

<Array [[[-87.8, -87.8, ... -87.7, -87.7]]] type='1061 * var * var * float64'>

In [17]:
# Same slice with index 1: the second value of each [lng, lat] pair,
# i.e. the latitude.
latitude = bikeroutes.features.geometry.coordinates[..., 1]
latitude

<Array [[[41.9, 41.9, 41.9, ... 42, 42, 42]]] type='1061 * var * var * float64'>

In [18]:
# NumPy functions can be called directly on an Awkward Array; with no axis
# given, the mean is taken over every latitude value and yields one scalar.
np.mean(latitude)

41.86357020732942

# Benchmarking Performance

In [19]:
%%timeit

# Pure-Python baseline: total length of every bike route.
# Each route's geometry holds a list of polylines ("segments" below), and each
# polyline is a list of [lng, lat] points.
route_length = []
for route in bikeroutes_pyobj["features"]:
    path_length = []
    for segment in route["geometry"]["coordinates"]:
        segment_length = []
        last = None  # previous point in km; None until the first point is seen
        for lng, lat in segment:
            # Scale degrees to kilometres (82.7 km per degree of longitude and
            # 111.1 km per degree of latitude; presumably tuned for Chicago's
            # latitude -- confirm before reusing elsewhere).
            km_east = lng * 82.7
            km_north = lat * 111.1
            if last is not None:
                dx2 = (km_east - last[0])**2
                dy2 = (km_north - last[1])**2
                segment_length.append(np.sqrt(dx2 + dy2))
            last = (km_east, km_north)
        path_length.append(sum(segment_length))
    # Sum this route's own polylines (path_length). Summing route_length here
    # would append 0 for every route, because the list only ever holds zeros.
    route_length.append(sum(path_length))

70.3 ms ± 13.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [20]:
%%timeit

# Vectorised version of the same computation, with no Python-level loops.
# Scale every longitude / latitude to kilometres in one shot (82.7 and 111.1
# are presumably km per degree near Chicago -- confirm before reusing).
km_east = bikeroutes.features.geometry.coordinates[..., 0] * 82.7
km_north = bikeroutes.features.geometry.coordinates[..., 1] * 111.1

# Pair each point with its predecessor along the innermost axis
# ([1:] against [:-1]) and take the Euclidean distance between them.
segment_length = np.sqrt((km_east[:, :, 1:] - km_east[:, :, :-1])**2 +
                         (km_north[:, :, 1:] - km_north[:, :, :-1])**2)

# First sum the point-to-point distances within each polyline, then sum the
# polylines of each route, leaving one length per route.
path_length = np.sum(segment_length, axis=-1)
route_length = np.sum(path_length, axis=-1)

13.1 ms ± 492 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
import numba as nb

@nb.jit
def compute_lengths(bikeroutes):
    """Return the total length of every bike route, one value per feature.

    For each feature, walks every path in ``geometry.coordinates`` and adds
    up the straight-line distance between consecutive points after scaling
    longitude and latitude by fixed factors (presumably km per degree near
    Chicago -- confirm).

    Parameters
    ----------
    bikeroutes
        Record exposing ``features[i].geometry.coordinates``, where each path
        is a sequence of points indexed as ``[0]`` (longitude) and ``[1]``
        (latitude).

    Returns
    -------
    numpy.ndarray
        Float array of length ``len(bikeroutes.features)`` holding the
        accumulated length of each route.
    """
    route_length = np.zeros(len(bikeroutes.features))
    for i in range(len(bikeroutes.features)):
        for path in bikeroutes.features[i].geometry.coordinates:
            # A boolean flag plus float placeholders track the previous point
            # (presumably instead of a None sentinel so the variable types
            # stay fixed for Numba -- confirm).
            first = True
            last_east, last_north = 0.0, 0.0
            for lng_lat in path:
                km_east = lng_lat[0] * 82.7
                km_north = lng_lat[1] * 111.1
                if not first:
                    dx2 = (km_east - last_east)**2
                    dy2 = (km_north - last_north)**2
                    # Accumulate directly into this route's total.
                    route_length[i] += np.sqrt(dx2 + dy2)
                first = False
                last_east, last_north = km_east, km_north
    return route_length

compute_lengths(bikeroutes)

array([0.24076035, 0.09706818, 0.2025815 , ..., 1.42737517, 0.34667691,
       0.28063495])