In [None]:
# Data loading (features)

import pandas as pd
import json

json_features_file = open("flickr-features.json")

parsed_json_features = json.load(json_features_file)

df_features = pd.json_normalize(parsed_json_features["features"])

df_features = df_features[["latLng", "rank", "resolutions.HOUR.fnRank", "resolutions.HOUR.sigRank", "resolutions.HOUR.maxRank", "resolutions.DAYOFWEEK.fnRank", "resolutions.DAYOFWEEK.sigRank", "resolutions.DAYOFWEEK.maxRank", "resolutions.MONTH.fnRank", "resolutions.MONTH.sigRank", "resolutions.MONTH.maxRank"]]

df_hour = df_features[["latLng", "rank", 
    "resolutions.HOUR.fnRank", "resolutions.HOUR.sigRank", "resolutions.HOUR.maxRank"]].copy()

df_dayofweek = df_features[["latLng", "rank", 
    "resolutions.DAYOFWEEK.fnRank", "resolutions.DAYOFWEEK.sigRank", "resolutions.DAYOFWEEK.maxRank"]].copy()

df_month = df_features[["latLng", "rank", 
    "resolutions.MONTH.fnRank", "resolutions.MONTH.sigRank", "resolutions.MONTH.maxRank"]].copy()

return df_hour, df_dayofweek, df_month

Index(['rank', 'gridIndex', 'latLng', 'resolutions.ALL.isMaxima',
       'resolutions.ALL.maxRank', 'resolutions.ALL.fnRank',
       'resolutions.ALL.sigRank', 'resolutions.ALL.maxTime',
       'resolutions.ALL.sigMaxTime', 'resolutions.ALL.fn',
       'resolutions.ALL.scalars', 'resolutions.HOUR.isMaxima',
       'resolutions.HOUR.maxRank', 'resolutions.HOUR.fnRank',
       'resolutions.HOUR.sigRank', 'resolutions.HOUR.maxTime',
       'resolutions.HOUR.sigMaxTime', 'resolutions.HOUR.fn',
       'resolutions.HOUR.scalars', 'resolutions.DAYOFWEEK.isMaxima',
       'resolutions.DAYOFWEEK.maxRank', 'resolutions.DAYOFWEEK.fnRank',
       'resolutions.DAYOFWEEK.sigRank', 'resolutions.DAYOFWEEK.maxTime',
       'resolutions.DAYOFWEEK.sigMaxTime', 'resolutions.DAYOFWEEK.fn',
       'resolutions.DAYOFWEEK.scalars', 'resolutions.MONTH.isMaxima',
       'resolutions.MONTH.maxRank', 'resolutions.MONTH.fnRank',
       'resolutions.MONTH.sigRank', 'resolutions.MONTH.maxTime',
       'resolutions.M

In [None]:
# Computation Analysis (HOUR)

df_hour = arg[0]

df_hour = df_hour.rename(columns={"resolutions.HOUR.fnRank": "fnRank", "resolutions.HOUR.sigRank": "sigRank", "resolutions.HOUR.maxRank": "maxRank"})

df_hour = df_hour.dropna(subset=["fnRank"])
df_hour = df_hour.dropna(subset=["sigRank"])
df_hour = df_hour.dropna(subset=["maxRank"])

return df_hour


In [None]:
# Computation Analysis (DAYOFWEEK)

df_dayofweek = arg[1]

df_dayofweek = df_dayofweek.rename(columns={"resolutions.DAYOFWEEK.fnRank": "fnRank", "resolutions.DAYOFWEEK.sigRank": "sigRank", "resolutions.DAYOFWEEK.maxRank": "maxRank"})

df_dayofweek = df_dayofweek.dropna(subset=["fnRank"])
df_dayofweek = df_dayofweek.dropna(subset=["sigRank"])
df_dayofweek = df_dayofweek.dropna(subset=["maxRank"])

return df_dayofweek


In [None]:
# Computation Analysis (MONTH)

df_month = arg[2]

df_month = df_month.rename(columns={"resolutions.MONTH.fnRank": "fnRank", "resolutions.MONTH.sigRank": "sigRank", "resolutions.MONTH.maxRank": "maxRank"})

df_month = df_month.dropna(subset=["fnRank"])
df_month = df_month.dropna(subset=["sigRank"])
df_month = df_month.dropna(subset=["maxRank"])

return df_month


In [None]:
# Computation Analysis (computing rank - one for each period)

import numpy as np

df_scatterplot = arg[["rank", "fnRank", "sigRank", "maxRank"]]

df_scatterplot["combinedRanks"] = np.sqrt(
    df_scatterplot["maxRank"] ** 2 +
    df_scatterplot["fnRank"] ** 2 +
    df_scatterplot["sigRank"] ** 2
)

df_scatterplot['linked'] = df_scatterplot.index.to_series().apply(lambda x: [x])

return df_scatterplot

In [None]:
# Data pool (one for each period)

In [None]:
# Vega-Lite (one for each period)

{
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "title": "RANK (HOUR)",
  "params": [
    {
      "name": "clickSelect",
      "select": "interval"
    }
  ],
  "mark": {
    "type": "point",
    "cursor": "pointer"
  },
  "encoding": {
    "x": {
      "field": "combinedRanks",
      "type": "quantitative"
    },
    "y": {
      "field": "rank",
      "type": "quantitative"
    },
    "fillOpacity": {
      "condition": {
        "param": "clickSelect",
        "value": 1
      },
      "value": 0.3
    },
    "color": {
      "field": "interacted",
      "type": "nominal",
      "condition": {
        "test": "datum.interacted === '1'",
        "value": "red",
        "else": "blue"
      }
    }
  },
  "config": {
    "scale": {
      "bandPaddingInner": 0.2
    }
  }
}



In [None]:
# Data Loading

import pandas as pd
import json

json_features_file = open("flickr-features.json")

parsed_json_features = json.load(json_features_file)

df_features = pd.json_normalize(parsed_json_features["features"])

df_features = df_features[["latLng"]]

return df_features


In [None]:
# Data transformation (connected to df_features - convert latLng into points and create buffer)

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

df_points = arg

df_points = df_points.explode("latLng", ignore_index=True)

df_points["geometry"] = df_points["latLng"].apply(lambda x: Point(x[1], x[0]))  # (lon, lat)

gdf_points = gpd.GeoDataFrame(df_points, geometry="geometry", crs="EPSG:4326")

gdf_points = gdf_points.drop(columns=["latLng"])

gdf_points = gdf_points.to_crs(3857)

gdf_points["geometry"] = gdf_points["geometry"].buffer(50)

gdf_points["value"] = 1

gdf_points['linked'] = gdf_points.index.to_series().apply(lambda x: [x])

gdf_points = gdf_points[["geometry", "value", "linked"]]

gdf_points = gdf_points.to_crs(3395)

gdf_points.metadata = {
    'name': 'pulse'
}

return gdf_points

In [None]:
# Data Pool

In [None]:
import utk

uc = utk.OSM.load([40.67187576076156, -74.0703927880446, 40.928446768674455, -73.8413807958497], layers=['parks'])

#parks
json_parks = uc.layers['json'][0]
gdf_parks = uc.layers['gdf']['objects'][0]
gdf_parks.metadata = {
 'name': 'parks',
 'style': 'parks'
}

return gdf_parks

In [None]:
import geopandas as gpd
from shapely.geometry import box

gdf_zip = gpd.read_file("nyc_zip.geojson")

gdf_zip = gdf_zip[["geometry"]]

gdf_zip = gdf_zip.to_crs("EPSG:4326")

min_lat, max_lat = 40.67187576076156, 40.928446768674455
min_lon, max_lon = -74.0703927880446, -73.8413807958497
bbox = box(min_lon, min_lat, max_lon, max_lat)
bbox_gdf = gpd.GeoDataFrame(geometry=[bbox], crs="EPSG:4326")

gdf_zip = gdf_zip[gdf_zip.within(bbox_gdf.iloc[0].geometry)]

gdf_zip = gdf_zip.to_crs("3395")

gdf_zip.metadata = {
    'name': 'zip'
}

return gdf_zip

In [None]:
import utk

uc = utk.OSM.load([40.67187576076156, -74.0703927880446, 40.928446768674455, -73.8413807958497], layers=['water'])

json_water = uc.layers['json'][0]
gdf_water = uc.layers['gdf']['objects'][0]
gdf_water.metadata = {
 'name': 'water',
 'style': 'water'
}

return gdf_water

In [None]:
# Merge (gdf_water, gdf_zip, gdf_parks, data pool)