diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 35c906a..52ff2e3 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -32,6 +32,9 @@ jobs: with: cache-compiled: "true" cache-registries: "true" + - name: Preload some libraries + run: | + julia --project -e "using GeoDataFrames" - name: Set up Quarto uses: quarto-dev/quarto-actions/setup@v2 @@ -43,9 +46,13 @@ jobs: # Extra repositories: extra-repositories: | https://josiahparry.r-universe.dev + - name: R deps uses: r-lib/actions/setup-r-dependencies@v2 with: cache-version: 2 extra-packages: | any::sf + + - name: Render Quarto Project + uses: quarto-dev/quarto-actions/render@v2 diff --git a/Project.toml b/Project.toml index 4723a3d..cf8d8cb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,2 +1,3 @@ [deps] GeoDataFrames = "62cb38b5-d8d2-4862-a48e-6a340996859f" +Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" diff --git a/blog.qmd b/blog.qmd index f129f06..5a6999c 100644 --- a/blog.qmd +++ b/blog.qmd @@ -32,6 +32,21 @@ knitr::opts_chunk$set(message = FALSE, warning = FALSE) # install python packages needed for this document: pkgs = c("geopandas", "shapely", "pandas", "rasterio", "matplotlib", "folium", "mapclassify") reticulate::py_install(pkgs) +x = .libPaths() +write(x, stderr()) +Sys.setenv(LD_LIBRARY_PATH = "") +``` + +```{julia} +using Pkg +println(stderr, ENV) +using Libdl +println.(stderr, Libdl.DL_LOAD_PATH); +println(stderr, "===") +println.(stderr, Libdl.dllist()); +Pkg.status() +Pkg.activate(".") +Pkg.status() ``` # Introduction @@ -71,7 +86,7 @@ See detailed description of [R](https://ogh23.robinlovelace.net/tidy#vector-data ## Python -```{python} +```python import pandas as pd from shapely import Point import geopandas as gpd @@ -95,7 +110,7 @@ library(tmap) ## Julia -```julia +```{julia} using GeoDataFrames ``` @@ -110,7 +125,7 @@ Most projects start with pre-generated data, but it's useful to create datasets ## Python -```{python} +```python poi = gpd.GeoDataFrame([ {"name": "Faculty", "geometry": Point(16.9418, 52.4643)}, {"name": "Hotel ForZa", "geometry": Point(16.9474, 52.4436)}, @@ -142,7 +157,7 @@ The following commands download data from the internet. ## Python -```{python} +```python import urllib.request import zipfile import os @@ -164,12 +179,12 @@ if (!dir.exists("data")) { ## Julia -```julia +```{julia} using Downloads u = "https://github.com/Robinlovelace/opengeohub2023/releases/download/data/data.zip" f = basename(u) if !isfile(f) - download(u, f) + Downloads.download(u, f) end ``` @@ -185,7 +200,7 @@ We can unzip the data in R and Python: it contains spatial information about bus ## Python -```{python} +```python with zipfile.ZipFile(f, 'r') as zip_ref: zip_ref.extractall() ``` @@ -207,7 +222,7 @@ Note: we recommend using open file formats such as GeoPackage (`.gpkg`), as outl ## Python -```{python} +```python pol_all = gpd.read_file("zip://data.zip!data/osm/gis_osm_transport_a_free_1.shp") pol_all ``` @@ -221,9 +236,9 @@ pol_all ## Julia -```julia -df = GeoDataFrames.read("/vsizip/data.zip/data/osm/gis_osm_transport_a_free_1.shp") -df +```{julia} +pol_all = GeoDataFrames.read("/vsizip/data.zip/data/osm/gis_osm_transport_a_free_1.shp") +pol_all ``` ::: @@ -231,7 +246,7 @@ df -Note: in R, you can read-in the dataset from the URL in a single line of code without first downloading the zip file: +Note: in R and Julia, you can read-in the dataset from the URL in a single line of code without first downloading the zip file: ```{r} #| eval: false @@ -239,6 +254,11 @@ uz = paste0("/vsizip//vsicurl/", u, "/data/osm/gis_osm_transport_a_free_1.shp") pol_all = sf::read_sf(uz) ``` +```{julia} +zipurl = "/vsizip/vsicurl/" * u * "/data/osm/gis_osm_transport_a_free_1.shp" +pol_all = GeoDataFrames.read(zipurl) +``` + ## Subsetting by attributes The following commands select a subset of the data based on attribute values (looking for a specific string in the `name` column). @@ -247,7 +267,7 @@ The following commands select a subset of the data based on attribute values (lo ## Python -```{python} +```python pol = pol_all[pol_all['name'].str.contains('Port*.+Poz', na=False)] pol ``` @@ -256,10 +276,17 @@ pol ```{r} pol = pol_all |> - filter(str_detect(name, "Port*.+Poz")) + filter(str_detect(name, "Port*.+Poz")) pol ``` +## Julia +```{julia} +using DataFrames +pol = dropmissing(pol_all, :name) +pol = subset!(pol, :name => n -> occursin.(r"Port*.+Poz", n)) +``` + ::: ## Basic plotting @@ -271,7 +298,7 @@ Note that by default, R's `plot()` method for `{sf}` objects creates a plot for ## Python -```{python} +```python pol.plot(); ``` @@ -289,7 +316,7 @@ The arguments needed to change the colour of the fill and border are different i ## Python -```{python} +```python pol.plot(color='none', edgecolor='black'); ``` @@ -310,7 +337,7 @@ Note that two steps---creating the geometry column and combining it with the ori ## Python -```{python} +```python # Unzip the data.zip file: with zipfile.ZipFile(f, 'r') as zip_ref: zip_ref.extractall("data") @@ -326,8 +353,8 @@ stops ```{r} stops = read_csv("data/gtfs/stops.txt") |> - select(-stop_code) |> - st_as_sf(coords = c("stop_lon", "stop_lat"), crs = "EPSG:4326") + select(-stop_code) |> + st_as_sf(coords = c("stop_lon", "stop_lat"), crs = "EPSG:4326") stops ``` @@ -342,7 +369,7 @@ Note that the **tmap** package is hereby used in R to create these more advanced ## Python -```{python} +```python stops.plot(markersize=1, column='zone_id', legend=True); ``` @@ -350,7 +377,7 @@ stops.plot(markersize=1, column='zone_id', legend=True); ```{r} tm_shape(stops) + - tm_symbols(size = 0.1, col = "zone_id") + tm_symbols(size = 0.1, col = "zone_id") ``` ::: @@ -361,7 +388,7 @@ We can add basic overlays in both languages as follows. ## Python -```{python} +```python base = stops.plot(markersize=0.1) poi.plot(ax=base, color='red'); ``` @@ -389,7 +416,7 @@ Note that with **tmap**, you can use the same code to create static and interact ## Python -```{python} +```python stops.explore(column='zone_id', legend=True, cmap='Dark2') ``` @@ -398,7 +425,7 @@ stops.explore(column='zone_id', legend=True, cmap='Dark2') ```{r} tmap_mode("view") tm_shape(stops) + - tm_symbols(size = 0.1, col = "zone_id") + tm_symbols(size = 0.1, col = "zone_id") ``` ::: @@ -411,7 +438,7 @@ The following commands reproject the data to a local projected Coordinate Refere ## Python -```{python} +```python poi.crs poi_projected = poi.to_crs(2180) stops_projected = stops.to_crs(2180) @@ -443,7 +470,7 @@ For buffer operations to work in Python you must reproject the data first (which To create a new vector layers named `poi_buffer`, in both languages, we can do the following. -```{python} +```python poi_buffer = poi.copy() poi_buffer.geometry = poi_projected.buffer(150).to_crs(4326) ``` @@ -464,7 +491,7 @@ An interesting difference between R and Python is that the former uses the `unit ## Python -```{python} +```python poi_buffer.to_crs(2180).area ``` @@ -488,7 +515,7 @@ The R code is more concise because there is a special `[` notation for the speci ## Python -```{python} +```python poi_union = poi_buffer.unary_union sel = stops.intersects(poi_union) stops_in_b = stops[sel] @@ -513,7 +540,7 @@ See the [Python](https://geobgu.xyz/presentations/p_2023_ogh/01-vector.html#sec- ## Python -```{python} +```python poi_buffer.sjoin(stops, how='left') ``` @@ -525,7 +552,7 @@ st_join(poi_buffer, stops) ::: -```{python} +```python #| eval: false #| echo: false # Aim: check to see if there's a bug in sjoin diff --git a/osm.qmd b/osm.qmd index 0fdb087..3279de0 100644 --- a/osm.qmd +++ b/osm.qmd @@ -344,7 +344,7 @@ if (!"pyrosm" %in% py_pkg_names) { Search for Poznan in extracts available from `pyrosm` as follows (note: this fails for me currently as documented in [github.com/HTenkanen/pyrosm/issues/217](https://github.com/HTenkanen/pyrosm/issues/217)): -```{python} +```python #| eval: false import pyrosm from pyrosm import OSM @@ -376,7 +376,7 @@ if (!"osmnx" %in% py_pkg_names) { ``` -```{python} +```python import osmnx as ox import pandas as pd import geopandas as gpd @@ -387,7 +387,7 @@ poznan_polygon.plot(); That is quite a big network, so let's get the area of the polygon and use that to get a smaller network from [GitHub](https://github.com/Robinlovelace/opengeohub2023/raw/main/pois_buffer_simple.geojson): -```{python} +```python # Get data from https://github.com/Robinlovelace/opengeohub2023/raw/main/pois_buffer_simple.geojson: poznan_small = gpd.read_file("https://github.com/Robinlovelace/opengeohub2023/raw/main/pois_buffer_simple.geojson") poznan_small.plot(); @@ -395,19 +395,19 @@ poznan_small.plot(); Download the cycling network as follows: -```{python} +```python G_cycle = ox.graph_from_polygon(poznan_small.geometry[0], network_type="bike") ``` Plot the results: -```{python} +```python ox.plot_graph(G_cycle) ``` Get basic stats as follows: -```{python} +```python area = ox.project_gdf(poznan_small).unary_union.area stats = ox.basic_stats(G_cycle, area=area) pd.Series(stats) @@ -415,7 +415,7 @@ pd.Series(stats) We can convert the object into a 'GeoDataFrame' as follows: -```{python} +```python cycle_gdf = ox.graph_to_gdfs(G_cycle, edges=True) cycle_gdf[1].plot(); ```