# Python Workshop - 2025

<div>
    <img src="../images/qcbs_logo_v2.svg" style="background-color: #f0f0f0; padding: 20px;"/>
</div>

<div>
    <img src="../images/python_logo_generic.svg" style="background-color: #f0f0f0; padding: 20px;"/>
</div>

**Last update**: 2025-05-19  
**Author**: El-Amine Mimouni  
**Affiliation**: Québec Centre for Biodiversity Science

**Overview**: In this notebook, we will see how to use PyProj.

---

# PyProj

Information about PyProj can be found at [https://pyproj4.github.io/pyproj/stable/index.html](https://pyproj4.github.io/pyproj/stable/index.html).

Information about the underlying PROJ library, which PyProj wraps, can be found at [https://proj.org/en/stable/index.html](https://proj.org/en/stable/index.html).

In [None]:
# The star of today
import pyproj

# Other actors
import pandas as pd
import matplotlib.pyplot as plt
from pprint import pprint

# Projections and CRS

A projection is a way of representing FEATURES, which lie on the curved 3D surface of the Earth, on a flat 2D map. A Coordinate Reference System (CRS) defines how spatial data relates to real-world locations: it combines a datum and a coordinate system with, in the case of a projected CRS, a map projection.

In [None]:
# Build one CRS object per EPSG code.
# Note: pyproj.CRS offers other .from_x() constructors worth checking out.
wgs84 = pyproj.CRS.from_string("EPSG:4326")
nad83_ql = pyproj.CRS.from_string("EPSG:32198")
nad83_cali = pyproj.CRS.from_string("EPSG:2227")

# Print the summary and the Python type of each CRS in turn.
# The second and third headers start with a newline so that the three
# reports are separated by a blank line.
for header, crs in (
    ("WGS84 info:", wgs84),
    ("\nNAD83 / QL info:", nad83_ql),
    ("\nNAD83 / Cali info:", nad83_cali),
):
    print(header)
    print(crs)
    print(type(crs))

In [None]:
# More info can be obtained from the WKT representation of each CRS.
# to_wkt(pretty=True) asks pyproj for an indented, multi-line WKT string.
# This is far easier to read than pprint()-ing the default single-line
# string, which only chops the text into quoted fragments.
print("WKT representation of WGS84:")
print(wgs84.to_wkt(pretty=True))

print("\nWKT representation of NAD83 / QL:")
print(nad83_ql.to_wkt(pretty=True))

print("\nWKT representation of NAD83 / Cali:")
print(nad83_cali.to_wkt(pretty=True))

In [None]:
# Print the area_of_use attribute of each CRS, one report per CRS
for header, crs in (
    ("Area of use for WGS84:", wgs84),
    ("\nArea of use for NAD83 / QL:", nad83_ql),
    ("\nArea of use for NAD83 / Cali:", nad83_cali),
):
    print(header)
    print(crs.area_of_use)

In [None]:
# Pertinent information: ask each CRS whether it is a projected CRS
for question, crs in (
    ("Is WGS84 projected?", wgs84),
    ("\nIs NAD83 / QL projected?", nad83_ql),
    ("\nIs NAD83 / Cali projected?", nad83_cali),
):
    print(question)
    print(crs.is_projected)

In [None]:
# Pertinent information: print the axis_info attribute of each CRS
for header, crs in (
    ("\nAxis information for WGS84:", wgs84),
    ("\nAxis information for NAD83 / QL:", nad83_ql),
    ("\nAxis information for NAD83 / Cali:", nad83_cali),
):
    print(header)
    print(crs.axis_info)

In [None]:
# The WGS84 ellipsoid is summarised here by two values:
# its semi-major axis and its inverse flattening.
wgs84_ellipsoid = wgs84.ellipsoid

print("\nEllipsoid information for WGS84:")
print("- Semi-major axis:", wgs84_ellipsoid.semi_major_metre)
print("- Inverse flattening:", wgs84_ellipsoid.inverse_flattening)

In [None]:
# Always check the units of a CRS: we don't want another Mars Climate Orbiter incident...

# Passing from one CRS to another

Switching from one CRS to another requires TRANSFORMERS.

In [None]:
# Build a Transformer that goes from WGS84 to NAD83 / QL.
# always_xy=True is requested so that coordinates are passed in x-y order
# (i.e. Lon-Lat for WGS84).
my_transformer = pyproj.Transformer.from_crs(
    crs_from=wgs84,
    crs_to=nad83_ql,
    always_xy=True,
)

# Print out info about it: the object itself, then its type
print(my_transformer, type(my_transformer), sep="\n")

In [None]:
# Montreal and Quebec stored as tuples.
# Notice the Lon-Lat order: (longitude, latitude).
montreal_wgs84 = (-73.5673, 45.5017)
quebec_wgs84 = (-71.2082, 46.8139)

# Echo both tuples
for message, lon_lat in (
    ("Coordinates of Montreal in WGS84: ", montreal_wgs84),
    ("Coordinates of Quebec in WGS84: ", quebec_wgs84),
):
    print(message, lon_lat)

In [None]:
# Convert both cities with the .transform() method of my_transformer.
# Note: it needs the Lon-Lat order, which is how the tuples were written,
# so longitude goes to xx and latitude goes to yy.
montreal_lon, montreal_lat = montreal_wgs84
quebec_lon, quebec_lat = quebec_wgs84

montreal_nad83_ql = my_transformer.transform(xx=montreal_lon, yy=montreal_lat)
quebec_nad83_ql = my_transformer.transform(xx=quebec_lon, yy=quebec_lat)

# Report the transformed coordinates, rounded to two decimals
print(f"Coordinates of Montreal in NAD83 / QL: ({montreal_nad83_ql[0]:.2f}, {montreal_nad83_ql[1]:.2f})")
print(f"Coordinates of Quebec in NAD83 / QL: ({quebec_nad83_ql[0]:.2f}, {quebec_nad83_ql[1]:.2f})")

To verify them (in case you doubt yourself), look the transformed coordinates up on epsg.io:

- Montreal: https://epsg.io/map#srs=32198&x=-396122.43209208664&y=181374.14914630336&z=9&layer=streets
- Quebec: https://epsg.io/map#srs=32198&x=-206315.6389128428&y=317060.9367326632&z=8&layer=streets

# Calculating geodetic distances

PyProj allows for the calculation of geodetic distances through its `Geod` class.

In [None]:
# Two ways of initializing a Geod for WGS84 are shown.

# 1) From the name of the ellipsoid:
geod_wgs84 = pyproj.Geod(ellps="WGS84")

# 2) Identical to the above, but from the ellipsoid parameters of the wgs84
#    CRS: semi-major axis (a) and flattening (f = 1 / inverse flattening).
#    This assignment replaces the Geod created in 1).
wgs84_semi_major = wgs84.ellipsoid.semi_major_metre
wgs84_flattening = 1.0 / wgs84.ellipsoid.inverse_flattening
geod_wgs84 = pyproj.Geod(a=wgs84_semi_major, f=wgs84_flattening)

# Print info about it: the object itself, then its type
print(geod_wgs84, type(geod_wgs84), sep="\n")

In [None]:
# Second endpoint for the distance computation: Paris.
# Montreal was already defined earlier, so only Paris is added here,
# again in Lon-Lat form: (longitude, latitude).
paris_lon, paris_lat = 2.3522, 48.8566
paris_wgs84 = (paris_lon, paris_lat)

In [None]:
# Solve the inverse problem (distance, azimuths) between Montreal and Paris
# using the WGS84 ellipsoidal model.
# Note: the ARGUMENTS are slightly more informative here (and plural at that!)
montreal_lon, montreal_lat = montreal_wgs84
paris_lon, paris_lat = paris_wgs84

azimuth1_wgs84, azimuth2_wgs84, distance_wgs84 = geod_wgs84.inv(
    lons1=montreal_lon,
    lats1=montreal_lat,
    lons2=paris_lon,
    lats2=paris_lat,
)

# BTW, in geodesy:
#
# - Forward problem: You are given the coordinates of a starting point, a distance and an azimuth.
#                    Based on these, you want to compute the coordinates of the destination point.
#
# - Inverse problem: You are given the coordinates of two points.
#                    Based on these, you want to compute the distance and the azimuths between them.

In [None]:
# WGS84 distance between Montreal and Paris.
# distance_wgs84 is divided by 1000 to report it in km, with two decimals.
print(f"Distance (WGS84): {distance_wgs84 / 1000:.2f} km")

In [None]:
# Spherical model just for fun: same planet, but a perfect sphere (f = 0).
# The radius used here is the mean of the WGS84 semi-major and semi-minor axes.
# Alternatives to consider:
#   pyproj.Geod(a=6371000.00, f=0.0)
#   pyproj.Geod(ellps="sphere")
wgs84_ellipsoid = wgs84.ellipsoid
sphere_radius = (wgs84_ellipsoid.semi_major_metre + wgs84_ellipsoid.semi_minor_metre) / 2.0
geod_sphere = pyproj.Geod(a=sphere_radius, f=0.0)

# Compute the inverse (distance, azimuths), this time using the spherical model
azimuth1_sphere, azimuth2_sphere, distance_sphere = geod_sphere.inv(
    lons1=montreal_wgs84[0],
    lats1=montreal_wgs84[1],
    lons2=paris_wgs84[0],
    lats2=paris_wgs84[1],
)

# Spherical distance in km:
print(f"Distance (Spherical): {distance_sphere / 1000:.2f} km")

In [None]:
# How different are the two distances?
# Ellipsoidal (WGS84) distance minus spherical distance, divided by 1000
# to report the gap in km.
print(f"Difference between the two distances: {(distance_wgs84 - distance_sphere) / 1000:.2f} km")

# Biological application


In [None]:
# Load the journey of KOR0104
# Original data landing page:
# https://www.movebank.org/cms/webapp?gwt_fragment=page=studies,path=study3030282201
kor0104_csv = "../data/KOR0104-43589.csv"
df_104 = pd.read_csv(filepath_or_buffer=kor0104_csv)

# Show the first rows (last expression of the cell, so it gets displayed)
df_104.head()

In [None]:
# Compute the inverse (distance, azimuths) using the WGS84 ellipsoidal model
# for every pair of consecutive positions. Dropping the last row gives the
# start points and dropping the first row gives the end points (.iloc[] is
# the tool of choice here), so each stop of the turtle is matched with the
# next one.
lons_kor104 = df_104["location-long"]
lats_kor104 = df_104["location-lat"]

azimuth1s_wgs84, azimuths2_wgs84, distances_kor104 = geod_wgs84.inv(
    lons1=lons_kor104.iloc[:-1],
    lats1=lats_kor104.iloc[:-1],
    lons2=lons_kor104.iloc[1:],
    lats2=lats_kor104.iloc[1:],
)

In [None]:
# Print the distances: a header line, then the values on the next line
print("Array of distances:", distances_kor104, sep="\n")

In [None]:
# BTW, can you guess what type distances_kor104 is?
# Printing type(...) reveals the answer.
print(type(distances_kor104))

In [None]:
# Print mini-message: all the distances are summed, and the total is
# divided by 1000 to report it in km with two decimals.
print(f"Total distance traveled by KOR-104: {distances_kor104.sum() / 1000:.2f} km")

In [None]:
# Create a figure and axis
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(18, 10))
ax.set_aspect(aspect="auto", adjustable="box")

# Line plot of the distance between each pair of consecutive positions.
# Geod.inv() returns distances in metres, so they are converted to km here
# to agree with the y-axis label (same /1000 conversion as the printed totals).
ax.plot(distances_kor104 / 1000)

# Add axis labels and title
ax.set_title(label="Lineplot of distances traveled by the turtle KOR-0104!")
ax.set_xlabel(xlabel="Series")
ax.set_ylabel(ylabel="Distance traveled (in km)")

# Adjust layout and show the plot
fig.tight_layout()
plt.show()

# Bonus round: time series in Pandas

In [None]:
# Have a look at the "timestamp" column: its first rows, then its dtype
timestamp_column = df_104["timestamp"]
print(timestamp_column.head(), timestamp_column.dtype, sep="\n")

In [None]:
# Convert the column to Pandas' own datetime format (once), then look at
# the first rows and at the dtype of the result. df_104 itself is not
# modified by this cell.
timestamps_parsed = pd.to_datetime(df_104["timestamp"])
print(timestamps_parsed.head())
print(timestamps_parsed.dtype)

In [None]:
# Overwrite the "timestamp" column of df_104 with its pd.to_datetime()
# conversion. The existing column is replaced, so there is no need for
# an additional column.
df_104["timestamp"] = pd.to_datetime(df_104["timestamp"])

In [None]:
# Create a figure and axis
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(18, 10))
ax.set_aspect(aspect="auto", adjustable="box")

# Line plot of the distance between consecutive positions against time.
# - There is one distance per pair of consecutive rows, so the first
#   timestamp is dropped (positionally, with .iloc) and each distance is
#   drawn at the timestamp of the position where that leg ends.
# - Geod.inv() returns distances in metres, so they are converted to km
#   here to agree with the y-axis label.
ax.plot(df_104["timestamp"].iloc[1:], distances_kor104 / 1000)

# Add axis labels and title
ax.set_title(label="Distance traveled by the turtle KOR-104!\nWith time on the x-axis!")
ax.set_xlabel(xlabel="Time")
ax.set_ylabel(ylabel="Distance traveled (in km)")

# Adjust layout and show the plot
fig.tight_layout()
plt.show()