# Earthquakes in 2025 — EDA

We take a quick look at the 2025 earthquake catalog and use a few simple views to see what patterns and questions emerge.

In [1]:
from shapely.geometry import box
import pandas as pd
import geopandas as gpd

from lets_plot import *
from lets_plot.tilesets import *
from lets_plot.geo_data import *

The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).


In [2]:
def custom_theme():
    """Return the theme overrides used as this notebook's house style.

    The result sets the named geometry colors ("pen", "brush", "paper") and
    the font sizes of the plot title and the plot caption.
    """
    geom_colors = element_geom(pen="teal", brush="turquoise", paper="light_yellow")
    title_font = element_text(size=18)
    caption_font = element_text(size=10)
    return theme(geom=geom_colors, plot_title=title_font, plot_caption=caption_font)

In [3]:
# Prepare lets-plot for rendering its output inside the notebook.
LetsPlot.setup_html()

# Global default theme: the gray base theme plus the notebook's own overrides.
# Note: theme_gray() is the same as theme_grey()!
default_theme = theme_gray() + custom_theme()
LetsPlot.set_theme(default_theme)

In [4]:
def title_layer(title, subtitle=None):
    """Return a ``labs(...)`` layer carrying the plot title and the data-source caption.

    Parameters:
        title: text of the plot title.
        subtitle: optional subtitle; passed through as ``None`` when omitted.

    The caption is always the same HTML link to the USGS Earthquake Catalog.
    """
    usgs_caption = '<a href="https://earthquake.usgs.gov/fdsnws/event/1/" target="_blank">USGS Earthquake Catalog</a>'
    return labs(title=title, subtitle=subtitle, caption=usgs_caption)

In [5]:
# Named colors for plots: one fixed color per magnitude type ("magType" value),
# passed to scale_color_manual() so a given type is colored the same way everywhere.
mag_type_colors = dict(
    mb="tomato",
    mww="lime_green",
    mw="rebeccapurple",
    mwr="orange",
    ml="turquoise",
    md="plum",
    mwb="olive",
    ms_vx="navy",
)

In [6]:
# Read the 2025 earthquake catalog; the "date" column is parsed into datetimes.
catalog_path = "../data/usgs_earthquakes_2025_m45.csv"
df = pd.read_csv(catalog_path, parse_dates=["date"])

# Quick sanity check: dimensions first, then a two-row preview.
print(df.shape)
df.head(2)

(8252, 22)


Unnamed: 0,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource,date
0,-3.8682,151.6536,10.0,4.7,mb,39.0,99.0,0.557,0.86,us,...,"67 km WNW of Rabaul, Papua New Guinea",earthquake,6.86,1.901,0.108,26.0,reviewed,us,us,2025-01-01
1,-3.8521,151.6207,10.0,4.5,mb,36.0,98.0,0.591,0.98,us,...,"71 km WNW of Rabaul, Papua New Guinea",earthquake,7.8,1.901,0.116,22.0,reviewed,us,us,2025-01-01


In [7]:
# Turn the catalog into a GeoDataFrame: one point per event built from
# (longitude, latitude), declared in the EPSG:4326 coordinate reference system.
epicenter_points = gpd.points_from_xy(df["longitude"], df["latitude"])
gdf = gpd.GeoDataFrame(df, geometry=epicenter_points, crs="EPSG:4326")

print(gdf.shape)
gdf.head(2)

(8252, 23)


Unnamed: 0,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,...,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource,date,geometry
0,-3.8682,151.6536,10.0,4.7,mb,39.0,99.0,0.557,0.86,us,...,earthquake,6.86,1.901,0.108,26.0,reviewed,us,us,2025-01-01,POINT (151.65360 -3.86820)
1,-3.8521,151.6207,10.0,4.5,mb,36.0,98.0,0.591,0.98,us,...,earthquake,7.8,1.901,0.116,22.0,reviewed,us,us,2025-01-01,POINT (151.62070 -3.85210)


In [8]:
# Load the plate polygons (one row per plate, with its code and name).
plates_path = "../data/plates.json"
plates_gdf = gpd.read_file(plates_path)

print(plates_gdf.shape)
plates_gdf.head(2)

(54, 4)


Unnamed: 0,LAYER,Code,PlateName,geometry
0,plate,AF,Africa,"POLYGON ((-0.43790 -54.85180, -0.91465 -54.453..."
1,plate,AN,Antarctica,"POLYGON ((180.00000 -65.74940, 180.00000 -90.0..."


## Distribution of Magnitude and Depth

Start with the basic distributions to see what values are typical and how heavy the tails are.

In [9]:
# Hand-picked, unevenly spaced bin edges (custom breaks) for the two histograms.
magnitude_breaks = [4.5, 4.6, 4.8, 5, 5.5, 9]
depth_breaks = [0, 10, 40, 100, 400, 700]

# fill='brush' refers to the named "brush" geometry color configured in the theme.
magnitude_histogram_plot = (
    ggplot(df, aes(x="mag"))
    + geom_histogram(breaks=magnitude_breaks, fill='brush')
    + xlab("magnitude")
    + title_layer("Magnitude distribution")
)

depth_histogram_plot = (
    ggplot(df, aes(x="depth"))
    + geom_histogram(breaks=depth_breaks, fill='brush')
    + title_layer("Depth distribution")
)

# Show both histograms together in one grid.
gggrid([magnitude_histogram_plot, depth_histogram_plot])

## Magnitude and Density of Epicenters on Map

Map epicenters and color by magnitude/density to see where earthquakes cluster geographically and whether larger events concentrate in particular regions.

In [10]:
# Base map reused by both panels: interactive tiles with the plate polygons
# drawn on top as a faint, legend-less fill.
plate_tooltips = layer_tooltips().line("Plate name|@PlateName")
p = (
    ggplot()
    + geom_livemap(const_size_zoomin=1, tiles=LETS_PLOT_LIGHT)
    + geom_map(aes(fill="PlateName"), map=plates_gdf, size=0, alpha=.15,
               show_legend=False, tooltips=plate_tooltips)
    + theme(legend_position='bottom')
)

# Panel 1: epicenters colored by magnitude. The data is sorted ascending by "mag",
# presumably so that the strongest events are drawn last (on top).
magnitude_map_plot = (
    p
    + geom_point(aes("longitude", "latitude", color="mag"),
                 data=df.sort_values(by="mag"),
                 tooltips=layer_tooltips().line("magnitude|@mag"))
    + scale_color_viridis(option='magma')
    + title_layer("Epicenters: magnitude")
)

# Panel 2: the same epicenters through geom_pointdensity (new geometry).
density_map_plot = (
    p
    + geom_pointdensity(aes("longitude", "latitude"), data=df)
    + scale_color_viridis()
    + title_layer("Epicenters: density")
)

# Stack the two maps in a single column.
gggrid([magnitude_map_plot, density_map_plot], ncol=1) + ggsize(800, 1000)

### A Closer Look at Japan, a Country With a High Density of Earthquake Epicenters

Zoom into a high-activity region to examine local clustering and variation in event characteristics. The map shows where events occur and how depth and magnitude vary spatially, while the inset scatter provides a quick check of the depth–magnitude relationship within the same region.

In [11]:
# Geocode Japan and its states; fetch state centroids (for labels) and
# higher-resolution state boundaries (for the map outline).
country = geocode_countries(names="Japan")
states = geocode_states().scope(country)
states_centroids_gdf = states.get_centroids()
states_boundaries_gdf = states.inc_res(2).get_boundaries()

# Keep the events lying within the bounding box of Japan's boundary geometry.
# This is a rectangle around the country, not the exact country outline.
japan_bounds = country.get_boundaries().iloc[0]["geometry"].bounds
japan_bbox = box(*japan_bounds)
japan_gdf = gdf[gdf["geometry"].within(japan_bbox)]

# Tooltips: state name for the polygons; coordinates, magnitude and depth for the events.
state_tooltips = layer_tooltips().line("@{found name}")
event_tooltips = (
    layer_tooltips()
    .title("(@longitude, @latitude)")
    .format("@longitude", ".2~f")
    .format("@latitude", ".2~f")
    .line("magnitude|@mag")
    .line("@|@depth")
)

japan_map_plot = (
    ggplot()
    + geom_map(data=states_boundaries_gdf, color='pen', fill='paper', tooltips=state_tooltips)
    + geom_point(aes(fill="depth", size="mag"), map=japan_gdf.sort_values(by="mag"),
                 shape=21, color="black", tooltips=event_tooltips)
    # new geometry: labels that repel each other; the seed is fixed
    + geom_label_repel(aes(label="found name"), data=states_centroids_gdf, color="maroon",
                       max_iter=5_000, max_time=-1, max_overlaps=7, seed=10)
    + scale_fill_viridis(option='cividis', direction=-1)
    + scale_size(name="magnitude")
    + title_layer("Earthquakes in Japan")
    + theme_void() + custom_theme() + theme(plot_background=element_rect(fill="azure"))
)

# Inset scatter of depth against magnitude; the geometry column is dropped
# because this plot only needs the plain attribute columns.
japan_scatter_plot = (
    ggplot()
    + geom_point(aes("mag", "depth"), data=japan_gdf.drop(columns=["geometry"]), color="royal_blue")
    + xlab("magnitude")
)

# One region per plot: the map takes the whole canvas, the scatter a small inset.
bunch_regions = [(0, 0, 1, 1), (.1, .05, .32, .24)]
ggbunch([japan_map_plot, japan_scatter_plot], bunch_regions) + ggsize(800, 800)  # a replacement for GGBunch

## Scatter Plots

Check whether stronger earthquakes tend to occur at particular depths and whether this relationship differs by magnitude type.

In [12]:
# Depth against magnitude, colored by magnitude type.
depth_vs_magnitude_plot = (
    ggplot(df, aes("mag", "depth", color="magType"))
    + geom_point(alpha=.5)
    + scale_color_manual(mag_type_colors, name="magnitude type")
    + xlab("magnitude")
    + title_layer("Depth vs. Magnitude")
)

# Magnitude error against the number of stations, with the same coloring.
nst_vs_mag_error_plot = (
    ggplot(df, aes("magNst", "magError", color="magType"))
    + geom_point(alpha=.5)
    + scale_color_manual(mag_type_colors, name="magnitude type")
    + xlab("number of stations")
    + ylab("magnitude error")
    + title_layer("Number of stations vs. Magnitude error")
)

# guides='collect' gives the two plots shared legends.
scatter_grid = gggrid([depth_vs_magnitude_plot, nst_vs_mag_error_plot], guides='collect')

(scatter_grid
   + ggtb(size_zoomin=2)               # control how zooming in affects the size of geometry objects
   + theme(legend_position='bottom'))  # legend wraps automatically

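The second plot above is a data-quality check: it shows how magnitude uncertainty changes with observational coverage. More stations typically means more stable estimates. Next, break the depth–magnitude relationship down by magnitude type and tectonic plate, keeping only the plates with the most events.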

In [13]:
top_size = 5  # how many plates (those with the most events) to keep

# Attach to every event the attributes of the plate polygon it lies within.
# how='left' keeps events that match no plate.
joined_gdf = gpd.sjoin(
    gdf, plates_gdf,
    how='left', predicate='within',
    lsuffix="earthquake", rsuffix="plate",
)

# value_counts() orders plates by descending event count, so the head is the top group.
plate_counts = joined_gdf["PlateName"].value_counts()
top_plate_names = plate_counts.head(top_size).index.tolist()
is_top_plate = joined_gdf["PlateName"].isin(top_plate_names)
top_joined_gdf = joined_gdf[is_top_plate].reset_index(drop=True)

# One panel per (magnitude type, plate) pair; drop=False keeps empty panels too,
# and ncol=top_size puts one plate per column.
type_by_plate_facets = facet_wrap(["magType", "PlateName"], drop=False, ncol=top_size)

(ggplot(top_joined_gdf)
  + geom_point(aes("mag", "depth", color="magType"), alpha=.5, tooltips='none')
  + scale_color_manual(mag_type_colors, name="magnitude type")
  + xlab("magnitude")
  + type_by_plate_facets
  + theme(panel_spacing=4, strip_spacing=2)  # configure spacing between panels in facets
  + title_layer("Depth vs. Magnitude by type and plate"))

## Explore Distribution of Magnitudes

Compare the observed magnitude distribution to a reference distribution to understand tail behavior and departures from a simple model.

In [14]:
# Rows are ordered by magnitude type before plotting.
qq_data = df.sort_values(by="magType")

# group=[] prevents the data from being split for the statistical transformation.
qq_mapping = aes(sample="mag", color="magType", group=[])

# The Q-Q plot preserves the mapping to the original data after the statistical
# transformation, so "depth" can be used in the tooltips.
qq_tooltips = (
    layer_tooltips()
    .line("magnitude type|@magType")
    .line("magnitude|@mag")
    .line("@|@depth")
)

(ggplot(qq_data, qq_mapping)
  + geom_qq(distribution='exp', alpha=.5, tooltips=qq_tooltips)
  + scale_color_manual(mag_type_colors, name="magnitude type")
  + title_layer("Q-Q plot of magnitudes", "distribution='exp'"))

## Happy New Year!

We hope the coming year won't shake things up too much — in the best way.

In [15]:
# Each CSV row describes one hexagon: its position ("x", "y"), its fill ("color")
# and a "comment" shown as the tooltip.
picture_df = pd.read_csv("../data/mcclane.csv", sep=';')

# stat='identity' draws the rows as given instead of binning them.
hexagon_layer = geom_hex(aes("x", "y", fill="color"), stat='identity', width=.5, size=0,
                         tooltips=layer_tooltips().line("@comment"))

(ggplot(picture_df)
  + hexagon_layer
  + scale_y_reverse()
  + scale_fill_identity()  # use the "color" values directly as fill colors
  + theme_void() + theme(tooltip_text=element_text(size=30))
  + ggsize(1000, 700))