In [2]:
import pandas as pd
import numpy as np
import datetime as dt

### Goals:
1. What are differences and similarities for the time between large earthquakes in ETAS/USGS
2. If there is a longer amount of time between large earthquakes, will it result in a more devastating one?
3. Can we use timeseries to predict the time until the next large earthquake in a manner that will follow ETAS/USGS?

### Questions
* What location grids should we group by

In [3]:
csv_file = "Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep = ',', lineterminator='\n')
# etas.head()
csv_file = "All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})
# usgs.head()

  usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})


In [4]:
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
etas.loc[etas["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)
#converting the Date column into datetime format
usgs["Date"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
usgs.drop(columns=["time"], inplace=True)

In [5]:
etas = etas[(etas['Date'] > pd.to_datetime('1960-01-01')) & (etas['Date'] < pd.to_datetime('2023-01-01'))]

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
etas = etas[etas['X'] > -123]
etas = etas[etas['X'] < -113]
etas = etas[etas['Y'] < 39]
etas = etas[etas['Y'] > 29]

etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737
5,1960-01-02,0:11:17.00,1960.00784,-115.605,33.0276,3.61,7.9322


In [6]:
usgs = usgs[(pd.to_datetime(usgs['Date']) > pd.to_datetime('1960-01-01')) & (pd.to_datetime(usgs['Date']) < pd.to_datetime('2023-01-01'))]

usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
usgs = usgs[usgs['longitude'] > -123]
usgs = usgs[usgs['longitude'] < -113]
usgs = usgs[usgs['latitude'] < 39]
usgs = usgs[usgs['latitude'] > 29]

usgs.head()

Unnamed: 0,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r,Date
240,33.3975,-116.393333,3.88,4.14,mw,132,16,0.07391,0.19,ci,...,"16 km N of Borrego Springs, CA",earthquake,0.1,0.38,,6,reviewed,ci,ci\r,2022-12-31
241,34.355667,-116.921833,4.73,3.47,mw,121,25,0.07845,0.15,ci,...,"11km SSE of Lucerne Valley, CA",earthquake,0.09,0.41,,4,reviewed,ci,ci\r,2022-12-31
246,37.620167,-122.025,3.82,3.34,mw,141,16,,0.16,nc,...,"3km N of Union City, CA",earthquake,0.1,0.17,,3,reviewed,nc,nc\r,2022-12-22
262,37.918167,-122.304,5.48,3.57,mw,170,19,0.01598,0.15,nc,...,"1km ENE of El Cerrito, CA",earthquake,0.1,0.17,,4,reviewed,nc,nc\r,2022-12-17
263,36.604667,-121.209333,8.88,3.28,ml,67,55,0.03812,0.09,nc,...,"10km NW of Pinnacles, CA",earthquake,0.14,0.28,0.129,72,reviewed,nc,nc\r,2022-12-13


In [7]:
#initial plot of latitude and longitude
import plotly.express as px
import plotly.graph_objects as go

In [8]:
fig = px.scatter(etas, x='X', y='Y', color='Magnitude', color_continuous_scale='viridis', title='Earthquake Locations ETAS (Magnitude)')
fig.update_layout(width=600, height=600)
fig.show()

In [9]:
fig = px.scatter(usgs, x='longitude', y='latitude', color='mag', color_continuous_scale='viridis', title='Earthquake Locations USGS')
fig.update_layout(width=600, height=600)
fig.show()

Intital plots show that USGS earthquakes are much more clustered compared to the ETAS

In [10]:
# ETAS energy in terms of location
const = (1/1.5)
etas['Energy'] = 10**(1.5*etas['Magnitude'])
etas['Energy'] = np.log(etas['Magnitude'])*const

fig = px.scatter(etas, x='X', y='Y', color='Energy', color_continuous_scale='viridis', title='Earthquake Locations ETAS (Energy)')
fig.update_layout(width=600, height=600)
fig.show()

In [19]:
# USGS energy in terms of location

const = (1/1.5)
usgs['Energy'] = 10**(1.5*usgs['mag'])
usgs['Energy'] = np.log(usgs['mag'])*const

fig = px.scatter(usgs, x='longitude', y='latitude', color='Energy', color_continuous_scale='viridis', title='Earthquake Locations USGS (Energy)')
fig.update_layout(width=600, height=600)
fig.show()

In [12]:
#let us split etas and usgs by grids of 1 x 1 latitude and longitude 
lat_grid_size = 1
lon_grid_size = 1
large_earthquake = 6
num_large_earthquakes = 2

In [13]:
def assign_grid_labels_usgs(row):
    lon_label = int(row['longitude'] // lon_grid_size)
    lat_label = int(row['latitude'] // lat_grid_size)
    return f'Grid_{lon_label}_{lat_label}'

def assign_grid_labels_etas(row):
    lon_label = int(row['X'] // lon_grid_size)
    lat_label = int(row['Y'] // lat_grid_size)
    return f'Grid_{lon_label}_{lat_label}'


In [14]:
etas['grid_label'] = etas.apply(assign_grid_labels_etas, axis=1)
grid_etas = {}
for label, group in etas.groupby('grid_label'):
    if (group['Magnitude'] > large_earthquake).sum() >= num_large_earthquakes:
        grid_etas[label] = group
print(grid_etas)
print(len(grid_etas))

{'Grid_-116_32':             Date         Time         Year         X        Y  Magnitude  \
6     1960-01-02   0:11:24.00  1960.007924 -115.5757  32.9984       3.33   
11    1960-01-03   0:15:00.00  1960.010425 -115.6458  32.9994       3.50   
12    1960-01-04   0:16:00.00  1960.011119 -115.6709  32.9659       4.24   
14    1960-01-04   0:18:36.00  1960.012923 -115.7143  32.9740       3.54   
15    1960-01-04   0:18:47.00  1960.013053 -115.6745  32.9474       3.12   
...          ...          ...          ...       ...      ...        ...   
31744 2022-12-25  23:37:34.00  2022.984428 -115.8350  32.9910       3.89   
31746 2022-12-26  23:43:19.00  2022.988423 -115.0715  32.2152       3.47   
31751 2022-12-29  23:52:49.00  2022.995018 -115.5915  32.9208       3.00   
31752 2022-12-31   0:00:31.00  2023.000361 -115.6329  32.9102       3.13   
31753 2022-12-31   0:02:36.00  2023.001815 -115.6517  32.9682       3.82   

          Z\r    Energy    grid_label  
6      7.9606  0.801982  Grid_

In [15]:
usgs['grid_label'] = usgs.apply(assign_grid_labels_usgs, axis=1)
grid_usgs  = {}
for label, group in usgs.groupby('grid_label'):
    if (group['mag'] > large_earthquake).sum() >= num_large_earthquakes:
        grid_usgs[label] = group
print(grid_usgs)
print(len(grid_usgs))

{'Grid_-116_32':         latitude   longitude  depth   mag magType  nst    gap      dmin   rms  \
304    32.338167 -115.244333   2.83  3.57      ml   26    218   0.09598  0.35   
339    32.406333 -115.225167  12.18  4.03      mw   33    101   0.06879  0.36   
356    32.399000 -115.240833  10.97  3.09      ml   35    101     0.058  0.35   
389    32.644167 -115.724667  11.15  3.42      ml   53    105  0.005348  0.26   
392    32.398833 -115.229167  19.74  3.75      mw   32    102   0.06737  0.32   
...          ...         ...    ...   ...     ...  ...    ...       ...   ...   
32871  32.917000 -115.470000    6.0  3.22      ml  4.0  268.0    0.1357  0.39   
32872  32.721000 -115.796833    6.0  3.65      ml  3.0  261.0    0.7391  0.22   
32879  32.247667 -115.798167    6.0  3.67      ml  3.0  296.0    0.8554  0.12   
32884  32.451833 -115.766167    6.0  3.71      ml  3.0  283.0    0.7981  0.49   
32897  32.221167 -115.904833    6.0  3.64      ml  3.0  340.0    0.7939  0.83   

      net 

In [16]:
fig = go.Figure()

# Create a color map for unique colors for each grid
colors = px.colors.qualitative.Pastel

# Iterate through the grid_dataframes and add scatter plots to the figure
for i, (label, grid_df) in enumerate(grid_etas.items()):
    color = colors[i % len(colors)]  # Cycle through the color map
    scatter = px.scatter(grid_df, x='X', y='Y', color_discrete_sequence=[color])
    fig.add_trace(scatter.data[0])  # Add the trace to the main figure

# Display the combined figure
fig.update_layout(title='Large Magnitude Grids (ETAS)')
fig.update_layout(width=600, height=600)
fig.show()

In [17]:
fig = go.Figure()

# Create a color map for unique colors for each grid
colors = px.colors.qualitative.Pastel

# Iterate through the grid_dataframes and add scatter plots to the figure
for i, (label, grid_df) in enumerate(grid_usgs.items()):
    color = colors[i % len(colors)]  # Cycle through the color map
    scatter = px.scatter(grid_df, x='longitude', y='latitude', color_discrete_sequence=[color])
    fig.add_trace(scatter.data[0])  # Add the trace to the main figure

# Display the combined figure
fig.update_layout(title='Large Magnitude Grids (USGS)')
fig.update_layout(width=600, height=600)
fig.show()

Observations - ETAS has more grids with higher magnitude earthquakes

In [18]:
for i, (label, grid_df) in enumerate(grid_etas.items()):
    pass
#param continue here... implement a small model predicting time till next 
# large earthquake for each of the cells