In [660]:
import pandas as pd
import numpy as np
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go

Goals:
1. What are differences and similarities for the time between large earthquakes in ETAS/USGS
2. If there is a longer amount of time between large earthquakes, will it result in a more devastating one?
3. Can we use timeseries to predict the time until the next large earthquake in a manner that will follow ETAS/USGS?

In [661]:
csv_file = "Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep = ',', lineterminator='\n')
# etas.head()
csv_file = "All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})
# usgs.head()


Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.



In [662]:
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
etas.loc[etas["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)
#converting the Date column into datetime format
usgs["Date"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
usgs.drop(columns=["time"], inplace=True)

In [663]:
etas = etas[(etas['Date'] > pd.to_datetime('1960-01-01')) & (etas['Date'] < pd.to_datetime('2023-01-01'))]

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
etas = etas[etas['X'] > -123]
etas = etas[etas['X'] < -113]
etas = etas[etas['Y'] < 39]
etas = etas[etas['Y'] > 29]

etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737
5,1960-01-02,0:11:17.00,1960.00784,-115.605,33.0276,3.61,7.9322


In [664]:
usgs = usgs[(pd.to_datetime(usgs['Date']) > pd.to_datetime('1960-01-01')) & (pd.to_datetime(usgs['Date']) < pd.to_datetime('2023-01-01'))]

usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
usgs = usgs[usgs['longitude'] > -123]
usgs = usgs[usgs['longitude'] < -113]
usgs = usgs[usgs['latitude'] < 39]
usgs = usgs[usgs['latitude'] > 29]

usgs.head()

Unnamed: 0,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r,Date
240,33.3975,-116.393333,3.88,4.14,mw,132,16,0.07391,0.19,ci,...,"16 km N of Borrego Springs, CA",earthquake,0.1,0.38,,6,reviewed,ci,ci\r,2022-12-31
241,34.355667,-116.921833,4.73,3.47,mw,121,25,0.07845,0.15,ci,...,"11km SSE of Lucerne Valley, CA",earthquake,0.09,0.41,,4,reviewed,ci,ci\r,2022-12-31
246,37.620167,-122.025,3.82,3.34,mw,141,16,,0.16,nc,...,"3km N of Union City, CA",earthquake,0.1,0.17,,3,reviewed,nc,nc\r,2022-12-22
262,37.918167,-122.304,5.48,3.57,mw,170,19,0.01598,0.15,nc,...,"1km ENE of El Cerrito, CA",earthquake,0.1,0.17,,4,reviewed,nc,nc\r,2022-12-17
263,36.604667,-121.209333,8.88,3.28,ml,67,55,0.03812,0.09,nc,...,"10km NW of Pinnacles, CA",earthquake,0.14,0.28,0.129,72,reviewed,nc,nc\r,2022-12-13


In [665]:
max_mag_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('M')).Magnitude.max())
max_mag_etas.reset_index(inplace=True)
max_mag_etas.head()

Unnamed: 0,Date,Magnitude
0,1960-01,4.25
1,1960-02,4.68
2,1960-03,5.22
3,1960-04,4.49
4,1960-05,4.29


In [666]:
time = []
for i in usgs['Date']:
    time.append(pd.to_datetime(i))
usgs['Date'] = time

In [667]:
max_mag_usgs = pd.DataFrame(usgs.groupby(usgs['Date'].dt.to_period('M')).mag.max())
max_mag_usgs.reset_index(inplace=True)
max_mag_usgs.head()

Unnamed: 0,Date,mag
0,1960-01,5.0
1,1960-02,3.79
2,1960-03,3.67
3,1960-04,4.24
4,1960-05,4.0


In [668]:
large_earthquake = 6

In [669]:
large_mag_etas = max_mag_etas[max_mag_etas['Magnitude'] > large_earthquake].copy()
large_mag_etas['time_diff'] = large_mag_etas['Date'].dt.to_timestamp().diff()
large_mag_etas['time_diff'] = large_mag_etas['time_diff'].dt.days
large_mag_etas.head()

Unnamed: 0,Date,Magnitude,time_diff
25,1962-02,6.38,
28,1962-05,6.01,89.0
51,1964-04,7.65,701.0
60,1965-01,7.26,275.0
81,1966-10,6.66,638.0


In [670]:
large_mag_etas['Date'] = large_mag_etas['Date'].dt.to_timestamp()
fig = px.bar(large_mag_etas, x='Date', y='time_diff', labels={'time_diff_days': 'Time Difference (Days)'})

# Customize the plot layout
fig.update_layout(
    title='Time Difference Bar Chart',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
)

In [671]:
large_mag_usgs = max_mag_usgs[max_mag_usgs['mag'] > large_earthquake].copy()
large_mag_usgs['time_diff'] = large_mag_usgs['Date'].dt.to_timestamp().diff()
large_mag_usgs['time_diff'] = large_mag_usgs['time_diff'].dt.days
large_mag_usgs.head()

Unnamed: 0,Date,mag,time_diff
99,1968-04,6.6,
133,1971-02,6.6,1036.0
161,1973-06,6.1,851.0
185,1975-06,6.2,730.0
189,1975-10,6.4,122.0


In [672]:
large_mag_usgs['Date'] = large_mag_usgs['Date'].dt.to_timestamp()
fig = px.bar(large_mag_usgs, x='Date', y='time_diff', labels={'time_diff_days': 'Time Difference (Days)'})

# Customize the plot layout
fig.update_layout(
    title='Time Difference Bar Chart',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
)
