In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go

In [20]:
csv_file = "Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep = ',', lineterminator='\n')
etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
0,12/31/59,0:03:09.00,1960.002196,-119.0502,33.979,6.5,8.2474
1,1/2/60,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1/2/60,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1/2/60,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1/2/60,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737


In [21]:
csv_file = "All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})
usgs.head()


Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r
0,2023-10-12T21:41:21.480Z,36.4661674,-120.8755035,15.85,3.15,ml,60,135,0.1035,0.2,...,2023-10-13T19:19:02.194Z,"19 km WNW of New Idria, CA",earthquake,0.4,0.61,0.175,21,automatic,nc,nc\r
1,2023-10-08T21:30:23.900Z,38.8271667,-122.804,1.75,3.87,mw,108,20,0.006058,0.06,...,2023-10-14T02:06:32.597Z,"7 km NW of The Geysers, CA",earthquake,0.07,0.11,,3,reviewed,nc,nc\r
2,2023-10-05T03:09:58.000Z,35.041,-117.661,0.79,3.52,ml,63,40,0.1102,0.15,...,2023-10-06T21:24:55.024Z,"5 km NNW of Boron, CA",earthquake,0.12,0.32,0.15,156,reviewed,ci,ci\r
3,2023-10-01T19:29:36.760Z,40.2915,-124.2905,9.59,3.61,mw,40,115,0.0308,0.17,...,2023-10-10T16:43:18.991Z,"4 km S of Petrolia, CA",earthquake,0.36,0.21,,4,reviewed,nc,nc\r
4,2023-10-01T15:41:29.620Z,40.2951667,-124.287,9.8,4.09,mw,42,105,0.02685,0.17,...,2023-10-02T02:34:55.127Z,"3 km S of Petrolia, CA",earthquake,0.37,0.23,,4,reviewed,nc,nc\r


In [22]:
#converting the Date column into datetime format
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
etas.loc[etas["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)

etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
0,1959-12-31,0:03:09.00,1960.002196,-119.0502,33.979,6.5,8.2474
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737


In [23]:
#converting the Date column into datetime format
usgs["time"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
usgs.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r
0,2023-10-12,36.4661674,-120.8755035,15.85,3.15,ml,60,135,0.1035,0.2,...,2023-10-13T19:19:02.194Z,"19 km WNW of New Idria, CA",earthquake,0.4,0.61,0.175,21,automatic,nc,nc\r
1,2023-10-08,38.8271667,-122.804,1.75,3.87,mw,108,20,0.006058,0.06,...,2023-10-14T02:06:32.597Z,"7 km NW of The Geysers, CA",earthquake,0.07,0.11,,3,reviewed,nc,nc\r
2,2023-10-05,35.041,-117.661,0.79,3.52,ml,63,40,0.1102,0.15,...,2023-10-06T21:24:55.024Z,"5 km NNW of Boron, CA",earthquake,0.12,0.32,0.15,156,reviewed,ci,ci\r
3,2023-10-01,40.2915,-124.2905,9.59,3.61,mw,40,115,0.0308,0.17,...,2023-10-10T16:43:18.991Z,"4 km S of Petrolia, CA",earthquake,0.36,0.21,,4,reviewed,nc,nc\r
4,2023-10-01,40.2951667,-124.287,9.8,4.09,mw,42,105,0.02685,0.17,...,2023-10-02T02:34:55.127Z,"3 km S of Petrolia, CA",earthquake,0.37,0.23,,4,reviewed,nc,nc\r


In [24]:
#filter the dataset by Date > 1960-01-01 and Date < 2023-01-01 
etas = etas[(etas['Date'] > pd.to_datetime('1960-01-01')) & (etas['Date'] < pd.to_datetime('2023-01-01'))]

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
etas = etas[etas['X'] > -123]
etas = etas[etas['X'] < -113]
etas = etas[etas['Y'] < 39]
etas = etas[etas['Y'] > 29]

etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737
5,1960-01-02,0:11:17.00,1960.00784,-115.605,33.0276,3.61,7.9322


In [25]:
#filter the dataset by Date > 1960-01-01 and Date < 2023-01-1 
usgs = usgs[(pd.to_datetime(usgs['time']) > pd.to_datetime('1960-01-01')) & (pd.to_datetime(usgs['time']) < pd.to_datetime('2023-01-01'))]

usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
usgs = usgs[usgs['longitude'] > -123]
usgs = usgs[usgs['longitude'] < -113]
usgs = usgs[usgs['latitude'] < 39]
usgs = usgs[usgs['latitude'] > 29]
usgs.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r
240,2022-12-31,33.3975,-116.393333,3.88,4.14,mw,132,16,0.07391,0.19,...,2023-09-22T21:50:30.029Z,"16 km N of Borrego Springs, CA",earthquake,0.1,0.38,,6,reviewed,ci,ci\r
241,2022-12-31,34.355667,-116.921833,4.73,3.47,mw,121,25,0.07845,0.15,...,2023-03-07T19:00:01.040Z,"11km SSE of Lucerne Valley, CA",earthquake,0.09,0.41,,4,reviewed,ci,ci\r
246,2022-12-22,37.620167,-122.025,3.82,3.34,mw,141,16,,0.16,...,2023-04-20T04:34:00.806Z,"3km N of Union City, CA",earthquake,0.1,0.17,,3,reviewed,nc,nc\r
262,2022-12-17,37.918167,-122.304,5.48,3.57,mw,170,19,0.01598,0.15,...,2023-07-27T08:15:34.318Z,"1km ENE of El Cerrito, CA",earthquake,0.1,0.17,,4,reviewed,nc,nc\r
263,2022-12-13,36.604667,-121.209333,8.88,3.28,ml,67,55,0.03812,0.09,...,2023-02-18T22:04:08.040Z,"10km NW of Pinnacles, CA",earthquake,0.14,0.28,0.129,72,reviewed,nc,nc\r


In [26]:
time = []
for i in usgs['time']:
    time.append(pd.to_datetime(i))
usgs['time'] = time

In [27]:
formula_constant = (1/1.5)

usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')
usgs['energy'] = 10**(1.5*usgs['mag'])
usgs['energy'] = np.log(usgs['energy']) * formula_constant
usgs.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r,energy
240,2022-12-31,33.3975,-116.393333,3.88,4.14,mw,132,16,0.07391,0.19,...,"16 km N of Borrego Springs, CA",earthquake,0.1,0.38,,6,reviewed,ci,ci\r,9.532702
241,2022-12-31,34.355667,-116.921833,4.73,3.47,mw,121,25,0.07845,0.15,...,"11km SSE of Lucerne Valley, CA",earthquake,0.09,0.41,,4,reviewed,ci,ci\r,7.98997
246,2022-12-22,37.620167,-122.025,3.82,3.34,mw,141,16,,0.16,...,"3km N of Union City, CA",earthquake,0.1,0.17,,3,reviewed,nc,nc\r,7.690634
262,2022-12-17,37.918167,-122.304,5.48,3.57,mw,170,19,0.01598,0.15,...,"1km ENE of El Cerrito, CA",earthquake,0.1,0.17,,4,reviewed,nc,nc\r,8.220229
263,2022-12-13,36.604667,-121.209333,8.88,3.28,ml,67,55,0.03812,0.09,...,"10km NW of Pinnacles, CA",earthquake,0.14,0.28,0.129,72,reviewed,nc,nc\r,7.552479


In [28]:
etas['Magnitude'] = pd.to_numeric(etas['Magnitude'], errors='coerce')
etas['energy'] = 10**(1.5*etas['Magnitude'])
etas['energy'] = np.log(etas['energy']) * formula_constant
etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r,energy
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322,9.785987
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015,6.976833
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678,6.976833
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737,7.091962
5,1960-01-02,0:11:17.00,1960.00784,-115.605,33.0276,3.61,7.9322,8.312332


In [29]:
# Mean energy for each day
mean_energy_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('D')).energy.mean())
print(mean_energy_etas)

# Sum of energy for each day
sum_energy_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('D')).energy.sum())
print(sum_energy_etas)

# Max energy for each day
max_energy_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('D')).energy.max())
print(max_energy_etas)

              energy
Date                
1960-01-02  7.801926
1960-01-03  8.059048
1960-01-04  8.076317
1960-01-05  7.828789
1960-01-06  7.271564
...              ...
2022-12-26  7.989970
2022-12-27  7.759712
2022-12-28  7.909380
2022-12-29  9.313957
2022-12-31  8.001483

[18880 rows x 1 columns]
               energy
Date                 
1960-01-02  46.811555
1960-01-03  40.295239
1960-01-04  32.305269
1960-01-05   7.828789
1960-01-06  36.357819
...               ...
2022-12-26   7.989970
2022-12-27   7.759712
2022-12-28  15.818760
2022-12-29  18.627913
2022-12-31  16.002966

[18880 rows x 1 columns]
               energy
Date                 
1960-01-02   9.785987
1960-01-03   8.980082
1960-01-04   9.762961
1960-01-05   7.828789
1960-01-06   7.989970
...               ...
2022-12-26   7.989970
2022-12-27   7.759712
2022-12-28   8.358384
2022-12-29  11.720158
2022-12-31   8.795875

[18880 rows x 1 columns]


In [30]:
# Mean energy for each day
mean_energy_usgs = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('D')).energy.mean())
print(mean_energy_usgs)

# Sum of energy for each day
sum_energy_usgs = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('D')).energy.sum())
print(sum_energy_usgs)

# Max energy for each day
max_energy_usgs = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('D')).energy.max())
print(max_energy_usgs)

              energy
time                
1960-01-02  9.302444
1960-01-05  6.976833
1960-01-07  8.381410
1960-01-08  7.138014
1960-01-11  8.726798
...              ...
2022-12-09  6.907755
2022-12-13  7.552479
2022-12-17  8.220229
2022-12-22  7.690634
2022-12-31  8.761336

[11447 rows x 1 columns]
               energy
time                 
1960-01-02   9.302444
1960-01-05   6.976833
1960-01-07   8.381410
1960-01-08   7.138014
1960-01-11   8.726798
...               ...
2022-12-09   6.907755
2022-12-13   7.552479
2022-12-17   8.220229
2022-12-22   7.690634
2022-12-31  17.522673

[11447 rows x 1 columns]
              energy
time                
1960-01-02  9.302444
1960-01-05  6.976833
1960-01-07  8.381410
1960-01-08  7.138014
1960-01-11  8.726798
...              ...
2022-12-09  6.907755
2022-12-13  7.552479
2022-12-17  8.220229
2022-12-22  7.690634
2022-12-31  9.532702

[11447 rows x 1 columns]


In [31]:
mean_energy_usgs = mean_energy_usgs.reset_index()
mean_energy_usgs['Date'] = mean_energy_usgs['time']
mean_energy_usgs['USGS'] = mean_energy_usgs['energy']

mean_energy_etas['ETAS'] = mean_energy_etas['energy']
mean_energy_usgs = mean_energy_usgs.drop(columns = ["time"])
mean_energy_usgs = mean_energy_usgs.drop(columns = ["energy"])
mean_energy_etas = mean_energy_etas.drop(columns = ["energy"])

mean_energy_df = mean_energy_usgs.merge(mean_energy_etas, on='Date')
mean_energy_df['Date'] = mean_energy_df['Date'].dt.to_timestamp()
mean_energy_df.head()

Unnamed: 0,Date,USGS,ETAS
0,1960-01-02,9.302444,7.801926
1,1960-01-05,6.976833,7.828789
2,1960-01-11,8.726798,7.195578
3,1960-01-18,7.759712,7.368272
4,1960-01-20,11.512925,7.98997


In [32]:
max_energy_usgs = max_energy_usgs.reset_index()
max_energy_usgs['Date'] = max_energy_usgs['time']
max_energy_usgs['USGS'] = max_energy_usgs['energy']

max_energy_etas['ETAS'] = max_energy_etas['energy']
max_energy_usgs = max_energy_usgs.drop(columns = ["time"])
max_energy_usgs = max_energy_usgs.drop(columns = ["energy"])
max_energy_etas = max_energy_etas.drop(columns = ["energy"])

max_energy_df = max_energy_usgs.merge(max_energy_etas, on='Date')
max_energy_df['Date'] = max_energy_df['Date'].dt.to_timestamp()
max_energy_df.head()

Unnamed: 0,Date,USGS,ETAS
0,1960-01-02,9.302444,9.785987
1,1960-01-05,6.976833,7.828789
2,1960-01-11,8.726798,7.506427
3,1960-01-18,7.759712,7.368272
4,1960-01-20,11.512925,7.98997


In [33]:
sum_energy_usgs = sum_energy_usgs.reset_index()
sum_energy_usgs['Date'] = sum_energy_usgs['time']
sum_energy_usgs['USGS'] = sum_energy_usgs['energy']

sum_energy_etas['ETAS'] = sum_energy_etas['energy']
sum_energy_usgs = sum_energy_usgs.drop(columns = ["time"])
sum_energy_usgs = sum_energy_usgs.drop(columns = ["energy"])
sum_energy_etas = sum_energy_etas.drop(columns = ["energy"])

sum_energy_df = sum_energy_usgs.merge(sum_energy_etas, on='Date')
sum_energy_df['Date'] = sum_energy_df['Date'].dt.to_timestamp()
sum_energy_df.head()

Unnamed: 0,Date,USGS,ETAS
0,1960-01-02,9.302444,46.811555
1,1960-01-05,6.976833,7.828789
2,1960-01-11,8.726798,28.782314
3,1960-01-18,7.759712,7.368272
4,1960-01-20,11.512925,7.98997


In [34]:
# Create a Plotly figure
fig = px.line(mean_energy_df, x='Date', y=['ETAS', 'USGS'], labels={'value': 'Energy'}, title='ETAS vs USGS Mean Earthquake Energy')
fig.update_layout(
    width=1200,
    height=600,
)

fig.update_traces(line=dict(width=1.0))
# Add interactivity
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(dragmode='pan', hovermode='x')

# Show the interactive plot
fig.show()

In [35]:
# Sum mag energy dataframe plot
# Create a Plotly figure
fig = px.line(sum_energy_df, x='Date', y=['ETAS', 'USGS'], labels={'value': 'Energy'}, title='ETAS vs USGS Sum Magnitude Energy')
fig.update_layout(
    width=1200,
    height=600,
)

fig.update_traces(line=dict(width=1.0))
# Add interactivity
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(dragmode='pan')

# Show the interactive plot
fig.show()

In [36]:
# Max mag energy dataframe plot
# Create a Plotly figure
fig = px.line(max_energy_df, x='Date', y=['ETAS', 'USGS'], labels={'value': 'Energy'}, title='ETAS vs USGS Max Magnitude Energy')
fig.update_layout(
    width=1200,
    height=600,
)

fig.update_traces(line=dict(width=1.0))
# Add interactivity
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(dragmode='pan')

# Show the interactive plot
fig.show()