# Historical Data Analysis

In [431]:
import pandas as pd
import numpy as np
import datetime as dt

In [432]:
csv_file = "../datasets/Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep = ',', lineterminator='\n')
csv_file = "../datasets/All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})


Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.



Data Filtering
1. Converting the date columns to datetime
2. Date > 1960-01-01 and < 2023-01-01
3. Longitude > -123 and < -113
4. Latitude > 29 and < 39

In [433]:
#converting the Date column into datetime format
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
etas.loc[etas["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)

#filter the dataset by Date > 1960-01-01 and Date < 2023-01-1 
etas = etas[(etas['Date'] > pd.to_datetime('1960-01-01')) & (etas['Date'] < pd.to_datetime('2023-01-01'))]

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
etas = etas[etas['X'] > -123]
etas = etas[etas['X'] < -113]
etas = etas[etas['Y'] < 39]
etas = etas[etas['Y'] > 29]

etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737
5,1960-01-02,0:11:17.00,1960.00784,-115.605,33.0276,3.61,7.9322


In [434]:
usgs["time"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
#filter the dataset by Date > 1960-01-01 and Date < 2023-01-1 
usgs = usgs[(pd.to_datetime(usgs['time']) > pd.to_datetime('1960-01-01')) & (pd.to_datetime(usgs['time']) < pd.to_datetime('2023-01-01'))]
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
usgs = usgs[usgs['longitude'] > -123]
usgs = usgs[usgs['longitude'] < -113]
usgs = usgs[usgs['latitude'] < 39]
usgs = usgs[usgs['latitude'] > 29]
usgs.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r
240,2022-12-31,33.3975,-116.393333,3.88,4.14,mw,132,16,0.07391,0.19,...,2023-09-22T21:50:30.029Z,"16 km N of Borrego Springs, CA",earthquake,0.1,0.38,,6,reviewed,ci,ci\r
241,2022-12-31,34.355667,-116.921833,4.73,3.47,mw,121,25,0.07845,0.15,...,2023-03-07T19:00:01.040Z,"11km SSE of Lucerne Valley, CA",earthquake,0.09,0.41,,4,reviewed,ci,ci\r
246,2022-12-22,37.620167,-122.025,3.82,3.34,mw,141,16,,0.16,...,2023-04-20T04:34:00.806Z,"3km N of Union City, CA",earthquake,0.1,0.17,,3,reviewed,nc,nc\r
262,2022-12-17,37.918167,-122.304,5.48,3.57,mw,170,19,0.01598,0.15,...,2023-07-27T08:15:34.318Z,"1km ENE of El Cerrito, CA",earthquake,0.1,0.17,,4,reviewed,nc,nc\r
263,2022-12-13,36.604667,-121.209333,8.88,3.28,ml,67,55,0.03812,0.09,...,2023-02-18T22:04:08.040Z,"10km NW of Pinnacles, CA",earthquake,0.14,0.28,0.129,72,reviewed,nc,nc\r


In [435]:
time = []
for i in usgs['time']:
    time.append(pd.to_datetime(i))
usgs['time'] = time

We then group the data into 1 month chunks and perform several operations with the goal of comparing trends across ETAS and USGS

In [436]:
# Mean magnitude for each month
mean_mag_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('M')).Magnitude.mean())

# Sum of magnitudes for each month
sum_mag_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('M')).Magnitude.sum())

# Mean magnitude for each month
mean_mag_usgs = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('M')).mag.mean())

# Sum of magnitudes for each month
sum_mag_usgs = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('M')).mag.sum())

# Maximum magnitude earthquake for each month
max_mag_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('M')).Magnitude.max())

# Total number of earthquakes for each month
earthquake_count_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('M')).Magnitude.count())

# Maximum magnitude earthquake for each month
max_mag_usgs = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('M')).mag.max())

# Total number of earthquakes for each month
earthquake_count_usgs = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('M')).mag.count())

Merging related USGS and ETAS datasets for plotting

In [437]:
mean_mag_etas = mean_mag_etas.reset_index()
mean_mag_etas["ETAS"] = mean_mag_etas["Magnitude"]
mean_mag_etas = mean_mag_etas.drop(columns = ["Magnitude"])

mean_mag_usgs = mean_mag_usgs.reset_index()
mean_mag_usgs['Date'] = mean_mag_usgs['time']
mean_mag_usgs["USGS"] = mean_mag_usgs["mag"]
mean_mag_usgs = mean_mag_usgs.drop(columns = ["mag"])
mean_mag_usgs = mean_mag_usgs.drop(columns = ["time"])

merged_mean_mag = mean_mag_etas.merge(mean_mag_usgs, on='Date')
merged_mean_mag.head()

Unnamed: 0,Date,ETAS,USGS
0,1960-01,3.325385,3.668333
1,1960-02,3.417838,3.35375
2,1960-03,3.496923,3.346667
3,1960-04,3.385135,3.621111
4,1960-05,3.438857,3.423333


In [438]:
import plotly.express as px
import plotly.graph_objects as go

In [439]:
merged_mean_mag['Date'] = merged_mean_mag['Date'].dt.to_timestamp()

fig = px.line(merged_mean_mag, x='Date', 
              y=['ETAS', 'USGS'], labels={'value': 'Magnitude'}, 
              title='ETAS vs USGS Mean Earthquake Magnitudes')
fig.update_layout(
    width=900,
    height=600,
)

fig.update_traces(line=dict(width=1.0))
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(dragmode='pan', hovermode='x')

fig.show()

In [440]:
sum_mag_etas = sum_mag_etas.reset_index()
sum_mag_etas["ETAS"] = sum_mag_etas["Magnitude"]
sum_mag_etas = sum_mag_etas.drop(columns = ["Magnitude"])

sum_mag_usgs = sum_mag_usgs.reset_index()
sum_mag_usgs['Date'] = sum_mag_usgs['time']
sum_mag_usgs["USGS"] = sum_mag_usgs["mag"]
sum_mag_usgs = sum_mag_usgs.drop(columns = ["mag"])
sum_mag_usgs = sum_mag_usgs.drop(columns = ["time"])

merged_sum_mag = sum_mag_etas.merge(sum_mag_usgs, on='Date')
merged_sum_mag.head()


Unnamed: 0,Date,ETAS,USGS
0,1960-01,172.92,44.02
1,1960-02,126.46,26.83
2,1960-03,181.84,20.08
3,1960-04,125.25,32.59
4,1960-05,120.36,20.54


In [441]:
merged_sum_mag['Date'] = merged_sum_mag["Date"].dt.to_timestamp()

# Create a Plotly figure
fig = px.line(merged_sum_mag, x='Date', y=['ETAS', 'USGS'], labels={'value': 'Magnitude'}, title='ETAS vs USGS Sum Of Magnitudes')

fig.update_layout(
    width=900,
    height=600,
)

fig.update_traces(line=dict(width=1.0))
# Add interactivity
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(dragmode='pan', hovermode='x')

# Show the interactive plot
fig.show()

In [442]:
max_mag_etas = max_mag_etas.reset_index()
max_mag_etas["ETAS"] = max_mag_etas["Magnitude"]
max_mag_etas = max_mag_etas.drop(columns = ["Magnitude"])

max_mag_usgs = max_mag_usgs.reset_index()
max_mag_usgs['Date'] = max_mag_usgs['time']
max_mag_usgs["USGS"] = max_mag_usgs["mag"]
max_mag_usgs = max_mag_usgs.drop(columns = ["mag"])
max_mag_usgs = max_mag_usgs.drop(columns = ["time"])

merged_max_mag = max_mag_etas.merge(max_mag_usgs, on='Date')
merged_max_mag.head()

Unnamed: 0,Date,ETAS,USGS
0,1960-01,4.25,5.0
1,1960-02,4.68,3.79
2,1960-03,5.22,3.67
3,1960-04,4.49,4.24
4,1960-05,4.29,4.0


In [443]:
merged_max_mag['Date'] = merged_max_mag['Date'].dt.to_timestamp()

# Create a Plotly figure
fig = px.line(merged_max_mag, x='Date', y=['ETAS', 'USGS'], labels={'value': 'Magnitude'}, title='ETAS vs USGS Max Magnitude')
fig.update_layout(
    width=900,
    height=600,
)

fig.update_traces(line=dict(width=1.0))
# Add interactivity
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(dragmode='pan', hovermode='x')

# Show the interactive plot
fig.show()

In [444]:
earthquake_count_etas = earthquake_count_etas.reset_index()
earthquake_count_etas["ETAS"] = earthquake_count_etas["Magnitude"]
earthquake_count_etas = earthquake_count_etas.drop(columns = ["Magnitude"])

earthquake_count_usgs = earthquake_count_usgs.reset_index()
earthquake_count_usgs['Date'] = earthquake_count_usgs['time']
earthquake_count_usgs["USGS"] = earthquake_count_usgs["mag"]
earthquake_count_usgs = earthquake_count_usgs.drop(columns = ["mag"])
earthquake_count_usgs = earthquake_count_usgs.drop(columns = ["time"])

merged_earthquake_counts = earthquake_count_etas.merge(earthquake_count_usgs, on='Date')
merged_earthquake_counts.head()


Unnamed: 0,Date,ETAS,USGS
0,1960-01,52,12
1,1960-02,37,8
2,1960-03,52,6
3,1960-04,37,9
4,1960-05,35,6


In [445]:
merged_earthquake_counts['Date'] = merged_earthquake_counts['Date'].dt.to_timestamp()

# Create a Plotly figure
fig = px.line(merged_earthquake_counts, x='Date', y=['ETAS', 'USGS'], labels={'value': 'Number of Earthquakes'}, title='ETAS vs USGS Number of Earthquakes')
fig.update_layout(
    width=900,
    height=600,
)

fig.update_traces(line=dict(width=1.0))
# Add interactivity
fig.update_xaxes(rangeslider_visible=True)
fig.update_layout(dragmode='pan', hovermode='x')

# Show the interactive plot
fig.show()

In [446]:
usgs_max_df = usgs_max.copy()
etas_max_df = etas_max.copy()

merged_max_df = usgs_max_df.merge(etas_max_df, on='Date', how='inner')
merged_max_df['Result'] = merged_max_df['USGS'] - merged_max_df['ETAS']

In [447]:
origin = 0
merged_max_df['Date'] = merged_max_df['Date'].astype(str)

merged_max_df['Color'] = ['red' if y < origin else 'blue' for y in merged_max_df['Result']]

fig = px.scatter(merged_max_df, x='Date', y='Result', color='Color',
                 title='Difference between USGS and ETAS Max Magnitude',
                 labels={'Result': 'Result', 'Date': 'Date'},
                 color_discrete_map={'red': 'red', 'blue': 'blue'},
                 hover_name='Result', hover_data={'Color': False})

fig.add_trace(px.line(merged_max_df, x='Date', y='Result').data[0])

fig.update_layout(shapes=[dict(type='line', y0=origin, y1=origin, x0=min(merged_max_df['Date']), x1=max(merged_max_df['Date']),
                              line=dict(color='black', dash='dash'))])

fig.show()


In [448]:
greater = y_values > 0
print(len(greater))
lesser = y_values < 0
print(len(lesser))

86
86


ETAS seems to have predicted as many earthquakes with greater magnitudes than what USGS records show as it did earthquakes with lesser magnitudes.