# Time Until Large Events

In [120]:
import pandas as pd
import numpy as np
import datetime as dt

In [121]:
csv_file = "../datasets/Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep = ',', lineterminator='\n')
csv_file = "../datasets/All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})


Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.



## Data Filtering
1. Converting the date columns to datetime
2. Date > 1960-01-01 and < 2023-01-01
3. Longitude > -123 and < -113
4. Latitude > 29 and < 39

In [122]:
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
etas.loc[etas["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)
etas = etas[(etas['Date'] > pd.to_datetime('1960-01-01')) & (etas['Date'] < pd.to_datetime('2023-01-01'))]

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
etas = etas[etas['X'] > -123]
etas = etas[etas['X'] < -113]
etas = etas[etas['Y'] < 39]
etas = etas[etas['Y'] > 29]

In [123]:
etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737
5,1960-01-02,0:11:17.00,1960.00784,-115.605,33.0276,3.61,7.9322


In [124]:
usgs["Date"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
usgs.drop(columns=["time"], inplace=True)
usgs = usgs[(pd.to_datetime(usgs['Date']) > pd.to_datetime('1960-01-01')) & (pd.to_datetime(usgs['Date']) < pd.to_datetime('2023-01-01'))]

usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
usgs = usgs[usgs['longitude'] > -123]
usgs = usgs[usgs['longitude'] < -113]
usgs = usgs[usgs['latitude'] < 39]
usgs = usgs[usgs['latitude'] > 29]

time = []
for i in usgs['Date']:
    time.append(pd.to_datetime(i))
usgs['Date'] = time

In [125]:
usgs.head()

Unnamed: 0,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r,Date
240,33.3975,-116.393333,3.88,4.14,mw,132,16,0.07391,0.19,ci,...,"16 km N of Borrego Springs, CA",earthquake,0.1,0.38,,6,reviewed,ci,ci\r,2022-12-31
241,34.355667,-116.921833,4.73,3.47,mw,121,25,0.07845,0.15,ci,...,"11km SSE of Lucerne Valley, CA",earthquake,0.09,0.41,,4,reviewed,ci,ci\r,2022-12-31
246,37.620167,-122.025,3.82,3.34,mw,141,16,,0.16,nc,...,"3km N of Union City, CA",earthquake,0.1,0.17,,3,reviewed,nc,nc\r,2022-12-22
262,37.918167,-122.304,5.48,3.57,mw,170,19,0.01598,0.15,nc,...,"1km ENE of El Cerrito, CA",earthquake,0.1,0.17,,4,reviewed,nc,nc\r,2022-12-17
263,36.604667,-121.209333,8.88,3.28,ml,67,55,0.03812,0.09,nc,...,"10km NW of Pinnacles, CA",earthquake,0.14,0.28,0.129,72,reviewed,nc,nc\r,2022-12-13


## Data Grouping And Merging
Data is grouped into 1 day chunks based on the max magnitude

In [126]:
max_mag_etas = pd.DataFrame(etas.groupby(etas['Date'].dt.to_period('S')).Magnitude.max())
max_mag_etas.reset_index(inplace=True)
max_mag_etas.head()

Unnamed: 0,Date,Magnitude
0,1960-01-02 00:00:00,4.25
1,1960-01-03 00:00:00,3.9
2,1960-01-04 00:00:00,4.24
3,1960-01-05 00:00:00,3.4
4,1960-01-06 00:00:00,3.47


In [127]:
max_mag_usgs = pd.DataFrame(usgs.groupby(usgs['Date'].dt.to_period('S')).mag.max())
max_mag_usgs.reset_index(inplace=True)
max_mag_usgs.head()

Unnamed: 0,Date,mag
0,1960-01-02 00:00:00,4.04
1,1960-01-05 00:00:00,3.03
2,1960-01-07 00:00:00,3.64
3,1960-01-08 00:00:00,3.1
4,1960-01-11 00:00:00,3.79


In [128]:
large_earthquake = 6.5

## Large Events
A label is added to Large Event data

In [129]:
large_mag_etas = max_mag_etas.copy()
large_mag_etas["Large Event"] = (large_mag_etas["Magnitude"] > large_earthquake).astype(int)
large_mag_etas["Date"] = large_mag_etas["Date"].dt.to_timestamp()

large_mag_etas = large_mag_etas.copy()

condition = large_mag_etas['Large Event'] == 1
subset = large_mag_etas.loc[condition].copy()
large_mag_etas.loc[condition, 'time_diff'] = subset['Date'].diff().dt.days
large_mag_etas.loc[0, 'time_diff'] = pd.NA

large_mag_usgs = max_mag_usgs.copy()
large_mag_usgs["Large Event"] = (large_mag_usgs["mag"] > large_earthquake).astype(int)
large_mag_usgs["Date"] = large_mag_usgs["Date"].dt.to_timestamp()

large_mag_usgs = large_mag_usgs.copy()

condition = large_mag_usgs['Large Event'] == 1
subset = large_mag_usgs.loc[condition].copy()
large_mag_usgs.loc[condition, 'time_diff'] = subset['Date'].diff().dt.days
large_mag_usgs.loc[0, 'time_diff'] = pd.NA


In [130]:
large_mag_etas.head()

Unnamed: 0,Date,Magnitude,Large Event,time_diff
0,1960-01-02,4.25,0,
1,1960-01-03,3.9,0,
2,1960-01-04,4.24,0,
3,1960-01-05,3.4,0,
4,1960-01-06,3.47,0,


In [131]:
large_mag_usgs.head()

Unnamed: 0,Date,mag,Large Event,time_diff
0,1960-01-02,4.04,0,
1,1960-01-05,3.03,0,
2,1960-01-07,3.64,0,
3,1960-01-08,3.1,0,
4,1960-01-11,3.79,0,


## Graphing Time Until Large Events

In [132]:
import plotly.express as px
import plotly.graph_objects as go

In [133]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=large_mag_etas['Date'],
    y=large_mag_etas['time_diff'],
    mode='markers',
    marker=dict(color='red', line=dict(color='black', width=1)),
    name='ETAS'
))

fig.add_trace(go.Scatter(
    x=large_mag_usgs['Date'],
    y=large_mag_usgs['time_diff'],
    mode='markers',
    marker=dict(color='blue', line=dict(color='black', width=1)),
    name='USGS'
))

fig.update_layout(
    title='Time Between Large Events -ETAS vs USGS',
    xaxis_title='Date',
    yaxis_title='Time Difference (Days)',
    legend=dict(x=0, y=1, traceorder='normal', orientation='h')  # Adjust legend position
)

fig.show()

## Calcutating Energy
Converting the magnitudes to energy through the formula: (1/1.5) * log(10^(1.5*mag))

In [134]:
const = (1/1.5)
large_mag_etas['Energy'] = 10**(1.5*large_mag_etas['Magnitude'])
large_mag_etas['Energy'] = np.log(large_mag_etas['Magnitude'])*const

const = (1/1.5)
large_mag_usgs['Energy'] = 10**(1.5*large_mag_usgs['mag'])
large_mag_usgs['Energy'] = np.log(large_mag_usgs['mag'])*const

In [135]:
fig = go.Figure()

def add_cumulative_energy_plot(dataset, color, name_prefix):
    large_events = dataset[dataset["Large Event"] == 1]

    for i in range(len(large_events) - 1):
        start_date = large_events.iloc[i]["Date"]
        end_date = large_events.iloc[i + 1]["Date"]

        subset = dataset[(dataset["Date"] >= start_date) & (dataset["Date"] < end_date)]

        cumulative_energy = subset["Energy"].cumsum()

        fig.add_trace(go.Scatter(
            x=subset["Date"],
            y=cumulative_energy,
            mode='lines',
            name=f'{name_prefix} Event {i+1}-{i+2}',
            line=dict(color=color, width=2)
        ))

In [136]:
# Add cumulative energy plots for USGS and ETAS datasets
add_cumulative_energy_plot(large_mag_usgs, 'green', 'USGS')
add_cumulative_energy_plot(large_mag_etas, 'blue', 'ETAS')

fig.update_layout(
    title='Cumulative Energy Between Large Events USGS vs ETAS',
    xaxis_title='Date',
    yaxis_title='Cumulative Energy',
)

fig.show()

In [137]:
# etas['Large'] = etas['Magnitude'] > large_earthquake

# # Find the indices of large earthquakes
# large_earthquake_indices = etas.index[etas['Large']].tolist()

# # Initialize lists to store the sum of energies
# sum_energies_usgs = []

# # Calculate sum of energies between consecutive large earthquakes
# for i in range(len(large_earthquake_indices) - 1):
#     start_index = large_earthquake_indices[i]
#     end_index = large_earthquake_indices[i + 1]

#     # Extract the relevant subset of data
#     subset = etas.loc[start_index:end_index]

#     # Calculate the sum of energies in this interval
#     interval_energy_sum = subset['Energy'].sum()

#     # Append the sum of energies to the list
#     sum_energies_usgs.append(interval_energy_sum)

In [138]:

# usgs['Large'] = usgs['mag'] > large_earthquake

# # Find the indices of large earthquakes
# large_earthquake_indices = usgs.index[usgs['Large']].tolist()

# # Initialize lists to store the sum of energies
# sum_energies_etas = []

# # Calculate sum of energies between consecutive large earthquakes
# for i in range(len(large_earthquake_indices) - 1):
#     start_index = large_earthquake_indices[i]
#     end_index = large_earthquake_indices[i + 1]

#     # Extract the relevant subset of data
#     subset = usgs.loc[start_index:end_index]

#     # Calculate the sum of energies in this interval
#     interval_energy_sum = subset['Energy'].sum()

#     # Append the sum of energies to the list
#     sum_energies_etas.append(interval_energy_sum)

In [139]:
# usgs_energy_sum = {'Large Earthquake Interval Index': range(len(sum_energies_usgs)), 'Sum of Energies In USGS': sum_energies_usgs}
# usgs_energy_sum = pd.DataFrame(usgs_energy_sum)

# etas_energy_sum = {'Large Earthquake Interval Index': range(len(sum_energies_etas)), 'Sum of Energies In ETAS': sum_energies_etas}
# etas_energy_sum = pd.DataFrame(etas_energy_sum)

In [140]:
# fig = go.Figure()

# fig.add_trace(go.Scatter(x=usgs_energy_sum['Large Earthquake Interval Index'], y=usgs_energy_sum['Sum of Energies In USGS'],
#                          mode='lines+markers', name='Sum of Energies In USGS'))

# fig.add_trace(go.Scatter(x=etas_energy_sum['Large Earthquake Interval Index'], y=etas_energy_sum['Sum of Energies In ETAS'],
#                          mode='lines+markers', name='Sum of Energies In ETAS', line=dict(color='red')))

# fig.update_layout(
#     xaxis=dict(title='Large Earthquake Interval Index'),
#     yaxis=dict(title='Sum of Energies'),
#     title='Sum of Energies between Large Earthquakes'
# )

# fig.show()