In [1]:
#import libraries
import numpy as np
import pandas as pd
import datetime as dt
from datetime import datetime, timedelta
import plotly.express as px

In [2]:
#import datasets
# Forward slashes are used as the path separator because they resolve on
# Windows as well as on Linux/macOS; the earlier backslash-separated paths
# could only be found on Windows.
etas = pd.read_csv('datasets/ModifiedETAS.csv', sep=',', lineterminator='\n')
usgs = pd.read_csv('datasets/USGS.csv', sep=',', lineterminator='\n')

In [3]:
#magnitude filtering
# Keep only events with magnitude strictly above 3 in both catalogs.
# .copy() makes each result an independent frame, so the column assignments
# performed on it later do not write into a slice of the raw data
# (which is what triggers pandas' SettingWithCopyWarning).
etas = etas[etas['mag'] > 3].copy()
usgs = usgs[usgs['mag'] > 3].copy()

#filter to after 1980
# Dates are reduced to day resolution ('YYYY-MM-DD' strings); values that
# cannot be parsed are coerced to missing and then dropped by the comparison
# below, which is False for missing dates.
usgs['date'] = pd.to_datetime(usgs['date'], errors='coerce').dt.strftime('%Y-%m-%d')
usgs = usgs[pd.to_datetime(usgs['date']) > pd.to_datetime('1980-01-01')].copy()

In [4]:
# Events with magnitude strictly above this value are treated as "large" earthquakes.
threshold = 6
# Number of days before each large event included in its counting window (passed to bin_data).
time_before = 0
# Number of days after each large event included in its counting window; also the
# minimum gap, in days, for two large events to be kept as separate entries.
time_after = 1

In [5]:
# Build the list of large USGS events, keeping one representative per cluster.
# Events are scanned in date order (strongest first within a date). An event
# more than `time_after` days after the current representative starts a new
# cluster; an event inside that window replaces the representative only if it
# is stronger.
large_earthquake_usgs = usgs[usgs['mag'] > threshold]
large_earthquake_usgs = large_earthquake_usgs.sort_values(by=['date','mag'], ascending=[True, False])
filtered_rows = []

last_date = None
last_mag = None

for index, row in large_earthquake_usgs.iterrows():
    date = pd.to_datetime(row['date'])
    mag = row['mag']

    if pd.isna(date):
        # An undated event cannot be placed relative to any window.
        continue

    if last_date is None or (date - last_date).total_seconds() > 24 * 3600 * time_after:
        # First event, or outside the window of the previous representative.
        filtered_rows.append(row)
        last_date = date
        last_mag = mag
    elif mag > last_mag:
        # Inside the window and stronger: it becomes the representative.
        # (This branch used to be nested under the `if` above, right after
        # `last_mag = mag`, so its condition could never be true.)
        filtered_rows[-1] = row
        last_date = date
        last_mag = mag

# Passing the columns explicitly keeps the expected schema (including 'date')
# even when no event exceeds the threshold and filtered_rows is empty.
large_earthquake_usgs = pd.DataFrame(filtered_rows, columns=large_earthquake_usgs.columns)

large_earthquake_usgs['date'] = pd.to_datetime(large_earthquake_usgs['date'])

In [6]:
# Build the list of large ETAS events, keeping one representative per cluster.
# Events are scanned in date order (strongest first within a date). An event
# more than `time_after` days after the current representative starts a new
# cluster; an event inside that window replaces the representative only if it
# is stronger.
large_earthquake_etas = etas[etas['mag'] > threshold]
large_earthquake_etas = large_earthquake_etas.sort_values(by=['date','mag'], ascending=[True, False])
filtered_rows = []

last_date = None
last_mag = None

for index, row in large_earthquake_etas.iterrows():
    date = pd.to_datetime(row['date'])
    mag = row['mag']

    if pd.isna(date):
        # An undated event cannot be placed relative to any window.
        continue

    if last_date is None or (date - last_date).total_seconds() > 24 * 3600 * time_after:
        # First event, or outside the window of the previous representative.
        filtered_rows.append(row)
        last_date = date
        last_mag = mag
    elif mag > last_mag:
        # Inside the window and stronger: it becomes the representative.
        # (This branch used to be nested under the `if` above, right after
        # `last_mag = mag`, so its condition could never be true.)
        filtered_rows[-1] = row
        last_date = date
        last_mag = mag

# Passing the columns explicitly keeps the expected schema (including 'date')
# even when no event exceeds the threshold and filtered_rows is empty.
large_earthquake_etas = pd.DataFrame(filtered_rows, columns=large_earthquake_etas.columns)

large_earthquake_etas['date'] = pd.to_datetime(large_earthquake_etas['date'])

In [7]:
def bin_data(data: pd.DataFrame, start_date: pd.Timestamp, time_before: int = 0, time_after: int = 0) -> pd.DataFrame:
    """Return the rows of ``data`` whose date lies in a window around ``start_date``.

    The window is inclusive at both ends and spans
    ``[start_date - time_before days, start_date + time_after days]``.

    Parameters
    ----------
    data : DataFrame with a ``'date'`` column that ``pd.to_datetime`` can parse.
    start_date : reference date; anything ``pd.to_datetime`` accepts.
    time_before : number of days before ``start_date`` to include (default 0).
    time_after : number of days after ``start_date`` to include (default 0).

    Returns
    -------
    The subset of ``data`` that falls inside the window.

    Notes
    -----
    ``data['date']`` is converted to datetime on the frame that is passed in,
    so the caller's column is changed as a side effect of the first call.
    """
    # In-place conversion: later calls on the same frame find the column
    # already in datetime form.
    data['date'] = pd.to_datetime(data['date'])
    start_date = pd.to_datetime(start_date)

    date_range_start = start_date - timedelta(days=time_before)
    date_range_end = start_date + timedelta(days=time_after)

    in_window = (data['date'] >= date_range_start) & (data['date'] <= date_range_end)
    return data[in_window]

In [8]:
# USGS catalog: for every large event, count the catalog events inside its
# time window, then plot the mean count per mainshock magnitude.
fig = px.scatter()
magnitude_sum_dict = {}    # magnitude -> summed window counts
magnitude_count_dict = {}  # magnitude -> number of large events at that magnitude

for _, row in large_earthquake_usgs.iterrows():
    magnitude = row['mag']
    window = bin_data(usgs, pd.to_datetime(row['date']), time_before, time_after)
    # One is subtracted from the row count -- presumably to leave out the
    # large event itself, which lies inside its own window.
    num_earthquakes = len(window) - 1

    magnitude_sum_dict[magnitude] = magnitude_sum_dict.get(magnitude, 0) + num_earthquakes
    magnitude_count_dict[magnitude] = magnitude_count_dict.get(magnitude, 0) + 1

x = list(magnitude_sum_dict)
y = [magnitude_sum_dict[m] / magnitude_count_dict[m] for m in x]


fig.add_scatter(x=x, y=y, mode='markers')
fig.update_layout(
    title='Magnitude vs Num Earthquakes (24 Hours After)',
    xaxis_title='Magnitude',
    yaxis_title='No. Earthquakes',
)
fig.show()

In [9]:
# ETAS catalog: for every large event, count the catalog events inside its
# time window, then plot the mean count per mainshock magnitude.
fig = px.scatter()
magnitude_sum_dict = {}    # magnitude -> summed window counts
magnitude_count_dict = {}  # magnitude -> number of large events at that magnitude

for index, row in large_earthquake_etas.iterrows():
    start_date = pd.to_datetime(row['date'])
    # Count inside the ETAS catalog itself. This cell previously binned
    # `usgs`, so ETAS mainshocks were matched against USGS events.
    binned_data = bin_data(etas, start_date, time_before, time_after)
    # One is subtracted from the row count -- presumably to leave out the
    # large event itself, which lies inside its own window.
    num_earthquakes = binned_data.shape[0] - 1
    magnitude = row['mag']

    if magnitude in magnitude_sum_dict:
        magnitude_sum_dict[magnitude] += num_earthquakes
        magnitude_count_dict[magnitude] += 1
    else:
        magnitude_sum_dict[magnitude] = num_earthquakes
        magnitude_count_dict[magnitude] = 1

x = list(magnitude_sum_dict.keys())
y = [magnitude_sum_dict[m] / magnitude_count_dict[m] for m in x]


fig.add_scatter(x=x, y=y, mode='markers')
fig.update_layout(title='Magnitude vs Num Earthquakes (24 Hours After)', xaxis_title='Magnitude', yaxis_title='No. Earthquakes')
fig.show()