In [1]:
#import libraries
import numpy as np
import pandas as pd
import datetime as dt
import plotly.express as px

In [2]:
#load the ETAS and USGS earthquake catalogs from the local datasets folder
#forward slashes are accepted by Python on Windows as well as on POSIX systems,
#so the relative paths no longer depend on the Windows-only '\\' separator
#NOTE(review): lineterminator='\n' splits rows on LF only; if a file uses CRLF
#the last column would keep a trailing '\r' -- confirm the CSVs are LF-terminated
etas = pd.read_csv('datasets/ModifiedETAS.csv', sep=',', lineterminator='\n')
usgs = pd.read_csv('datasets/USGS.csv', sep=',', lineterminator='\n')

In [3]:
#magnitude filtering: keep only events with magnitude strictly above 3
#.copy() makes each result an independent frame, so the 'energy' column that is
#added to these frames afterwards does not trigger SettingWithCopyWarning
etas = etas[etas['mag'] > 3].copy()
usgs = usgs[usgs['mag'] > 3].copy()

In [4]:
#derive an 'energy' column from 'mag' for both catalogs
#NOTE(review): np.log is the natural logarithm, so the stored value is
#ln(10**(1.5*mag)) / 1.5, i.e. mag * ln(10) -- confirm np.log10 was not intended
formula_constant = (1/1.5)

for catalog in (usgs, etas):
    raw_energy = 10**(1.5*catalog['mag'])
    catalog['energy'] = np.log(raw_energy) * formula_constant

In [5]:
#analysis parameters shared by the binning and plotting cells
binning_time = '2W'     #bin width, passed to pd.Grouper as its freq
date_range = 1          #window half-width in years on each side of a large event
magnitude_cutoff = 7    #events with mag >= this value are treated as large events

In [6]:
#select the large events (magnitude at or above the cutoff) from each catalog,
#stored as independent copies of the matching rows
usgs_large = usgs.loc[usgs['mag'] >= magnitude_cutoff].copy()
etas_large = etas.loc[etas['mag'] >= magnitude_cutoff].copy()

In [7]:
#function to bin data into chunks
def bin_data(data:pd.DataFrame, start_date:pd.Timestamp, freq:str, range:int):
    """Sum 'energy' into time bins of width `freq` around a reference date.

    Parameters
    ----------
    data : pd.DataFrame
        Catalog with a 'date' column (anything pd.to_datetime accepts) and an
        'energy' column. The 'date' column is converted to datetime IN PLACE on
        the caller's frame; that side effect is preserved on purpose because
        other cells may rely on the column already holding datetimes.
    start_date : pd.Timestamp
        Reference date (a date string is accepted as well).
    freq : str
        Bin width handed to pd.Grouper, e.g. '2W'.
    range : int
        Window half-width in years: rows dated within start_date +/- range
        years (both ends inclusive) are kept. The name shadows the builtin
        `range` inside this function; it is kept for interface compatibility.

    Returns
    -------
    pd.DataFrame
        One 'energy' column holding the per-bin sum, indexed by the bin label.
        NOTE(review): bin edges follow pd.Grouper's defaults for `freq`, they
        are not anchored on start_date -- confirm this is acceptable.
    """
    # Parse the column only when needed: this function is called once per large
    # event, and re-parsing an already-datetime column every time is wasted work.
    if not pd.api.types.is_datetime64_any_dtype(data['date']):
        data['date'] = pd.to_datetime(data['date'])
    start_date = pd.to_datetime(start_date)

    window = pd.DateOffset(years=range)
    date_range_start = start_date - window
    date_range_end = start_date + window

    # Series.between is inclusive on both ends by default (same as >= and <=).
    in_window = data['date'].between(date_range_start, date_range_end)
    binned_energy = data.loc[in_window].groupby(pd.Grouper(key='date', freq=freq))['energy'].sum()
    return binned_energy.to_frame()

In [8]:
#graphing usgs data: one trace of binned energy per large event
fig = px.scatter()
for event_date in usgs_large['date']:
    start_date = pd.to_datetime(event_date)
    usgs_binned = bin_data(usgs, start_date, binning_time, date_range)
    #x axis: offset of each bin label from the large event, in whole days
    bin_labels = pd.to_datetime(usgs_binned.index)
    relative_dates = (bin_labels - start_date).days

    fig.add_scatter(
        x=relative_dates,
        y=usgs_binned['energy'],
        mode='lines+markers',
        name=f'Date: {start_date}',
    )

fig.update_layout(
    title='Energy Before/After Large Earthquakes (USGS)',
    xaxis_title='Days Before/After Large Earthquake',
    yaxis_title='Energy',
    legend_title='Date',
)

fig.show()

In [9]:
#graphing etas data: one trace of binned energy per large event
fig = px.scatter()
for event_date in etas_large['date']:
    start_date = pd.to_datetime(event_date)
    etas_binned = bin_data(etas, start_date, binning_time, date_range)
    #x axis: offset of each bin label from the large event, in whole days
    bin_labels = pd.to_datetime(etas_binned.index)
    relative_dates = (bin_labels - start_date).days

    fig.add_scatter(
        x=relative_dates,
        y=etas_binned['energy'],
        mode='lines+markers',
        name=f'Date: {start_date}',
    )

fig.update_layout(
    title='Energy Before/After Large Earthquakes (ETAS)',
    xaxis_title='Days Before/After Large Earthquake',
    yaxis_title='Energy',
    legend_title='Date',
)

fig.show()

In [10]:
#finding the average energy spread before/after large events of a catalog
def average_spread(df:pd.DataFrame, df_large:pd.DataFrame, freq=None, years=None):
    """Average the binned energy curves around every large event.

    For each row of `df_large`, `df` is binned with bin_data around that row's
    'date'. Each binned curve is re-indexed by bin position (0, 1, 2, ...) and
    the curves are averaged position by position.

    Parameters
    ----------
    df : pd.DataFrame
        Full catalog, forwarded to bin_data.
    df_large : pd.DataFrame
        Large events; only its 'date' column is read.
    freq : str, optional
        Bin width for bin_data. Defaults to the notebook-level `binning_time`.
    years : int, optional
        Window half-width in years for bin_data. Defaults to the notebook-level
        `date_range`, read at call time. Pass it explicitly when that global
        may have been reassigned to a different unit.

    Returns
    -------
    pd.DataFrame
        Columns 'index' (bin position) and 'energy' (mean over events). An
        empty frame with those columns is returned when there is nothing to
        average, instead of letting pd.concat raise on an empty list.
    """
    if freq is None:
        freq = binning_time
    if years is None:
        years = date_range

    per_event = []
    for raw_date in df_large['date']:
        start_date = pd.to_datetime(raw_date)
        binned = bin_data(df, start_date, freq, years)
        # Swap the calendar-date index for the bin's position so that curves of
        # different events can be matched up by position.
        per_event.append(binned.reset_index(drop=True).rename_axis('index'))

    if not per_event:
        return pd.DataFrame(columns=['index', 'energy'])

    return pd.concat(per_event).groupby('index').mean().reset_index()

In [11]:
usgs_avg = average_spread(usgs, usgs_large)
etas_avg = average_spread(etas, etas_large)

#pair the two averaged curves on their shared bin position so that x, y and the
#marker colors always have the same length, even if one catalog produced more
#bins than the other (merge defaults to an inner join)
paired_avg = etas_avg.merge(usgs_avg, on='index', suffixes=('_etas', '_usgs'))
energy_diff = paired_avg['energy_etas'] - paired_avg['energy_usgs']

#coloring the markers by the sign of the difference (red: ETAS below USGS)
color = ['red' if delta < 0 else 'blue' for delta in energy_diff]

fig = px.scatter()
fig.add_scatter(x=paired_avg['index'], y=energy_diff, mode='lines+markers', marker=dict(color=color), line=dict(color='black'))
fig.update_layout(title='Average Difference In Trend Between ETAS & USGS',
                  xaxis_title='Index Value',
                  yaxis_title='Energy Difference',
                 )

fig.show()


In [12]:
#function to bin data before and after major event based on location
#* note that there is no timed (2 week) binning
def location_binning(data: pd.DataFrame, start_date: pd.Timestamp, date_range: int):
    """Return the rows of `data` dated within +/- `date_range` months of `start_date`.

    Parameters
    ----------
    data : pd.DataFrame
        Catalog with a 'date' column (datetimes or anything pd.to_datetime
        accepts). The frame is not modified.
    start_date : pd.Timestamp
        Reference date (a date string is accepted as well).
    date_range : int
        Window half-width in months. pd.DateOffset(months=...) requires a
        whole number.

    Returns
    -------
    pd.DataFrame
        The rows of `data` inside the window (both ends inclusive), with their
        original columns and values.
    """
    # Parse locally so the comparison also works when 'date' still holds
    # strings, without depending on an earlier call having converted the column.
    event_dates = pd.to_datetime(data['date'])
    start_date = pd.to_datetime(start_date)

    window = pd.DateOffset(months=date_range)
    in_window = event_dates.between(start_date - window, start_date + window)
    return data[in_window]

In [13]:
#redefining magnitude cutoff value and range to prevent lag
#NOTE(review): from here on date_range is a number of months (it is passed to
#location_binning); the earlier bin_data calls read the same global as a number
#of years, so re-running those cells after this one changes their window
date_range = 6 #month
magnitude_cutoff = 7

#re-apply the cutoff to the already-filtered large-event frames
#(this can only narrow the selection, never widen it)
usgs_large = usgs_large.loc[usgs_large['mag'] >= magnitude_cutoff]
etas_large = etas_large.loc[etas_large['mag'] >= magnitude_cutoff]

In [14]:
#collect one frame per large event and concatenate once at the end; growing a
#DataFrame with pd.concat inside the loop re-copies all earlier rows each time
event_frames = []

for raw_date in etas_large['date']:
    start_date = pd.to_datetime(raw_date)
    #binning data before/after major event
    etas_binned = location_binning(etas, start_date, date_range)
    #signed whole-day offset of every event from the large event
    relative_dates = (pd.to_datetime(etas_binned['date']) - start_date).dt.days

    event_frames.append(pd.DataFrame({
        'Day 0': [start_date.strftime('%Y-%m-%d')] * len(etas_binned),
        'Latitude': etas_binned['latitude'],
        'Longitude': etas_binned['longitude'],
        'Relative_Dates': relative_dates,
        'Energy': etas_binned['energy']
    }))

#dataframe holding the binned data of all large events; with no large events
#it is an empty frame that still carries the expected columns
plot_columns = ['Day 0', 'Latitude', 'Longitude', 'Relative_Dates', 'Energy']
binned_data = pd.concat(event_frames) if event_frames else pd.DataFrame(columns=plot_columns)

#one animation frame per large event ('Day 0'), colored by day offset
fig = px.scatter(
    binned_data,
    x='Longitude',
    y='Latitude',
    color='Relative_Dates',
    size='Energy',
    animation_frame='Day 0',
    title='Energy Locations Before/After Large Earthquakes (ETAS)',
    labels={'Relative_Dates': 'Days +/- Large Event'},
    color_continuous_scale='Viridis',
    opacity=0.7,
    size_max=10,
    width=800,
    height=800
)

fig.show()

In [15]:
#collect one frame per large event and concatenate once at the end; growing a
#DataFrame with pd.concat inside the loop re-copies all earlier rows each time
event_frames = []

for raw_date in usgs_large['date']:
    start_date = pd.to_datetime(raw_date)
    #binning data before/after major event
    usgs_binned = location_binning(usgs, start_date, date_range)
    #signed whole-day offset of every event from the large event
    relative_dates = (pd.to_datetime(usgs_binned['date']) - start_date).dt.days

    event_frames.append(pd.DataFrame({
        'Day 0': [start_date.strftime('%Y-%m-%d')] * len(usgs_binned),
        'Latitude': usgs_binned['latitude'],
        'Longitude': usgs_binned['longitude'],
        'Relative_Dates': relative_dates,
        'Energy': usgs_binned['energy']
    }))

#dataframe holding the binned data of all large events; with no large events
#it is an empty frame that still carries the expected columns
plot_columns = ['Day 0', 'Latitude', 'Longitude', 'Relative_Dates', 'Energy']
plot_data = pd.concat(event_frames) if event_frames else pd.DataFrame(columns=plot_columns)

#one animation frame per large event ('Day 0'), colored by day offset
fig = px.scatter(
    plot_data,
    x='Longitude',
    y='Latitude',
    color='Relative_Dates',
    size='Energy',
    animation_frame='Day 0',
    title='Energy Locations Before/After Large Earthquakes (USGS)',
    labels={'Relative_Dates': 'Days +/- Large Event'},
    color_continuous_scale='Viridis',
    opacity=0.7,
    size_max=10,
    width=800,
    height=800
)

fig.show()