### Create 2D array

In [None]:
import numpy as np
import pandas as pd

# Choose temporal frequency, spatial bin size and the bounding box of the study area
freq = 'W'  # pandas offset alias used for the temporal bins
binsize = 5  # edge length of a spatial bin, in degrees
longitude_W = 134 # minimum is 134
longitude_E = 174 # maximum is 174
latitude_S = 10 # minimum is 10
latitude_N = 60 # maximum is 60

# Load the earthquake catalogue and keep only events inside the bounding box
# (both edges inclusive). `data` ends up indexed by event time.
data = pd.read_csv('data/Japan_10_60_134_174_1973_2023_V2.csv')
data['Time'] = pd.to_datetime(data.Time)
in_area = (
    data.Longitude.between(longitude_W, longitude_E)
    & data.Latitude.between(latitude_S, latitude_N)
)
data = data[in_area].set_index('Time')
df = data.sort_index()

# Bin longitude and latitude into `binsize`-degree cells.
# include_lowest=True keeps events lying exactly on the western / southern
# edge; without it they pass the filter above but fall outside every
# right-closed interval and are silently dropped by the groupby.
# NOTE: if (east - west) or (north - south) is not a multiple of `binsize`,
# the last edge stops short of the boundary and events beyond it are dropped.
lon_edges = np.arange(longitude_W, longitude_E + 1, binsize)
lat_edges = np.arange(latitude_S, latitude_N + 1, binsize)
df['Longitude_bin'] = pd.cut(df['Longitude'], bins=lon_edges, include_lowest=True)
df['Latitude_bin'] = pd.cut(df['Latitude'], bins=lat_edges, include_lowest=True)

# Maximum magnitude per (longitude bin, latitude bin, time bin).
# observed=False is passed explicitly so that empty spatial bins are kept:
# the reshape below relies on every longitude/latitude combination being
# present. Selecting 'Magnitude' before aggregating avoids taking the max of
# unrelated columns.
grouped = (
    df.groupby(
        ['Longitude_bin', 'Latitude_bin', pd.Grouper(freq=freq, level="Time")],
        observed=False,
    )['Magnitude']
    .max()
    .unstack()   # time bins become columns, rows are (longitude bin, latitude bin)
    .fillna(0)   # no recorded event in a cell/time bin -> magnitude 0
)

# Reshape into a tensor of shape (time, longitude, latitude)
time = len(grouped.columns)
longitude = len(grouped.index.levels[0])
latitude = len(grouped.index.levels[1])
assert grouped.shape[0] == longitude * latitude, (
    "grouped does not cover the full longitude x latitude grid"
)
# Rows of `grouped` iterate longitude bins (outer) then latitude bins (inner),
# so a row-major reshape of the transposed values yields [t, lon, lat].
tensor = grouped.to_numpy().T.reshape(time, longitude, latitude)

# Re-orient every time slice like a map: swap to (time, latitude, longitude)
# and reverse the latitude axis so that the northernmost bin is row 0.
tensor = np.transpose(tensor, axes=(0, 2, 1))
tensor = np.flip(tensor, axis=1)

# Print the shape of the resulting tensor
print(tensor.shape)

### Visualise data (tensor)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Choose a timestep to plot (index along the first axis of `tensor`)
timestep = 18

# Extract the 2D (latitude x longitude) slice for the chosen timestep
time_slice = tensor[timestep, :, :]

# Tick labels are the spatial bin intervals. The latitude axis of `tensor`
# was flipped (row 0 is the northernmost bin), so the latitude labels must be
# reversed as well; otherwise every row is labelled with the wrong interval.
longitude_labels = grouped.index.levels[0]
latitude_labels = grouped.index.levels[1][::-1]

# Create a heatmap plot of the data using Seaborn
sns.set(rc={'figure.figsize':(4.8,6)})
sns.heatmap(
    time_slice,
    cmap='viridis',
    vmin=-1,
    vmax=10,
    linewidths=0.5,
    linecolor='grey',
    annot=False,
    xticklabels=longitude_labels,
    yticklabels=latitude_labels,
)

# Set the plot title and axis labels
plt.title(f'Earthquake magnitudes at timestep {timestep}')
plt.xlabel('Longitude')
plt.ylabel('Latitude')

# Show the plot
plt.show()

In [None]:
# Flatten each time slice into one row: one column per spatial cell,
# one row per time bin (indexed by the time bins of `grouped`).
n_timesteps = tensor.shape[0]
matrix = tensor.reshape(n_timesteps, -1)
df_transformed = pd.DataFrame(matrix, index=grouped.columns)

# Report the spatial cell (column position) with the largest summed magnitude
column_totals = df_transformed.sum()
print("column with highest sum of magnitudes:", column_totals.argmax())

In [None]:
import seaborn as sns

# Spatial cell (column of `df_transformed`) to inspect
column = 33

# Line plot of the per-time-bin maximum magnitude for that cell.
# The x-axis is the datetime index of `df_transformed` (one point per time
# bin), so it is labelled as time rather than as a count of months.
sns.set(rc={'figure.figsize':(10,3)})
plot = sns.lineplot(data=df_transformed[column], linewidth = .2, legend=False)
plot.set_xlabel("Time")
plot.set_ylabel("Magnitude")
plot.set_title(f"Earthquakes for column {column}")


# Implement AutoRegressive model

In [None]:
# import necessary libraries
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Univariate series to model: maximum magnitude per time bin of one spatial cell.
# NOTE(review): this cell used to index a variable `data` positionally and
# compare against a name `mag` that is defined nowhere in the notebook
# (`data` is the raw event DataFrame). Column 33 and the 4.5 threshold are
# taken from the surrounding cells / the class labels below -- confirm.
column = 33
series = df_transformed[column].to_numpy()
mag = 4.5  # magnitude threshold separating "event" from "no event"

# Look-back window: number of past time bins used to fit each model
train_size = 12 * 2
total_splits = len(series) - train_size
# test_size is the number of time bins to predict into the future (is 1)
test_size = 1

# Walk-forward split: every fold trains on the `train_size` bins that
# immediately precede its single test bin.
cv = TimeSeriesSplit(n_splits=total_splits, max_train_size=train_size, test_size=test_size)

mae_total = 0
y_true = []
y_pred = []

# Fit one model per fold and store the thresholded truth / prediction
for train_index, test_index in cv.split(series):
    # fit an ARIMA model, order = (p, d, q)
    model = ARIMA(series[train_index], order=(1, 1, 0))
    model_fit = model.fit()

    # forecast the next time bin
    forecast = np.asarray(model_fit.forecast(steps=test_size))
    actual = series[test_index]

    # accumulate the per-fold absolute error
    mae_total += mean_absolute_error(actual, forecast)

    # Binary outcome: did / was an event of at least `mag` (predicted to) occur?
    # Exactly one entry is appended to each list per fold, so the two lists
    # always stay aligned.
    y_true.append(int(actual[0] >= mag))
    y_pred.append(int(forecast[0] >= mag))


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# Confusion matrix with both classes forced, so it is always 2x2 even when
# one class never occurs; rows are true labels, columns are predictions.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
TN, FP, FN, TP = (int(count) for count in cm.ravel())

# Evaluate the performance
acc = _safe_ratio(TP + TN, TP + TN + FP + FN)
precision = _safe_ratio(TP, TP + FP)
recall = _safe_ratio(TP, TP + FN)
specificity = _safe_ratio(TN, TN + FP)

print('accuracy:', acc)
print('precision:', precision)
print('recall:', recall)
print('specificity:', specificity)
print('Mean Absolute Error:', mae_total/total_splits)

class_names = [f'M<{mag}', f'M>={mag}']

sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Print model summary (of the model fitted in the last fold only)
print(model_fit.summary())


In [None]:
import seaborn as sns
import numpy as np

# NOTE(review): `dataset` was not defined anywhere in the notebook, so this
# cell failed on a fresh kernel. It is bound here to the column-33 series
# used by the neighbouring cells -- confirm this is the intended spatial bin.
dataset = df_transformed[33]

# Wide line plot of the series; the x-axis is its datetime index.
sns.set(rc={'figure.figsize':(20,5)})
plot = sns.lineplot(data=dataset, linewidth = .2, legend=False)
plot.set_xlabel("Time")
plot.set_ylabel("Magnitude")
plot.set_title("Earthquakes for specified spatial bin")

## visualise autocorrelation (ACF) to define MA

### autocorrelation plot

In [None]:
import matplotlib.pyplot as plt
from pandas.plotting import autocorrelation_plot

# Autocorrelation of the column-33 series, drawn with pandas' helper and
# zoomed to the [-0.2, 0.2] band on the y-axis.
sns.set(rc={'figure.figsize':(10,3)})
bin_series = df_transformed[33]
acf_axes = autocorrelation_plot(bin_series)
acf_axes.set_ylim(-.2, .2)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation function of the column-33 series for the first 500 lags
# (statsmodels version), zoomed to the [-0.2, 0.2] band on the y-axis.
bin_series = df_transformed[33]
plot_acf(bin_series, lags=500)
plt.gca().set_ylim(-.2, .2)
plt.show()

## visualize PACF to define AR

In [None]:
# `pacf` was never imported in this notebook; import it next to its use.
from statsmodels.tsa.stattools import pacf

# Partial autocorrelation of the same series the ACF cells analyse.
# NOTE(review): the original passed `data`, which is the raw event DataFrame
# from the first cell rather than a single time series -- confirm column 33
# is the intended input.
pacf_val = pacf(df_transformed[33])
plt.bar(range(0,len(pacf_val)),pacf_val)
plt.xlabel('Lag')
plt.ylabel('Partial autocorrelation')
plt.show()

## Persistence model (baseline)

In [None]:
from sklearn.metrics import mean_squared_error

# Persistence (naive) baseline: the forecast for a time bin is simply the
# value observed in the previous time bin.
subregion = df_transformed[33]

# Drop the first bin, which has no predecessor to forecast from.
# shift(1) moves every observation one step forward in time, so `pred` only
# ever contains past values. (Shifting by -1 instead would pair each
# observation with its *future* value: the MSE is the same by symmetry, but
# `true` / `pred` would be mislabelled for any other use.)
true = subregion.iloc[1:]
pred = subregion.shift(1).iloc[1:]

mse = mean_squared_error(true, pred)
print("MSE:",mse)