In [2]:
#import librarires
import numpy as np
import pandas as pd
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import plotly.express as px
import plotly.graph_objects as go
import io
import PIL

In [3]:
#defining constants
binning_time = '4W'
lat_grid_size = 0.5
lon_grid_size = 0.5

#import datasets
usgs = pd.read_csv('datasets\\USGS.csv', sep=',', lineterminator='\n')

In [4]:
#magnitude filtering
usgs = usgs[usgs['mag'] > 3]
usgs['date'] = pd.to_datetime(usgs['date'])

# Determine the start and end dates of the dataset
start_date = usgs['date'].min()
end_date = usgs['date'].max()

In [5]:
def cell_grids(row):    
    lat_cell = int(row['latitude'] // lat_grid_size)
    lon_cell = int(row['longitude'] // lon_grid_size)
    return lat_cell, lon_cell


def grouping(df:pd.DataFrame):
    result = df.groupby(['cell_no', pd.Grouper(key='date', freq=binning_time)])['mag'].sum().reset_index()
    return result

In [6]:
usgs['cell_no'] = usgs.apply(cell_grids, axis=1)
result = grouping(usgs)

min_lat, max_lat = usgs['latitude'].min(), usgs['latitude'].max()
min_lon, max_lon = usgs['longitude'].min(), usgs['longitude'].max()

num_lat_cells = int(np.ceil((max_lat - min_lat) / lat_grid_size))
num_lon_cells = int(np.ceil((max_lon - min_lon) / lon_grid_size))
zmax = result['mag'].max()

# Preallocate arrays
data = [None] * len(result['date'].unique())
heatmap_frames = [None] * len(result['date'].unique())

heatmap = np.zeros((num_lat_cells, num_lon_cells), dtype=float)
# print(heatmap)
scaler = MinMaxScaler()

In [7]:
for frame, (date, frame_data) in enumerate(result.groupby('date')):
    #scaled vs unscaled data
    scaled_mag = scaler.fit_transform(frame_data[['mag']])
    for _, row in frame_data.iterrows():
        lat_cell, lon_cell = row['cell_no']
        lat_idx = int(lat_cell - min_lat / lat_grid_size)
        lon_idx = int(lon_cell - min_lon / lon_grid_size)
        heatmap[lat_idx, lon_idx] = row['mag']
        
        data[frame] = [
            np.where(heatmap == 0, 0, heatmap),
        ]
        heatmap_frames[frame] = go.Frame(
        data=go.Heatmap(
            z=np.where(heatmap == 0, None, heatmap),
            colorscale='Plotly3',
            zmin=3,
            zmax=zmax,
            x=np.linspace(min_lon, max_lon, num=num_lon_cells),
            y=np.linspace(min_lat, max_lat, num=num_lat_cells),
            colorbar=dict(title='Sum of Values')
        ),
        name=str(date)
    )
    
    heatmap.fill(0)

In [8]:
data = np.array(data)
# df = pd.DataFrame(data)
# print(df)
# df.to_csv('heatmap.csv')
# print(data.ndim)
flattened_data = np.reshape(data, (data.shape[0], data.shape[1], -1))
print(flattened_data.ndim)
# df.to_csv('heatmap.csv')
squeezed_data = np.squeeze(flattened_data, axis=1)

# print(squeezed_data)
df = pd.DataFrame(squeezed_data)
print(df.info())

# Save the DataFrame to a CSV file
# df.to_csv('heatmap.csv')

3
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 823 entries, 0 to 822
Columns: 400 entries, 0 to 399
dtypes: float64(400)
memory usage: 2.5 MB
None


In [9]:
data = df.values

def create_dataset(data, look_back=1):
    X, Y = [], []
    for i in range(len(data) - look_back):
        a = data[i:(i + look_back), :]
        X.append(a)
        Y.append(data[i + look_back, :])
    return np.array(X), np.array(Y)

In [10]:
look_back = 24
X, Y = create_dataset(data, look_back)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [11]:
# Create and fit the LSTM network
model = Sequential()
model.add(LSTM(50, input_shape=(look_back, data.shape[1])))
model.add(Dense(data.shape[1], activation='relu'))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, Y_train, epochs=100, batch_size=1, verbose=2, validation_split=0.1)

Epoch 1/100


  super().__init__(**kwargs)


575/575 - 3s - 5ms/step - loss: 40.9506 - val_loss: 84.5656
Epoch 2/100
575/575 - 1s - 2ms/step - loss: 40.5765 - val_loss: 83.8265
Epoch 3/100
575/575 - 3s - 5ms/step - loss: 40.2287 - val_loss: 83.4264
Epoch 4/100
575/575 - 3s - 5ms/step - loss: 39.9623 - val_loss: 83.1162
Epoch 5/100
575/575 - 3s - 5ms/step - loss: 39.7085 - val_loss: 82.7271
Epoch 6/100
575/575 - 3s - 4ms/step - loss: 39.4638 - val_loss: 82.5963
Epoch 7/100
575/575 - 3s - 5ms/step - loss: 39.2077 - val_loss: 82.3364
Epoch 8/100
575/575 - 7s - 12ms/step - loss: 39.0072 - val_loss: 82.0664
Epoch 9/100
575/575 - 3s - 6ms/step - loss: 38.7770 - val_loss: 81.8787
Epoch 10/100
575/575 - 4s - 6ms/step - loss: 38.5726 - val_loss: 81.7148
Epoch 11/100
575/575 - 3s - 5ms/step - loss: 38.3918 - val_loss: 81.5745
Epoch 12/100
575/575 - 1s - 2ms/step - loss: 38.1642 - val_loss: 81.3138
Epoch 13/100
575/575 - 1s - 2ms/step - loss: 37.9561 - val_loss: 81.2884
Epoch 14/100
575/575 - 1s - 2ms/step - loss: 37.7676 - val_loss: 81.207

<keras.src.callbacks.history.History at 0x1be39b33890>

In [12]:
# Make predictions
last_values = data[-look_back:]
last_values = last_values.reshape(1, look_back, data.shape[1])
predicted_rows = np.maximum(model.predict(last_values), 0)

for i in range(49):
    last_values = np.append(last_values[:,1:,:], [predicted_rows], axis=1)
    predicted_rows = np.vstack((predicted_rows, model.predict(last_values)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 387ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

In [13]:
# print(predicted_rows)
df = pd.DataFrame(predicted_rows)
print(df.size)
df.head(20)

20000


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,390,391,392,393,394,395,396,397,398,399
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
df.to_csv('prediction_usgs.csv')

In [15]:
matrices = predicted_rows.reshape(50, 20, 20)

In [16]:
for i, matrix in enumerate(matrices):
    frame = go.Frame(
        data=[go.Heatmap(
             z=np.where(matrix == 0, None, matrix),
            colorscale='Plotly3',
            zmin=0,
            zmax=1,
        )],
        name=str(i)  # Frame name, can be adjusted as needed
    )
    #append new prediction to existing heatmap
    heatmap_frames.append(frame)

# Initialize the figure with the first frame's data
fig = go.Figure(
    data=[go.Heatmap(z=matrices[0], colorscale='Plotly3')],
    layout=go.Layout(
        title='Sum of Magnitude over Time for Each Cell',
        width=700,
        height=700,
        xaxis_title='Longitude',
        yaxis_title='Latitude',
    ),
    frames=heatmap_frames,
)

In [17]:
fig.update_layout(coloraxis_colorbar=dict(title='Sum of Magnitude'))
fig.update_layout(
    updatemenus=[{
        "buttons": [
            {"args": [None, {"frame": {"duration": 500, "redraw": True}, "fromcurrent": True}], "label": "Play", "method": "animate"},
            {"args": [[None], {"frame": {"duration": 0, "redraw": True}, "mode": "immediate"}], "label": "Pause", "method": "animate"}
        ],
        "direction": "left",
        "pad": {"r": 10, "t": 87},
        "showactive": False,
        "type": "buttons",
        "x": 0.1,
        "xanchor": "right",
        "y": 0,
        "yanchor": "top"
    }],
    sliders=[{
        "active": 0,
        "yanchor": "top",
        "xanchor": "left",
        "currentvalue": {"font": {"size": 20}, "prefix": "Date: ", "visible": True, "xanchor": "right"},
            "transition": {"duration": 500, "easing": "cubic-in-out"},
        "pad": {"b": 10, "t": 50},
        "len": 0.9,
        "x": 0.1,
        "y": 0,
        "steps": [{"args": [[f.name], {"frame": {"duration": 500, "redraw": True},
                                       "mode": "immediate"}],
                   "label": f.name,
                   "method": "animate"} for f in heatmap_frames]
    }]
)
fig.show()

In [33]:
fig.write_html("usgs_forcast.html")