# Request data from USGS and save to CSV

In [None]:
import requests
import pandas as pd

# Download the earthquake catalogue month by month from the USGS event
# service, keep events with a positive magnitude and write them to CSV.

# Set the API endpoint URL
url = 'https://earthquake.usgs.gov/fdsnws/event/1/query'

# Define the bounding box for the area of interest
min_latitude = 10
max_latitude = 60
min_longitude = 134 #117 is wide
max_longitude = 174 #165 more tight

# Create an empty list to hold the earthquake data
earthquakes = []

for year in range(1973, 2023):
    for month in range(1, 13):
        # Each request covers one calendar month: [first of month, first of next month)
        starttime = f'{year}-{month:02d}-01'
        if month == 12:
            endtime = f'{year+1}-01-01'
        else:
            endtime = f'{year}-{month+1:02d}-01'
        params = {
            'format': 'geojson',
            'starttime': starttime,
            'endtime': endtime,
            'minmagnitude': '0',
            'maxmagnitude': '10',
            'minlatitude': min_latitude,
            'maxlatitude': max_latitude,
            'minlongitude': min_longitude,
            'maxlongitude': max_longitude,
        }

        # Send the API request; fail loudly on HTTP errors instead of trying
        # to parse an error page as JSON, and do not hang forever on a stalled
        # connection.
        response = requests.get(url, params=params, timeout=60)
        response.raise_for_status()

        # Parse the JSON response
        data = response.json()

        # Extract the data for each earthquake and append it to the list
        for feature in data['features']:
            coordinates = feature['geometry']['coordinates']
            properties = feature['properties']
            longitude = coordinates[0]
            latitude = coordinates[1]
            # 'time' is converted from milliseconds since the epoch
            time = pd.to_datetime(properties['time'], unit='ms')
            magnitude = properties['mag']
            earthquake = {'Longitude': longitude, 'Latitude': latitude, 'Time': time, 'Magnitude': magnitude}
            earthquakes.append(earthquake)

# Create a DataFrame from the list of earthquake data
df = pd.DataFrame(earthquakes)

# Cut off magnitudes of 0 (rows with a missing magnitude are dropped too,
# because the comparison is False for NaN)
df = df[df.Magnitude > 0]

# save as csv (the DataFrame index is written as the first column)
df.to_csv('data/Japan_10_60_134_174_1973_2023.csv')

# Print the first few rows of the DataFrame
print(df.head())
print(df.shape, df.Magnitude.min())


## Transform CSV to a 3D numpy array (the third dimension holds magnitude bins) and save it as .npy

In [None]:
# Build a (spatial bin, weekly window, magnitude bin) count tensor from the
# earthquake CSV and save it as .npy.

# Import necessary libraries
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Load earthquake data
data = pd.read_csv('data/Japan_10_60_134_174_1973_2023.csv')

# Define area of interest
min_lon = 134
max_lon = 174
min_lat = 10
max_lat = 60

# Define time window size (1 week in this case)
window_size = timedelta(weeks=1)

# Calculate number of time windows
data['Time'] = pd.to_datetime(data.Time)
start_time = data['Time'].min()
end_time = data['Time'].max()
num_time_windows = (end_time - start_time) // window_size + 1

# Define spatial bin size (2 degrees in this case)
bin_size = 2

# Calculate number of spatial bins
num_lon_bins = int((max_lon - min_lon) / bin_size)
num_lat_bins = int((max_lat - min_lat) / bin_size)
num_bins = num_lon_bins * num_lat_bins

# Define magnitude bin size and number of bins
num_mag_bins = 10

# Create tensor with zeros
tensor = np.zeros((num_bins, num_time_windows, num_mag_bins), dtype=int)

# Bin edges. Spatial and time bins are half-open [edge, next_edge); the
# magnitude bins follow np.histogram semantics, i.e. the last bin also
# contains its right edge (10).
lon_edges = min_lon + bin_size * np.arange(num_lon_bins + 1)
lat_edges = min_lat + bin_size * np.arange(num_lat_bins + 1)
mag_edges = np.linspace(0, 10, num_mag_bins + 1)

# Index of the bin each event falls into, computed for all events at once
# instead of filtering the DataFrame once per (window, spatial bin).
# searchsorted(..., side='right') - 1 gives the last edge <= value; values
# below the first edge map to -1, values at/after the last edge (and NaN)
# map to the number of bins, so both are rejected by the validity mask.
lon_idx = np.searchsorted(lon_edges, data['Longitude'].to_numpy(dtype=float), side='right') - 1
lat_idx = np.searchsorted(lat_edges, data['Latitude'].to_numpy(dtype=float), side='right') - 1
mag_values = data['Magnitude'].to_numpy(dtype=float)
mag_idx = np.searchsorted(mag_edges, mag_values, side='right') - 1
mag_idx[mag_values == mag_edges[-1]] = num_mag_bins - 1

# Window index = whole number of windows elapsed since the first event
has_time = data['Time'].notna().to_numpy()
time_idx = np.zeros(len(data), dtype=int)
time_idx[has_time] = (
    (data.loc[has_time, 'Time'] - start_time) // pd.Timedelta(window_size)
).to_numpy()

valid = (
    has_time
    & (lon_idx >= 0) & (lon_idx < num_lon_bins)
    & (lat_idx >= 0) & (lat_idx < num_lat_bins)
    & (mag_idx >= 0) & (mag_idx < num_mag_bins)
    & (time_idx >= 0) & (time_idx < num_time_windows)
)

# Flattened spatial index: longitude-major, latitude-minor
bin_idx = lon_idx * num_lat_bins + lat_idx

# np.add.at accumulates correctly when the same cell occurs several times
np.add.at(tensor, (bin_idx[valid], time_idx[valid], mag_idx[valid]), 1)

# Save the tensor and print its shape
np.save('data/Japan_10_60_134_174_1973_2023.npy',tensor)
print(tensor.shape)

## Convert CSV to a 2D numpy array where rows are 2x2-degree pixels and columns are one-day time bins

In [None]:
# Build a (spatial bin, daily window) array holding the maximum magnitude
# observed in each 2x2-degree bin per day; cells without events stay 0.

# Import necessary libraries
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Load earthquake data
data = pd.read_csv('data/Japan_10_60_134_174_1973_2023.csv')

# Area of interest and spatial resolution (degrees)
min_lon, max_lon = 134, 174
min_lat, max_lat = 10, 60
bin_size = 2

# Length of one time window (1 day in this case)
window_size = timedelta(days=1)

# Time span covered by the data and the number of windows needed for it
data['Time'] = pd.to_datetime(data.Time)
start_time = data['Time'].min()
end_time = data['Time'].max()
num_time_windows = (end_time - start_time) // window_size + 1

# Number of spatial bins along each axis and in total
num_lon_bins = int((max_lon - min_lon) / bin_size)
num_lat_bins = int((max_lat - min_lat) / bin_size)
num_bins = num_lon_bins * num_lat_bins

# Output array, one row per spatial bin and one column per window
tensor = np.zeros((num_bins, num_time_windows))

# Lower-left corners of the spatial bins along each axis
lon_starts = range(min_lon, max_lon, bin_size)
lat_starts = range(min_lat, max_lat, bin_size)

for i in range(num_time_windows):
    # Events inside the current half-open window [start_time, window_end)
    window_end = start_time + window_size
    mask = (data['Time'] >= start_time) & (data['Time'] < window_end)
    window_data = data.loc[mask]

    for lon_pos, lon in enumerate(lon_starts):
        for lat_pos, lat in enumerate(lat_starts):
            # Events inside the current half-open spatial bin
            in_lon = (window_data['Longitude'] >= lon) & (window_data['Longitude'] < lon + bin_size)
            in_lat = (window_data['Latitude'] >= lat) & (window_data['Latitude'] < lat + bin_size)
            bin_data = window_data[in_lon & in_lat]

            # Nothing to record for an empty bin
            if bin_data.empty:
                continue

            # Row index is longitude-major, latitude-minor
            max_mag = bin_data['Magnitude'].max()
            bin_idx = lon_pos * num_lat_bins + lat_pos
            tensor[bin_idx, i] = max_mag

    # Move on to the next window
    start_time = window_end

print(tensor.shape)

In [None]:
# Same daily maximum-magnitude array as the previous cell, but the spatial
# bins are also kept in a DataFrame so that the bin centres can be attached
# to the result as two extra columns.

# Import necessary libraries
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Load earthquake data
data = pd.read_csv('data/Japan_10_60_134_174_1973_2023.csv')

# Define area of interest
min_lon = 134
max_lon = 174
min_lat = 10
max_lat = 60

# Define time window size (1 day in this case)
window_size = timedelta(days=1)

# Calculate number of time windows
data['Time'] = pd.to_datetime(data.Time)
start_time = data['Time'].min()
end_time = data['Time'].max()
num_time_windows = (end_time - start_time) // window_size + 1

# Define spatial bin size (2 degrees in this case)
bin_size = 2

# Calculate number of spatial bins
num_lon_bins = int((max_lon - min_lon) / bin_size)
num_lat_bins = int((max_lat - min_lat) / bin_size)
num_bins = num_lon_bins * num_lat_bins

# Create tensor with zeros
tensor = np.zeros((num_bins, num_time_windows))

# Lower-left corner of every spatial bin, longitude-major. Built in one go:
# concatenating one row at a time copies the frame on every step and leaves
# the columns with object dtype.
bins_df = pd.DataFrame(
    [
        (lon, lat)
        for lon in range(min_lon, max_lon, bin_size)
        for lat in range(min_lat, max_lat, bin_size)
    ],
    columns=['Longitude', 'Latitude'],
)

# Plain tuples are cheaper to read in the inner loop than repeated .loc lookups
bin_corners = list(bins_df.itertuples(index=False, name=None))

# Iterate over time windows
for i in range(num_time_windows):
    # Get earthquake data within current time window
    mask = (data['Time'] >= start_time) & (data['Time'] < start_time + window_size)
    window_data = data.loc[mask]

    # Iterate over spatial bins; j is the row of the bin in bins_df and tensor
    for j, (lon, lat) in enumerate(bin_corners):
        # Get earthquake data within current spatial bin
        bin_data = window_data[(window_data['Longitude'] >= lon) & (window_data['Longitude'] < lon + bin_size) &
                               (window_data['Latitude'] >= lat) & (window_data['Latitude'] < lat + bin_size)]

        # Check if there are any earthquakes in the current bin
        if not bin_data.empty:
            # Find the maximum magnitude in the current bin
            max_mag = bin_data['Magnitude'].max()

            # Store maximum magnitude in tensor
            bin_idx = j
            tensor[bin_idx, i] = max_mag

    # Increment time window
    start_time += window_size

# Shift the stored corners to the bin centres
bins_df['Longitude'] += bin_size/2
bins_df['Latitude'] += bin_size/2

print(bins_df.head())
print(tensor.shape)
# One row per spatial bin: the daily values followed by Longitude and Latitude
df = pd.concat((pd.DataFrame(tensor), bins_df), axis=1)
# Uncomment to (re)write the file that the correlation and LSTM cells read.
# df.to_csv('data/Japan_10_60_134_174_D_1973_2023.csv', index=False)


## Plot earthquake magnitudes on map

In [None]:
import cartopy.crs as crs
import cartopy
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.ticker as mticker

# Draw all catalogued events on a Mercator map, coloured and sized by magnitude.

# Sorted ascending by magnitude, so larger events are plotted after smaller ones
df = pd.read_csv('data/Japan_10_60_134_174_1973_2023.csv').sort_values('Magnitude')

# The event coordinates and the map extent are given in plain lon/lat
lonlat = crs.PlateCarree()

fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(1, 1, 1, projection=crs.Mercator())

# Background layers
base_layers = (
    (cartopy.feature.LAND, dict(facecolor=[.8,.8,.8])),
    (cartopy.feature.OCEAN, dict(facecolor=[.95,.95,.95])),
    (cartopy.feature.COASTLINE, dict(linewidth=0.3)),
    (cartopy.feature.BORDERS, dict(linestyle=':', linewidth=0.3)),
)
for layer, layer_style in base_layers:
    ax.add_feature(layer, **layer_style)

# Gridline positions: every 2 degrees in both directions
lon = np.linspace(-180,180,181)
lat = np.linspace(-90,90,91)

# Labelled gridlines on the left and bottom edges only
gl = ax.gridlines(draw_labels=True)
gl.xlocator = mticker.FixedLocator(lon)
gl.ylocator = mticker.FixedLocator(lat)
gl.top_labels = gl.right_labels = False
gl.rotate_labels = True
#gl.xlabel_style = {'rotation': 45}

# Coastline outline on top of the background layers
ax.coastlines(color='black', linewidth=0.5)

# One marker per event; the marker area grows exponentially with magnitude
marker_sizes = np.exp(df['Magnitude'])/100
sc = ax.scatter(
    df['Longitude'],
    df['Latitude'],
    c=df['Magnitude'],
    cmap="inferno",
    s=marker_sizes,
    transform=lonlat,
)

# Colour bar for the magnitude scale
cbar = fig.colorbar(sc, ax=ax, fraction=0.04, pad=0.02)
cbar.set_label('Magnitude')

# Title and axis labels
ax.set_title('Earthquakes between 1973 and 2023')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# Crop the map to the bounding box of the plotted events
min_lon, max_lon = df['Longitude'].min(), df['Longitude'].max()
min_lat, max_lat = df['Latitude'].min(), df['Latitude'].max()
ax.set_extent([min_lon, max_lon, min_lat, max_lat], crs=lonlat)

# Show the plot
plt.show()

## Plot earthquake magnitudes over time

In [None]:
import matplotlib.pyplot as plt

# Scatter every event's magnitude against its origin time.
# Relies on `df` and `pd` from an earlier cell.
df['Time'] = pd.to_datetime(df.Time)

fig, ax = plt.subplots(figsize=(10,6))
ax.scatter(df.Time, df.Magnitude, s=.1)
ax.set_ylabel('Magnitude')
ax.set_xlabel('Year')

## Plot 2D histogram of amount of earthquakes

In [None]:
import seaborn as sns
import numpy as np

# 2D histogram of event counts over longitude/latitude (20 x 25 bins).
# Relies on `df` from an earlier cell.

# Tick positions every 2 degrees across the area of interest
lon_ticks = np.linspace(134, 174, 21)
lat_ticks = np.linspace(10, 60, 26)

g = sns.histplot(
    data=df,
    x="Longitude",
    y="Latitude",
    bins=(20,25),
    cbar=True,
)

# Integer tick labels; longitude labels are rotated to avoid overlap
g.set_xticks(ticks=lon_ticks, labels=lon_ticks.astype(int), rotation = 90)
g.set_yticks(ticks=lat_ticks, labels=lat_ticks.astype(int))

# NOTE(review): this runs after the plot was created, so it presumably only
# affects figures made afterwards — confirm the intended placement.
sns.set(rc={'figure.figsize':(8,8)})


## Plot the most active 2x2 area

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Magnitude over time for the events strictly inside the
# 140-142 E / 36-38 N cell.
df = pd.read_csv('data/Japan_10_60_134_174_1973_2023.csv')
df['Time'] = pd.to_datetime(df.Time)

# Strict inequalities: events exactly on the cell border are left out
inside_lon = (df.Longitude > 140) & (df.Longitude < 142)
inside_lat = (df.Latitude > 36) & (df.Latitude < 38)
pixel = df[inside_lon & inside_lat]

fig, ax = plt.subplots(figsize=(6,4))
ax.scatter(pixel.Time, pixel.Magnitude, s=.1)
ax.set_ylabel('Magnitude')
ax.set_xlabel('Year')

## Plot distribution of earthquakes

In [None]:
import seaborn as sns

# Joint plot of magnitude against time with marginal histograms (74 bins).
# Relies on `df` from an earlier cell.
marginal_options = {"bins": 74}
sns.jointplot(
    data=df,
    x="Time",
    y="Magnitude",
    s=1,
    marginal_ticks=True,
    marginal_kws=marginal_options,
)

## Plot correlation between pixels

In [None]:
import matplotlib.pyplot as plt
import numpy as np  # used by np.corrcoef below; previously only available from earlier cells
import pandas as pd

# Correlation matrix between the daily maximum-magnitude series of the
# 2x2-degree pixels.

# Drop the trailing Longitude/Latitude columns; each row is one pixel
df = pd.read_csv('data/Japan_10_60_134_174_D_1973_2023.csv').iloc[:,:-2]

# Keep only pixels that recorded at least one event
df = df[(df > 0).any(axis=1)]

# 14-step centred moving average along time. Transposing and rolling over
# rows is equivalent to rolling(axis=1), whose `axis` argument is deprecated
# in recent pandas.
df = df.T.rolling(14, center=True, min_periods=0).mean().T

plt.figure(figsize=(8, 8))
# fignum=0 makes matshow draw into the figure created above
plt.matshow(np.corrcoef(df),0, cmap='seismic',vmin=-1, vmax=1)
plt.xlabel("2x2 grid pixel")
plt.ylabel("2x2 grid pixel")
plt.colorbar()
plt.show()

# Implement ARIMA on 2x2 pixel

In [None]:
# Walk-forward evaluation of a one-step ARIMA forecast of the weekly maximum
# magnitude in a 10x10-degree area, scored both as a regression (MAE) and as
# a binary "magnitude >= mag" classifier.

# import necessary libraries
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import acf, pacf
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

# load earthquake data for a specific area
data = pd.read_csv('data/Japan_10_60_134_174_1973_2023.csv')
data['Time'] = pd.to_datetime(data.Time)
data = data[(data.Longitude > 136) & (data.Longitude < 146) & (data.Latitude > 32) & (data.Latitude < 42)]
data = data.set_index('Time')
data = data['Magnitude'].resample('W').max()  # resample by week and get the maximum magnitude of the week
data = data.fillna(0)  # weeks without events count as magnitude 0


"""
# fit an ARIMA model to get the summary
model = ARIMA(data, order=(3, 0, 3))  # (p, d, q) order
model_fit = model.fit()
print(model_fit.summary())
"""

# Rolling window of 20 years of weekly data, forecasting one week ahead,
# with one split per remaining week.
train_size = 52*20
total_splits = len(data)-train_size
test_size = 1

cv = TimeSeriesSplit(n_splits=total_splits, max_train_size=train_size ,test_size=test_size)

mae_total = 0
TP = 0
FP = 0
TN = 0
FN = 0

y_true = []
y_pred = []

# Magnitude threshold that defines the positive class
mag = 6

for train_index, test_index in cv.split(data):
    #print("TRAIN:", train_index, "TEST:", test_index)

    # cv.split yields integer positions, so select with .iloc: `data` is
    # indexed by timestamps and plain [] with integers relies on a deprecated
    # positional fallback.
    train = data.iloc[train_index]
    test = data.iloc[test_index]

    # fit an ARIMA model
    model = ARIMA(train, order=(1, 1, 0))  # (p, d, q) order
    model_fit = model.fit()

    # forecast next week's magnitudes
    forecast = model_fit.forecast(steps=test_size)
    observed = test.iloc[0]
    predicted = float(np.asarray(forecast)[0])
    print('true:', observed, 'prediction:', round(predicted,1))

    # evaluate model performance
    mae = mean_absolute_error(test, forecast)
    mae_total += mae
    #print('inermediate MSE:', mse)

    # Classify observation and forecast against the threshold. Exactly one
    # entry is appended to each list per split, so they stay aligned.
    is_event = bool(observed >= mag)
    predicted_event = bool(predicted >= mag)
    y_true.append(int(is_event))
    y_pred.append(int(predicted_event))

    if is_event and predicted_event:
        TP += 1
    elif is_event:
        FN += 1
    elif predicted_event:
        FP += 1
    else:
        TN += 1


def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# A metric is NaN when it is undefined (e.g. precision when no event was
# ever predicted) instead of aborting the cell with ZeroDivisionError.
acc = _ratio(TP+TN, TP+TN+FP+FN)
precision = _ratio(TP, TP+FP)
recall = _ratio(TP, TP+FN)
specificity = _ratio(TN, TN+FP)

print('accuracy:', acc)
print('precision:', precision)
print('recall:', recall)
print('specificity:', specificity)
print('Mean Absolute Error:', _ratio(mae_total, total_splits))

# labels=[0, 1] keeps the matrix 2x2 even if one class never occurs, so it
# always matches the two tick labels below.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
class_names = ['M<6','M>=6']

sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()


## visualise ACF to define MA

In [None]:
# Bar chart of the autocorrelation of `data`, one bar per lag.
# Relies on `acf`, `plt` and `data` from the ARIMA cell.
acf_val = acf(data)
acf_lags = range(len(acf_val))
plt.bar(acf_lags, acf_val)

## visualize PACF to define AR

In [None]:
# Bar chart of the partial autocorrelation of `data`, one bar per lag.
# Relies on `pacf`, `plt` and `data` from the ARIMA cell.
pacf_val = pacf(data)
pacf_lags = range(len(pacf_val))
plt.bar(pacf_lags, pacf_val)

# LSTM first try

In [201]:
# multivariate one step problem with lstm:
# predict next month's maximum magnitude per pixel from the previous 12 months
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.preprocessing.sequence import TimeseriesGenerator
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# import data and cut off last two columns of longitude and latitude and transpose
# (after the transpose: one row per day, one column per pixel)
df = pd.read_csv('data/Japan_10_60_134_174_D_1973_2023.csv').iloc[:,:-2].T

### resample timeperiod original csv and get Time
# Select the columns by name: a positional slice depends on the CSV layout
# and can drop the 'Time' column that is needed on the next line.
data = pd.read_csv('data/Japan_10_60_134_174_1973_2023.csv', usecols=['Time', 'Magnitude'])
data['Time'] = pd.to_datetime(data.Time)
data = data.set_index('Time').sort_index()
time_D = data.resample('D').max().fillna(0).index

# NOTE(review): set_index requires len(time_D) to equal the number of daily
# columns in the D-file; those were counted from the first event's timestamp
# rather than from calendar days — confirm the two always agree.
df = df.set_index(time_D)

# resample time: monthly maximum per pixel
df = df.resample('M').max()

# only contain columns where at least a mangitude bigger than zero is recorded
dataset = df.T[(df > 0).any(axis=0)]


# transpose dataset and convert to numpy array (rows: months, columns: pixels)
dataset = np.array(dataset.T)

# scale data
# scaler = StandardScaler()
# dataset = scaler.fit_transform(dataset)

# define generator
n_features = dataset.shape[1]
n_input = 12
generator = TimeseriesGenerator(dataset, dataset, length=n_input, batch_size=1)
# define model
model = Sequential()
model.add(LSTM(128, activation='tanh', input_shape=(n_input, n_features), return_sequences=True))
model.add(LSTM(128, activation='tanh', return_sequences=True))
model.add(LSTM(128, activation='tanh'))
# The output width must match the number of target columns; tie it to
# n_features instead of a hard-coded count.
model.add(Dense(n_features, activation='linear'))
model.compile(optimizer='adam', loss='mae')
# fit model
# NOTE(review): steps_per_epoch=230 is kept as-is; it looks like it was
# copied from the feature count — confirm whether len(generator) was meant.
history = model.fit(generator, steps_per_epoch=230, epochs=500, verbose=1)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500

KeyboardInterrupt: 

In [None]:
# Load the EM-DAT export; the column names are taken from the sheet row selected by header=6.
emdat_path = "/Users/jurrienboogert/Downloads/emdat_public_2023_02_08_query_uid-jHccdA.xlsx"
emdat = pd.read_excel(emdat_path, header=6)

In [None]:
# Largest 'Dis Mag Value' among the rows whose disaster type is 'Earthquake'
is_earthquake = emdat['Disaster Type'] == 'Earthquake'
emdat.loc[is_earthquake, ['Dis Mag Value']].max()

In [None]:
# Load the NOAA earthquake export (tab-separated values)
tsv_file = '/Users/jurrienboogert/Documents/DATA_SCIENCE_AND_SOCIETY/THESIS/datasets/NOAA/earthquakes-2023-02-11_10-24-26_+0100.tsv'
NOAA = pd.read_csv(tsv_file, sep='\t')


In [None]:
# Magnitudes of the NOAA events with a positive 'Total Deaths' or a positive
# 'Total Damage ($Mil)': histogram plus the 2.5% quantile.
has_impact = (NOAA['Total Deaths'] > 0) | (NOAA['Total Damage ($Mil)'] > 0)
impact_magnitudes = NOAA.loc[has_impact, 'Mag']

plt.hist(impact_magnitudes, bins=80)
plt.show()

impact_magnitudes.quantile(.025)

In [None]:
# Show the column labels of the NOAA table
NOAA.columns

In [None]:
# Assemble timestamps for the NOAA events after 2010.
# pd.to_datetime builds dates from columns named year/month/day/hour/minute,
# but the table's year column is addressed as 'Year' in the filter below, so
# the lower-case names do not match it as-is.
# NOTE(review): 'Mo', 'Dy', 'Hr' and 'Mn' are assumed to be the other NOAA
# column names — confirm against NOAA.columns.
_datetime_parts = {'Year': 'year', 'Mo': 'month', 'Dy': 'day', 'Hr': 'hour', 'Mn': 'minute'}
# Rename only where the source column exists and the target name is still free
_renames = {
    source_name: target_name
    for source_name, target_name in _datetime_parts.items()
    if source_name in NOAA.columns and target_name not in NOAA.columns
}
_recent = NOAA[NOAA['Year'] > 2010].rename(columns=_renames)
pd.to_datetime(_recent[['year', 'month', 'day', 'hour', 'minute']])

In [None]:
# Read the six STEAD metadata CSVs, then combine chunks 2-6 into one table
# sorted by source origin time. chunk1 is read but not part of `chunks`.
stead_dir = "/Users/jurrienboogert/Documents/DATA_SCIENCE_AND_SOCIETY/THESIS/datasets/STEAD"
chunk1, chunk2, chunk3, chunk4, chunk5, chunk6 = (
    pd.read_csv(f"{stead_dir}/chunk{chunk_no}.csv", low_memory=False)
    for chunk_no in range(1, 7)
)

chunks = pd.concat([chunk2,chunk3,chunk4,chunk5,chunk6], ignore_index=True)

# Parse both time columns into datetimes
for time_column in ('trace_start_time', 'source_origin_time'):
    chunks[time_column] = pd.to_datetime(chunks[time_column])

# Sorting keeps the original row labels (the index is not reset)
chunks = chunks.sort_values('source_origin_time')

In [None]:
import matplotlib.pyplot as plt

# Events with source longitude in [19, 30], source latitude in [34, 44] and
# an origin time up to the cut-off, plotted as magnitude over time.
in_lon = chunks['source_longitude'].between(19, 30)
in_lat = chunks['source_latitude'].between(34, 44)
before_cutoff = chunks['source_origin_time'] <= '2015-6-25 03:14:47.900'
subset = chunks[in_lon & in_lat & before_cutoff]

fig, ax = plt.subplots(figsize=(16,6))
ax.scatter(subset.source_origin_time, subset.source_magnitude, s=1)
ax.set_ylabel('Magnitude')
ax.set_xlabel('Year')

In [None]:
# Histogram of the source magnitudes in `subset` (48 bins)
subset_magnitudes = subset['source_magnitude']
plt.hist(subset_magnitudes, bins=48)
plt.title('Histogram of Earthquakes by magnitude')
plt.xlabel('magnitude')
plt.ylabel('Number of Earthquakes')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Histograms of where the events in `subset` occurred. The axis labels and
# titles now name the coordinate that is actually plotted (they were swapped).

# create a histogram of the latitude values
plt.hist(subset.source_latitude, bins=360)
plt.xlabel('Latitude')
plt.ylabel('Number of Earthquakes')
plt.title('Histogram of Earthquakes by Latitude')
plt.show()

# create a histogram of the longitude values
plt.hist(subset.source_longitude, bins=360)
plt.xlabel('Longitude')
plt.ylabel('Number of Earthquakes')
plt.title('Histogram of Earthquakes by Longitude')
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np

# Plot the events in `subset` on a Mercator map, coloured and sized by magnitude.

# Keep events with a non-negative magnitude, sorted ascending by magnitude.
# The mask is built from `subset` itself so that it lines up with the rows
# it filters (it was built from `chunks` before).
df = subset[subset['source_magnitude'] >= 0].sort_values('source_magnitude')

# Extract latitude and longitude columns
latitudes = df['source_latitude']
longitudes = df['source_longitude']
magnitudes = df['source_magnitude']

# Set up map projection. The object is called `basemap` rather than `map`
# so that the builtin map() stays usable in later cells.
fig, ax = plt.subplots(figsize=(16,6))
basemap = Basemap(projection='merc', lat_0=0, lon_0=0, resolution='l',
                  llcrnrlon=19, llcrnrlat=33, urcrnrlon=30, urcrnrlat=43)

# Draw continents and the map boundary
#basemap.drawcoastlines(color='gray')
basemap.fillcontinents(color='lightgray', lake_color='white')
basemap.drawmapboundary(fill_color='white')

# Draw parallels and meridians every degree
basemap.drawparallels(range(-90, 90, 1), linewidth=0.5, labels=[1, 0, 0, 0])
meridians = basemap.drawmeridians(range(-180, 180, 1), linewidth=0.5, labels=[0, 0, 0, 1])

# Rotate the meridian labels; meridians without a label text are skipped
for m in meridians:
    try:
        meridians[m][1][0].set_rotation(45)
    except IndexError:
        continue

# Convert latitude and longitude to map coordinates
x, y = basemap(longitudes, latitudes)

# Plot earthquake magnitudes as circles on the map
basemap.scatter(x, y, s=np.exp(magnitudes)/50, c=magnitudes, cmap='plasma', alpha=1)

# Add a colorbar
plt.colorbar(label='Magnitude')

# Add a title
plt.title('Earthquake Magnitudes')

# Show the plot
plt.show()


In [None]:
from prophet import Prophet

# Fit Prophet on daily record counts and forecast 7 periods ahead.

# Count records per (day, source latitude, source longitude) and rename the
# columns to the 'ds' / 'y' names used for the Prophet input.
per_day = pd.Grouper(key='source_origin_time', freq='D')
data = (
    chunks
    .groupby([per_day, 'source_latitude', 'source_longitude'])
    .size()
    .reset_index(name='count')
    .rename(columns={'source_origin_time': 'ds', 'count': 'y'})
)

# create a Prophet model and fit the data
model = Prophet()
model.fit(data)

# extend the history by 7 periods and predict over the whole range
future = model.make_future_dataframe(periods=7)
forecast = model.predict(future)

# plot the forecast
fig = model.plot(forecast, xlabel='Date', ylabel='Number of Earthquakes')


In [None]:
# All STEAD events as faint red dots (no connecting line): magnitude over origin time
plt.plot(
    chunks['source_origin_time'],
    chunks['source_magnitude'],
    color='r',
    marker='o',
    linestyle='None',
    alpha=0.01,
)

In [None]:
# Last 20 rows of the time-sorted table, restricted to magnitude, times and coordinates
inspect_columns = [
    'source_magnitude',
    'trace_start_time',
    'source_origin_time',
    'receiver_latitude',
    'receiver_longitude',
    'source_latitude',
    'source_longitude',
]
chunks[inspect_columns].tail(20)

In [None]:
# Rows whose origin time falls in the hour starting 2007-08-17 00:00
origin = chunks['source_origin_time'].dt
in_target_hour = (
    (origin.year == 2007)
    & (origin.month == 8)
    & (origin.day == 17)
    & (origin.hour == 0)
)
chunks[in_target_hour]

In [None]:
# Shape of the sub-table of records with source magnitude above 6
above_six = chunks['source_magnitude'] > 6
chunks.loc[above_six].shape

In [None]:
# Shape of the sub-table of NOAA events after 1983 with a positive 'Deaths' value
after_1983 = NOAA['Year'] > 1983
with_deaths = NOAA['Deaths'] > 0
NOAA.loc[after_1983 & with_deaths].shape

In [None]:
# Load the header-less 2023.csv file and assign the column names explicitly
ghcn_columns = [
    'ID',
    'YEAR/MONTH/DAY',
    'ELEMENT',
    'DATA VALUE',
    'M-FLAG',
    'Q-FLAG',
    'S-FLAG',
    'OBS-TIME',
]
temp = pd.read_csv("/Users/jurrienboogert/Downloads/2023.csv", header=None, names=ghcn_columns)

In [None]:
# Read one station's .dly file as a fixed-width table (column widths are inferred by pandas)
dly_path = '/Users/jurrienboogert/Downloads/ghcnd_gsn/ghcnd_gsn/USW00013782.dly'
dly = pd.read_fwf(dly_path)

In [None]:
# Pick one random record and show its source (latitude, longitude)
source = chunks.sample(1)[['source_latitude', 'source_longitude']]
sampled_row = source.iloc[0]
sampled_row.iloc[0], sampled_row.iloc[1]

In [None]:
# Build an hourly Meteostat query for the sampled source location.
# Import Meteostat library and dependencies
from datetime import datetime
from meteostat import Point, Hourly

# Set time period: 1984-09-07, 02:00 to 03:00
start = datetime(1984, 9, 7, 2)
end = datetime(1984, 9, 7, 3)

# Configure the station lookup. These are set on the Point class itself, so
# they apply to every Point created afterwards, not only to `receiver`.
Point.method = 'nearest'
Point.radius = 200000
Point.max_count = 6
Point.weight_dist = .6
# Point at the (latitude, longitude) of the sampled `source` row
receiver = Point(source.iloc[0,0], source.iloc[0,1])
# Create the hourly query for that point and period; nothing is fetched here
data = Hourly(receiver, start, end)


In [None]:
# Split the source origin time into integer year/month/day/hour Series;
# missing components become 0.
chunks['source_origin_time'] = pd.to_datetime(chunks['source_origin_time'])
origin_parts = chunks['source_origin_time'].dt
year = origin_parts.year.fillna(0).astype('int')
month = origin_parts.month.fillna(0).astype('int')
day = origin_parts.day.fillna(0).astype('int')
hour = origin_parts.hour.fillna(0).astype('int')

# Columns used by the weather look-ups; only the last one is displayed
chunks['source_latitude']
chunks['source_longitude']

chunks['receiver_latitude']
chunks['receiver_longitude']
chunks['receiver_elevation_m']

In [None]:
# For every record from row label 235426 onwards, look up the hourly
# temperature, relative humidity and pressure at the source location for the
# hour of the event.
# Relies on `chunks`, `year`, `month`, `day` and `hour` from earlier cells.

# Import Meteostat library and dependencies
from datetime import datetime
from meteostat import Point, Hourly

temp_source = []
rhum_source = []
pres_source = []
temp_receiver = []
rhum_receiver = []
pres_receiver = []
counter = 0

# Station lookup settings; set on the Point class, so they apply to all Points
Point.method = 'nearest'
Point.radius = 2000000
Point.max_count = 10
Point.weight_dist = .6

for i in range(235426,len(chunks)):
    counter += 1
    # Query exactly the hour in which the event occurred
    start = datetime(year[i], month[i], day[i], hour[i])
    end = start

    source = Point(chunks.source_latitude[i], chunks.source_longitude[i])

    # Fetch once and read all three variables from the same result instead
    # of repeating the identical query for each variable.
    hourly = Hourly(source, start, end).fetch()
    if hourly.empty:
        # No observation for this hour: record NaN so the lists stay
        # aligned with the processed rows instead of aborting the loop.
        temp_source.append(float('nan'))
        rhum_source.append(float('nan'))
        pres_source.append(float('nan'))
    else:
        first_row = hourly.iloc[0]
        temp_source.append(first_row['temp'])
        rhum_source.append(first_row['rhum'])
        pres_source.append(first_row['pres'])

    # Progress indicator
    if counter % 10 == 0:
        print(counter)


In [None]:
# For every record from row label 235426 onwards, look up the daily average
# temperature and pressure at both the source and the receiver location for
# the day of the event.
# Relies on `chunks`, `year`, `month` and `day` from earlier cells.

# Import Meteostat library and dependencies
from datetime import datetime
from meteostat import Point, Daily

temp_source = []
pres_source = []
temp_receiver = []
pres_receiver = []
counter = 0

# Station lookup settings; set on the Point class, so they apply to all Points
Point.method = 'nearest'
Point.radius = 2000000
Point.max_count = 10
Point.weight_dist = .6


def _first_daily_values(point, start, end):
    """Fetch the daily data once and return (tavg, pres) of its first row, or NaNs if it is empty."""
    daily = Daily(point, start, end).fetch()
    if daily.empty:
        return float('nan'), float('nan')
    first_row = daily.iloc[0]
    return first_row['tavg'], first_row['pres']


for i in range(235426,len(chunks)):
    counter += 1
    # Query exactly the day on which the event occurred
    start = datetime(year[i], month[i], day[i])
    end = start

    source = Point(chunks.source_latitude[i], chunks.source_longitude[i])
    # One fetch per location instead of one per variable
    tavg_value, pres_value = _first_daily_values(source, start, end)
    temp_source.append(tavg_value)
    pres_source.append(pres_value)

    # NOTE(review): these are class-level settings, so they also remain in
    # effect for the source Point of the next iteration — confirm intent.
    Point.alt_range = chunks.receiver_elevation_m[i]
    Point.adapt_temp = True
    receiver = Point(chunks.receiver_latitude[i], chunks.receiver_longitude[i], chunks.receiver_elevation_m[i])
    tavg_value, pres_value = _first_daily_values(receiver, start, end)
    temp_receiver.append(tavg_value)
    pres_receiver.append(pres_value)

    # Progress indicator
    if counter % 10 == 0:
        print(counter)