<a href="https://colab.research.google.com/github/Schimmenti/EarthquakesGPS/blob/main/Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [85]:
import numpy as np
import pandas as pd
import pickle as pkl
import scipy.stats as stats
import matplotlib.pyplot as plt
import datetime
import time
from scipy.ndimage.filters import maximum_filter1d, minimum_filter1d
from google.colab import drive
# Mount Google Drive (Colab only). Later cells open cached pickles under the
# relative path "drive/MyDrive/Colab Notebooks/", so the working directory is
# presumably /content -- TODO confirm.
drive.mount('/content/drive')


def max_filter1d_valid(a, W, add_heading_nan=True):
    """Running maximum of a 1-D array over windows of ``W`` samples ("valid" part).

    Only windows that lie entirely inside ``a`` are kept, so the filtered
    part has ``len(a) - W + 1`` entries (none if ``a`` is shorter than ``W``).

    Parameters
    ----------
    a : 1-D array_like
        Input series.
    W : int
        Window length in samples (any positive integer, odd or even).
    add_heading_nan : bool, default True
        If True, prepend ``W - 1`` NaNs so the result has the same length as
        ``a`` and entry ``i`` is the maximum of ``a[i-W+1 : i+1]``.

    Returns
    -------
    numpy.ndarray
        The running maximum, optionally left-padded with NaN.
    """
    filtered = maximum_filter1d(a, size=W)
    # scipy places the window at [i - W//2, i + (W-1) - W//2]; trim exactly the
    # outputs whose window sticks out of the array. Using an explicit stop
    # index (instead of a negative slice) keeps W=1 and even W correct.
    first = W // 2
    trailing = (W - 1) - first
    last = max(first, len(filtered) - trailing)
    valid = filtered[first:last]
    if add_heading_nan:
        return np.concatenate([np.full(W - 1, np.nan), valid])
    return valid
def min_filter1d_valid(a, W, add_heading_nan=True):
    """Running minimum of a 1-D array over windows of ``W`` samples ("valid" part).

    Only windows that lie entirely inside ``a`` are kept, so the filtered
    part has ``len(a) - W + 1`` entries (none if ``a`` is shorter than ``W``).

    Parameters
    ----------
    a : 1-D array_like
        Input series.
    W : int
        Window length in samples (any positive integer, odd or even).
    add_heading_nan : bool, default True
        If True, prepend ``W - 1`` NaNs so the result has the same length as
        ``a`` and entry ``i`` is the minimum of ``a[i-W+1 : i+1]``.

    Returns
    -------
    numpy.ndarray
        The running minimum, optionally left-padded with NaN.
    """
    filtered = minimum_filter1d(a, size=W)
    # scipy places the window at [i - W//2, i + (W-1) - W//2]; trim exactly the
    # outputs whose window sticks out of the array. Using an explicit stop
    # index (instead of a negative slice) keeps W=1 and even W correct.
    first = W // 2
    trailing = (W - 1) - first
    last = max(first, len(filtered) - trailing)
    valid = filtered[first:last]
    if add_heading_nan:
        return np.concatenate([np.full(W - 1, np.nan), valid])
    return valid

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [86]:
def haversine(lat1, lat2, delta_long, degrees=False):
  """Haversine of the central angle between two points on a sphere.

  Returns hav(theta) = sin^2(theta/2), a dimensionless value in [0, 1] --
  NOT a distance. It grows monotonically with the great-circle angle theta,
  so it can be used directly for ranking; the angle itself is
  ``2*arcsin(sqrt(hav))``.

  Parameters
  ----------
  lat1, lat2 : float or array_like
      Latitudes of the two points (broadcast against each other).
  delta_long : float or array_like
      Longitude difference between the two points (its sign is irrelevant).
  degrees : bool, default False
      If True the three inputs are given in degrees and converted to
      radians first; by default they are taken to be radians already.

  Returns
  -------
  float or numpy.ndarray
      hav(theta), with the broadcast shape of the inputs.
  """
  if degrees:
    lat1 = np.radians(lat1)
    lat2 = np.radians(lat2)
    delta_long = np.radians(delta_long)
  return np.sin((lat2-lat1)/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(delta_long/2)**2

In [87]:
def moving_lin_regress(x, T, t0, t1, add_heading_nan=True):
    """Slope of a least-squares line fitted over a sliding window of ``T`` samples.

    One slope is produced for every window start ``t`` in ``range(t0, t1 - T)``;
    the window is ``x[t:t+T]`` regressed against ``0, 1, ..., T-1``, so slopes
    are in units of ``x`` per sample. A NaN anywhere in a window yields a NaN
    slope for that window.

    Parameters
    ----------
    x : 1-D array_like
        Input series.
    T : int
        Window length in samples (at least 2 when any window is evaluated).
    t0, t1 : int
        Window starts run over ``range(t0, t1 - T)``.
    add_heading_nan : bool, default True
        If True, prepend ``T`` NaNs. With ``t0 = 0`` and ``t1 = len(x)`` the
        result then has ``len(x)`` entries and entry ``i >= T`` is the slope of
        ``x[i-T:i]``, i.e. of the ``T`` samples strictly before ``i``.

    Returns
    -------
    numpy.ndarray
        The slopes, optionally left-padded with NaN.

    Raises
    ------
    ValueError
        If at least one window is requested and ``T < 2``, or the requested
        windows do not fit inside ``x``.
    """
    from numpy.lib.stride_tricks import sliding_window_view

    n_windows = t1 - T - t0
    if n_windows <= 0:
        slopes = np.array([])
    else:
        if T < 2:
            raise ValueError("a regression window needs at least 2 samples, got T=%i" % T)
        x = np.asarray(x, dtype=float)
        if t0 < 0 or t1 - 1 > len(x):
            raise ValueError("windows [t0, t1) = [%i, %i) do not fit in a series of length %i"
                             % (t0, t1, len(x)))
        # The last window starts at t1-T-1 and therefore ends at index t1-2.
        windows = sliding_window_view(x[t0:t1 - 1], T)
        # Closed-form OLS slope: sum((t - mean t) * (y - mean y)) / sum((t - mean t)^2).
        # The abscissa is the same for every window, so it is built only once.
        steps = np.arange(T) - (T - 1) / 2.0
        centred = windows - windows.mean(axis=1, keepdims=True)
        slopes = centred @ steps / np.dot(steps, steps)
    if add_heading_nan:
        return np.concatenate([np.full(T, np.nan), slopes])
    return slopes

In [88]:
# Base URL of the raw GPS files; the download loop appends "<station name>.csv" to it.
base_link = "https://raw.githubusercontent.com/Schimmenti/EarthquakesGPS/main/gps_data/"

In [89]:
! wget "https://raw.githubusercontent.com/Schimmenti/EarthquakesGPS/main/gps_data/stat_info.pkl"

--2022-03-10 12:44:40--  https://raw.githubusercontent.com/Schimmenti/EarthquakesGPS/main/gps_data/stat_info.pkl
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17030 (17K) [application/octet-stream]
Saving to: ‘stat_info.pkl.2’


2022-03-10 12:44:40 (11.2 MB/s) - ‘stat_info.pkl.2’ saved [17030/17030]



In [90]:
# stat_info.pkl unpickles to a (station_names, station_pos) pair; station_pos
# is later indexed by station name.
# NOTE: unpickling executes code stored in the file -- only load trusted files.
with open("stat_info.pkl", "rb") as info_file:
  station_info = pkl.load(info_file)
station_names, station_pos = station_info

# Processing

In [7]:
# Download one CSV per station. A station whose file cannot be fetched or
# parsed is skipped (best effort), but it is recorded and reported so that the
# gap is visible instead of silent.
stat_data = {}
failed_stations = {}
for name in station_names:
  try:
    stat_data[name] = pd.read_csv(base_link + name + ".csv")
  except Exception as err:
    # Deliberately not a bare `except:` -- a kernel interrupt (KeyboardInterrupt)
    # must still be able to stop this long download loop.
    failed_stations[name] = err
if failed_stations:
  print("Skipped %i station(s): %s" % (len(failed_stations), ", ".join(map(str, failed_stations))))

In [8]:
# Put every station on a gap-free daily calendar: the integer "DateI" column
# (YYYYMMDD) becomes a datetime index, and days between the first and last
# record that have no row in the file are inserted as all-NaN rows.
for name, frame in stat_data.items():
  if isinstance(frame.index, pd.DatetimeIndex):
    # Already processed (e.g. the cell was run twice): after the first pass
    # "DateI" contains NaN and could no longer be parsed.
    continue
  dated = frame.assign(Date=pd.to_datetime(frame['DateI'], format='%Y%m%d')).set_index("Date")
  all_days = pd.date_range(dated.index[0], dated.index[-1])
  # reindex fills the inserted rows with NaN by default.
  stat_data[name] = dated.reindex(all_days)

In [10]:
# Cache the re-indexed station frames in the current working directory.
# NOTE(review): the "Loading" section reads stat_data.pkl from
# "drive/MyDrive/Colab Notebooks/" -- presumably the file is copied there by hand; confirm.
with open("stat_data.pkl", "wb") as out_file:
  pkl.dump(stat_data, out_file)

In [18]:
# Window length (in samples) of the moving linear regression.
W = 9
# Displacement columns, in the row order of each station's output array.
COMPONENTS = ("N", "E", "U")
coefs = {}
for name, frame in stat_data.items():
  # One slope series per component, each spanning the station's whole record.
  slopes = [moving_lin_regress(frame[component].values, W, 0, len(frame))
            for component in COMPONENTS]
  coefs[name] = np.array(slopes)

In [19]:
# Cache the slope arrays; the window length is part of the file name.
coefs_filename = "T_W=%i.pkl" % W
with open(coefs_filename, "wb") as out_file:
  pkl.dump(coefs, out_file)

# Loading

In [91]:
# Load the exported earthquake catalog (whitespace-separated) with its "Date"
# column as the index.
# NOTE(review): parse_dates is not passed, so the index holds date strings rather
# than datetimes -- confirm that is what downstream code expects.
catalog = pd.read_csv("https://raw.githubusercontent.com/Schimmenti/EarthquakesGPS/main/catalog.csv",sep=r"\s+", index_col="Date")

In [92]:
# Window length (in samples); selects which cached slope file is read from Drive.
W = 9
coefs_path = "drive/MyDrive/Colab Notebooks/T_W=%i.pkl" % W
# NOTE: unpickling executes code stored in the file -- only load trusted files.
with open(coefs_path, "rb") as in_file:
  coefs = pkl.load(in_file)

In [93]:
# Read the cached per-station frames back from Drive.
# NOTE: unpickling executes code stored in the file -- only load trusted files.
stat_data_path = "drive/MyDrive/Colab Notebooks/stat_data.pkl"
with open(stat_data_path, "rb") as in_file:
  stat_data = pkl.load(in_file)

# Excursion

In [95]:
# Only referenced by the disabled combined-norm row inside the loop.
up_scaling_factor = 4
excoefs = {}
for key, components in coefs.items():
  # Excursion of each slope series: running maximum minus running minimum over W samples.
  ranges = [max_filter1d_valid(series, W) - min_filter1d_valid(series, W)
            for series in components]
  #ranges.append(np.sqrt(ranges[0]**2+ranges[1]**2+(ranges[2]/up_scaling_factor)**2))
  excoefs[key] = np.array(ranges)

In [96]:
# Cache the excursion arrays; the window length is part of the file name.
excoefs_filename = "ex_T_W=%i.pkl" % W
with open(excoefs_filename, "wb") as out_file:
  pkl.dump(excoefs, out_file)

# Catalog

In [14]:
# Raw relocated catalog: whitespace-separated, no header row, so the five
# column names are assigned after reading.
hauksson_url = "https://raw.githubusercontent.com/Schimmenti/EarthquakesGPS/main/hauksson_relocated.dat"
catalog_columns = ["sec","m","lat","long","dep"]
catalog = pd.read_csv(hauksson_url, sep=r"\s+", header=None)
catalog.columns = catalog_columns

In [15]:
# Turn the relative "sec" column into calendar days and index the catalog by day.
# The time axis is anchored on the single M7.3 event (named "landers" here),
# whose origin time is hard-coded below (presumably UTC -- confirm).
landers = catalog[catalog["m"]==7.3]
if len(landers) != 1:
  raise ValueError("expected exactly one M7.3 event to anchor the catalog, found %i" % len(landers))
landers_date = datetime.date(1992,6,28)
landers_time = datetime.timedelta(hours=11, minutes=57, seconds=33)
# `date + timedelta` silently ignores hours/minutes/seconds, which used to move
# the catalog origin to midnight. Build a full datetime before adding the time of day.
landers_datetime = datetime.datetime.combine(landers_date, datetime.time()) + landers_time
delta_time = datetime.timedelta(seconds=float(landers['sec'].iloc[0]))
catalog_beginning = landers_datetime - delta_time
# Fractional seconds are truncated by the cast to whole-second timedeltas.
catalog_seconds = catalog['sec'].values.astype('timedelta64[s]')
start_date = np.datetime64(catalog_beginning)
catalog_dates = start_date + catalog_seconds
# Floor each event time to its calendar day. This replaces a year/month/day
# integer round trip whose extra "+ 1" second could push 23:59:59 events into
# the next (or a non-existent) day.
pandas_datetime = pd.to_datetime(catalog_dates.astype('datetime64[D]'))
catalog["Date"] = pandas_datetime
catalog.set_index("Date", drop=True, inplace=True)

In [16]:
# Export the day-indexed catalog as a tab-separated file in the working directory.
catalog.to_csv("catalog.csv", sep="\t")

# Station position

In [14]:
# One row per station, in the iteration order of stat_data. The next cell uses
# column 0 as latitude and column 1 as longitude.
position_array = np.array([station_pos[station] for station in stat_data])

In [15]:
# Events go down the rows (column vectors) and stations across the columns (row
# vectors), so broadcasting yields one value per (event, station) pair.
# NOTE(review): haversine() applies np.sin/np.cos to these values directly, so they
# must be in radians; if the catalog and station coordinates are in degrees they
# need np.radians first -- TODO confirm the units.
event_lat = catalog['lat'].values
event_long = catalog['long'].values
lat1 = event_lat[:, None]
lat2 = position_array[:, 0][None, :]
delta_long = event_long[:, None] - position_array[:, 1][None, :]

In [16]:
# (n_events, n_stations) matrix of haversine values. haversine() returns
# sin^2(theta/2) of the central angle, not a length, so these are only a
# monotonic proxy for distance.
distances = haversine(lat1,lat2,delta_long)

In [17]:
# For every event (row): station column indices ordered from smallest to largest value in `distances`.
closest_station_idx = np.argsort(distances, axis=1)

In [28]:
# Station-selection parameters; not used by any cell shown in this notebook.
# NOTE(review): threshold_dist is presumably expressed in the same haversine units
# as `distances`, and max_number presumably caps the stations kept per event -- confirm.
threshold_dist = 1e-3
max_number = 20

In [12]:
# Collapse the catalog to one record per day.
events_by_day = catalog.groupby("Date")
# Daily magnitude: the events are combined through 10**(1.5*m) and mapped back
# with (2/3)*log10 (presumably the moment-magnitude scaling, so the value
# reflects the day's summed moment -- confirm).
daily_magnitude = events_by_day["m"].apply(
    lambda mags: (2/3)*np.log10(np.sum(10**(1.5*mags)))
)
true_days = daily_magnitude.index
m_day = daily_magnitude.values
# Location and depth: plain mean over the day's events.
lat_day = events_by_day["lat"].mean().values
long_day = events_by_day["long"].mean().values
dep_day = events_by_day["dep"].mean().values

In [13]:
# Empty frame that fixes the column order of the daily catalog; the next cell fills it.
catalog_day = pd.DataFrame(columns = ["m", "lat", "long", "dep" ])

In [14]:
# Assemble the daily catalog in one step: one row per day, indexed by the
# days that actually contain events.
catalog_day = pd.DataFrame(
    {"m": m_day, "lat": lat_day, "long": long_day, "dep": dep_day},
    index=true_days,
)

In [15]:
# Export the daily catalog as a tab-separated file in the working directory.
catalog_day.to_csv("catalog_day.csv", sep="\t")