In [10]:
import pandas as pd

In [11]:
df = pd.read_csv('earthquakes.csv')

In [12]:
df.time = pd.to_datetime(df.time)
df = df.loc[df.time >= "2004-01-01"]
df = df.sort_values("time")
df = df.set_index("time")

df["region"] = df.place.str.split(", ", expand=True)[1]
df.region = df.region.fillna(df.place)
df.region = df.region.replace("CA", "California")
df.region = df.region.replace("B.C.", "Baja California")

df = df[["depth", "mag", "region", "latitude", "longitude"]]

regions = df.region.value_counts()
top_k = 25
top_k_regions = regions.head(top_k).index
df = df.loc[df.region.isin(top_k_regions)]

df = df.groupby("region").resample("d").mean()
df = df.reset_index()
df.head()

Unnamed: 0,region,time,depth,mag,latitude,longitude
0,Alaska,2004-01-01 00:00:00+00:00,35.4,1.515702,59.965255,-153.824993
1,Alaska,2004-01-02 00:00:00+00:00,34.785556,1.403333,60.790174,-151.27136
2,Alaska,2004-01-03 00:00:00+00:00,33.142029,1.52029,60.727183,-146.916419
3,Alaska,2004-01-04 00:00:00+00:00,37.425862,1.52069,60.75534,-151.792453
4,Alaska,2004-01-05 00:00:00+00:00,35.75431,1.258621,61.672058,-149.95926


In [None]:
df.mag = df.mag.ffill()
df.depth = df.depth.ffill()
df.latitude = df.latitude.ffill()
df.longitude = df.longitude.ffill()

df["hour"] = df.time.dt.hour
df["day"] = df.time.dt.day
df["month"] = df.time.dt.month
df["dayofweek"] = df.time.dt.dayofweek
df["dayofyear"] = df.time.dt.dayofyear

start_lag = 24
end_lag = 40
for i in range(start_lag, end_lag + 1):
    df[f"mag_lag_{i}"] = df.groupby("region").mag.shift(i)

df[f"depth_rolling_mean_{start_lag}"] = df.groupby("region").depth.transform(
    lambda x: x.rolling(window=start_lag)
)
df[f"latitude_rolling_mean_{start_lag}"] = df.groupby("region").latitude.transform(
    lambda x: x.rolling(window=start_lag)
)
df[f"longitude_rolling_mean_{start_lag}"] = df.groupby("region").longitude.transform(
    lambda x: x.rolling(window=start_lag)
)

In [4]:
df.head()

Unnamed: 0,region,time,depth,mag,latitude,longitude,hour,day,month,dayofweek,...,mag_lag_34,mag_lag_35,mag_lag_36,mag_lag_37,mag_lag_38,mag_lag_39,mag_lag_40,depth_rolling_mean_24,latitude_rolling_mean_24,longitude_rolling_mean_24
0,Alaska,2004-01-01 00:00:00+00:00,35.4,1.515702,59.965255,-153.824993,0,1,1,3,...,,,,,,,,"0 35.4 Name: Alaska, dtype: float64","0 59.965255 Name: Alaska, dtype: float64","0 -153.824993 Name: Alaska, dtype: float64"
1,Alaska,2004-01-02 00:00:00+00:00,34.785556,1.403333,60.790174,-151.27136,0,2,1,4,...,,,,,,,,"0 35.400000 1 34.785556 Name: Alaska, dt...","0 59.965255 1 60.790174 Name: Alaska, dt...","0 -153.824993 1 -151.271360 Name: Alaska, ..."
2,Alaska,2004-01-03 00:00:00+00:00,33.142029,1.52029,60.727183,-146.916419,0,3,1,5,...,,,,,,,,0 35.400000 1 34.785556 2 33.142029 N...,0 59.965255 1 60.790174 2 60.727183 N...,0 -153.824993 1 -151.271360 2 -146.91641...
3,Alaska,2004-01-04 00:00:00+00:00,37.425862,1.52069,60.75534,-151.792453,0,4,1,6,...,,,,,,,,0 35.400000 1 34.785556 2 33.142029 3...,0 59.965255 1 60.790174 2 60.727183 3...,0 -153.824993 1 -151.271360 2 -146.91641...
4,Alaska,2004-01-05 00:00:00+00:00,35.75431,1.258621,61.672058,-149.95926,0,5,1,0,...,,,,,,,,0 35.400000 1 34.785556 2 33.142029 3...,0 59.965255 1 60.790174 2 60.727183 3...,0 -153.824993 1 -151.271360 2 -146.91641...
