In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import datetime as dt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.svm import SVR


Goal of this file is to create a simple model and see if it is possible to see likelyhood of a large earthquake - using ETAS and then using USGS

In [14]:
csv_file = "Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep = ',', lineterminator='\n')
etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
0,12/31/59,0:03:09.00,1960.002196,-119.0502,33.979,6.5,8.2474
1,1/2/60,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1/2/60,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1/2/60,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1/2/60,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737


In [15]:
#converting the Date column into datetime format
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
etas.loc[etas["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)

etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
0,1959-12-31,0:03:09.00,1960.002196,-119.0502,33.979,6.5,8.2474
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737


In [16]:
#filter the dataset by Date > 1960-01-01 and Date < 2023-01-1 
etas = etas[(etas['Date'] > pd.to_datetime('1960-01-01')) & (etas['Date'] < pd.to_datetime('2023-01-01'))]

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
etas = etas[etas['X'] > -123]
etas = etas[etas['X'] < -113]
etas = etas[etas['Y'] < 39]
etas = etas[etas['Y'] > 29]

etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737
5,1960-01-02,0:11:17.00,1960.00784,-115.605,33.0276,3.61,7.9322


In [17]:
summary_stats = etas.describe(include="all")
print(summary_stats)

                       Date        Time          Year             X  \
count                 31547       31547  31547.000000  31547.000000   
unique                18880       26489           NaN           NaN   
top     1964-04-26 00:00:00  6:49:17.00           NaN           NaN   
freq                     10           5           NaN           NaN   
first   1960-01-02 00:00:00         NaN           NaN           NaN   
last    2022-12-31 00:00:00         NaN           NaN           NaN   
mean                    NaN         NaN   1991.704948   -117.520496   
std                     NaN         NaN     18.290538      2.080386   
min                     NaN         NaN   1960.006125   -122.971200   
25%                     NaN         NaN   1975.794142   -118.711750   
50%                     NaN         NaN   1992.042660   -117.191400   
75%                     NaN         NaN   2007.554579   -116.092200   
max                     NaN         NaN   2023.001815   -113.246300   

     

  summary_stats = etas.describe(include="all")


In [18]:
X_train, X_test, y_train, y_test = train_test_split(etas["Date"], etas["Magnitude"], test_size=0.2, random_state=42)
print(X_train, X_test, y_train, y_test)
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create and train the SVR model
svr = SVR(kernel='rbf', C=1.0, epsilon=0.2)
svr.fit(X_train, y_train)

# Predict earthquake magnitudes
y_pred = svr.predict(X_test)

17212   1994-08-25
8494    1976-10-01
18514   1997-01-04
29109   2017-10-10
9400    1978-08-06
           ...    
30007   2019-07-05
5393    1970-07-18
861     1961-11-19
15905   1992-01-31
23855   2007-07-17
Name: Date, Length: 25237, dtype: datetime64[ns] 822     1961-10-31
18419   1996-11-06
6385    1972-07-17
15797   1991-11-08
3755    1967-05-15
           ...    
21399   2002-10-18
20505   2001-01-25
26666   2013-06-05
5567    1970-11-25
3217    1966-06-19
Name: Date, Length: 6310, dtype: datetime64[ns] 17212    3.07
8494     3.24
18514    3.36
29109    3.15
9400     3.01
         ... 
30007    4.09
5393     3.64
861      3.33
15905    3.13
23855    3.34
Name: Magnitude, Length: 25237, dtype: float64 822      3.26
18419    3.08
6385     3.59
15797    3.69
3755     3.65
         ... 
21399    3.10
20505    3.61
26666    3.28
5567     3.18
3217     3.42
Name: Magnitude, Length: 6310, dtype: float64


ValueError: Expected 2D array, got 1D array instead:
array=[ 7.7777280e+17  2.1297600e+17  8.5233600e+17 ... -2.5617600e+17
  6.9681600e+17  1.1846304e+18].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.