# Relationship Between Magnitude And Multiplicity

In [40]:
import pandas as pd
import numpy as np
import datetime as dt

In [41]:
csv_file = "../datasets/All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})


Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.



## Data Filtering
1. Converting the date columns to datetime
2. Date > 1960-01-01 and < 2023-01-01
3. Longitude > -123 and < -113
4. Latitude > 29 and < 39

In [42]:
#filter the dataset by Date > 1960-01-01 and Date < 2023-01-1 
usgs["time"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
usgs = usgs[(pd.to_datetime(usgs['time']) > pd.to_datetime('1960-01-01')) & (pd.to_datetime(usgs['time']) < pd.to_datetime('2023-01-01'))]

usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
usgs = usgs[usgs['longitude'] > -123]
usgs = usgs[usgs['longitude'] < -113]
usgs = usgs[usgs['latitude'] < 39]
usgs = usgs[usgs['latitude'] > 29]

usgs.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r
240,2022-12-31,33.3975,-116.393333,3.88,4.14,mw,132,16,0.07391,0.19,...,2023-09-22T21:50:30.029Z,"16 km N of Borrego Springs, CA",earthquake,0.1,0.38,,6,reviewed,ci,ci\r
241,2022-12-31,34.355667,-116.921833,4.73,3.47,mw,121,25,0.07845,0.15,...,2023-03-07T19:00:01.040Z,"11km SSE of Lucerne Valley, CA",earthquake,0.09,0.41,,4,reviewed,ci,ci\r
246,2022-12-22,37.620167,-122.025,3.82,3.34,mw,141,16,,0.16,...,2023-04-20T04:34:00.806Z,"3km N of Union City, CA",earthquake,0.1,0.17,,3,reviewed,nc,nc\r
262,2022-12-17,37.918167,-122.304,5.48,3.57,mw,170,19,0.01598,0.15,...,2023-07-27T08:15:34.318Z,"1km ENE of El Cerrito, CA",earthquake,0.1,0.17,,4,reviewed,nc,nc\r
263,2022-12-13,36.604667,-121.209333,8.88,3.28,ml,67,55,0.03812,0.09,...,2023-02-18T22:04:08.040Z,"10km NW of Pinnacles, CA",earthquake,0.14,0.28,0.129,72,reviewed,nc,nc\r


In [43]:
time = []
for i in usgs['time']:
    time.append(pd.to_datetime(i))
usgs['time'] = time

## Data Grouping
Data is grouped into 1 week chunks

In [44]:
usgs_grouped_counts = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('M')).mag.count())
usgs_grouped_counts.rename(columns={'mag':'count'}, inplace=True)

usgs_grouped_max = pd.DataFrame(usgs.groupby(usgs['time'].dt.to_period('M')).mag.max())

In [45]:
import plotly.express as px
import plotly.graph_objects as go

In [46]:
# from sklearn.preprocessing import StandardScaler

## Data Scaling and Merging
<!-- Data is scaled using a Standard Scaler and merged -->
Data is scaled and merged

In [47]:
# scaler = StandardScaler()

# x_vals = np.array(usgs_grouped_counts["count"]).reshape(-1,1)
# scaled_counts_arr = scaler.fit_transform(x_vals)

# x_vals = np.array(usgs_grouped_max["mag"]).reshape(-1,1)
# scaled_max_arr = scaler.fit_transform(x_vals)

In [48]:
merged_earthquake_data = usgs_grouped_counts.copy()
merged_earthquake_data['count'] = usgs_grouped_counts["count"]
merged_earthquake_data['mag'] = usgs_grouped_max["mag"]
merged_earthquake_data = merged_earthquake_data.reset_index()
merged_earthquake_data.head()

Unnamed: 0,time,count,mag
0,1960-01,12,5.0
1,1960-02,8,3.79
2,1960-03,6,3.67
3,1960-04,9,4.24
4,1960-05,6,4.0


In [49]:
merged_earthquake_data['time'] = merged_earthquake_data['time'].dt.to_timestamp()

# Normalizing the columns manually
merged_earthquake_data['count_normalized'] = (merged_earthquake_data['count'] - merged_earthquake_data['count'].mean()) / merged_earthquake_data['count'].std()
merged_earthquake_data['mag_normalized'] = (merged_earthquake_data['mag'] - merged_earthquake_data['mag'].mean()) / merged_earthquake_data['mag'].std()

fig = px.line(merged_earthquake_data, x='time', y=['count_normalized', 'mag_normalized'], labels={'value': 'Normalized Value'}, title='Scaled Earthquake Magnitude vs Multiplicity')

fig.update_layout(
    width=900,
    height=600,
    dragmode='pan',
)
fig.update_traces(line=dict(width=1.25))
fig.show()

## Correlation Between Magnitude and Multiplicity

General correlation between magnitude and multiplicity

In [50]:
correlation_coefficient = merged_earthquake_data['count'].corr(merged_earthquake_data['mag'])
print(correlation_coefficient)

0.4993532378709277


Correlation between top 15 highest earthquake count months and the max earthquake magnitudes they had

In [51]:
scaled_earthquake_count = merged_earthquake_data.copy()
scaled_earthquake_count = merged_earthquake_data.nlargest(15, 'count')
correlation_coefficient = scaled_earthquake_count['count'].corr(scaled_earthquake_count['mag'])
print(correlation_coefficient)

0.389139787162484


Correlation between top 15 largest earthquakes and the earthquake counts they had in that month

In [52]:
scaled_earthquake_mag = merged_earthquake_data.copy()
scaled_earthquake_mag = merged_earthquake_data.nlargest(15, 'mag')
correlation_coefficient = scaled_earthquake_mag['mag'].corr(scaled_earthquake_mag['count'])
print(correlation_coefficient)

0.6605184452130431


We can see there is a significant correlation for a large amount of earthquakes happening whenever there is a large earthquake ... 
If we filter the dataset by earthquakes with magnitude >= 3.4 this correlation decreases to .60, showing that there are more small earthquakes happening

The next file will show the time before/after large earthquakes to see this more effectively