# Location Analysis

In [267]:
import pandas as pd
import numpy as np
import datetime as dt

In [268]:
csv_file = "../datasets/Formatted_ETAS_Output.csv"
etas = pd.read_csv(csv_file, sep = ',', lineterminator='\n')
csv_file = "../datasets/All (1960-2023).csv"
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})


Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.



## Data Filtering
1. Converting the date columns to datetime
2. Date > 1960-01-01 and < 2023-01-01
3. Longitude > -123 and < -113
4. Latitude > 29 and < 39s

In [269]:
etas["Date"] = pd.to_datetime(etas["Date"], errors="coerce", format="%m/%d/%y")
etas.loc[etas["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)

etas = etas[(etas['Date'] > pd.to_datetime('1960-01-01')) & (etas['Date'] < pd.to_datetime('2023-01-01'))]

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
etas = etas[etas['X'] > -123]
etas = etas[etas['X'] < -113]
etas = etas[etas['Y'] < 39]
etas = etas[etas['Y'] > 29]

In [270]:
etas.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z\r
1,1960-01-02,0:08:49.00,1960.006125,-115.6222,33.0793,4.25,7.9322
2,1960-01-02,0:10:31.00,1960.007305,-115.6323,33.122,3.03,8.4015
3,1960-01-02,0:10:32.00,1960.00732,-115.5851,33.0745,3.03,7.9678
4,1960-01-02,0:11:07.00,1960.00772,-115.6256,33.029,3.08,7.9737
5,1960-01-02,0:11:17.00,1960.00784,-115.605,33.0276,3.61,7.9322


In [271]:
# Converting the date column into datetime format
usgs["Date"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
usgs.drop(columns=["time"], inplace=True)
usgs = usgs[(pd.to_datetime(usgs['Date']) > pd.to_datetime('1960-01-01')) & (pd.to_datetime(usgs['Date']) < pd.to_datetime('2023-01-01'))]

usgs['longitude'] = pd.to_numeric(usgs['longitude'], errors='coerce')
usgs['latitude'] = pd.to_numeric(usgs['latitude'], errors='coerce')
usgs['mag'] = pd.to_numeric(usgs['mag'], errors='coerce')

#filter the dataset by X > -123 and X < -113 and Y > 29 and Y < 39
usgs = usgs[usgs['longitude'] > -123]
usgs = usgs[usgs['longitude'] < -113]
usgs = usgs[usgs['latitude'] < 39]
usgs = usgs[usgs['latitude'] > 29]

In [272]:
usgs.head()

Unnamed: 0,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource\r,Date
240,33.3975,-116.393333,3.88,4.14,mw,132,16,0.07391,0.19,ci,...,"16 km N of Borrego Springs, CA",earthquake,0.1,0.38,,6,reviewed,ci,ci\r,2022-12-31
241,34.355667,-116.921833,4.73,3.47,mw,121,25,0.07845,0.15,ci,...,"11km SSE of Lucerne Valley, CA",earthquake,0.09,0.41,,4,reviewed,ci,ci\r,2022-12-31
246,37.620167,-122.025,3.82,3.34,mw,141,16,,0.16,nc,...,"3km N of Union City, CA",earthquake,0.1,0.17,,3,reviewed,nc,nc\r,2022-12-22
262,37.918167,-122.304,5.48,3.57,mw,170,19,0.01598,0.15,nc,...,"1km ENE of El Cerrito, CA",earthquake,0.1,0.17,,4,reviewed,nc,nc\r,2022-12-17
263,36.604667,-121.209333,8.88,3.28,ml,67,55,0.03812,0.09,nc,...,"10km NW of Pinnacles, CA",earthquake,0.14,0.28,0.129,72,reviewed,nc,nc\r,2022-12-13


## Energy Calculation And Filtering
Converting the magnitudes to energy through the formula: (1/1.5) * log(10^(1.5*mag)) and graphing it

In [273]:
const = (1/1.5)
etas['Energy'] = 10**(1.5*etas['Magnitude'])
etas['Energy'] = np.log(etas['Magnitude'])*const

const = (1/1.5)
usgs['Energy'] = 10**(1.5*usgs['mag'])
usgs['Energy'] = np.log(usgs['mag'])*const

In [274]:
import plotly.express as px
import plotly.graph_objects as go

In [275]:
fig = px.scatter(etas, x='X', y='Y', color='Energy', color_continuous_scale='viridis', title='Earthquake Locations ETAS (Energy)')
fig.update_layout(width=800, height=800)
fig.show()

In [276]:
fig = px.scatter(usgs, x='longitude', y='latitude', color='Energy', color_continuous_scale='viridis', title='Earthquake Locations USGS (Energy)')
fig.update_layout(width=800, height=800)
fig.show()

## Assinging Grid Labels
Grouping the eathquakes by locational grids and assigning labels to them

In [277]:
#size of the grids
lat_grid_size = 0.5
lon_grid_size = 0.5

In [278]:
def assign_grid_labels_usgs(row):
    lon_label = int(row['longitude'] // lon_grid_size)
    lat_label = int(row['latitude'] // lat_grid_size)
    return f'Grid_{lon_label}_{lat_label}'

def assign_grid_labels_etas(row):
    lon_label = int(row['X'] // lon_grid_size)
    lat_label = int(row['Y'] // lat_grid_size)
    return f'Grid_{lon_label}_{lat_label}'


In [279]:
etas['grid_label'] = etas.apply(assign_grid_labels_etas, axis=1)
grid_etas = {}
for label, group in etas.groupby('grid_label'):
    grid_etas[label] = group

usgs['grid_label'] = usgs.apply(assign_grid_labels_usgs, axis=1)
grid_usgs  = {}
for label, group in usgs.groupby('grid_label'):
    grid_usgs[label] = group
    
print(grid_etas)
print(len(grid_etas))

print(grid_usgs)
print(len(grid_usgs))

{'Grid_-227_58':             Date         Time         Year         X        Y  Magnitude  \
5845  1971-06-12  10:44:22.00  1971.447481 -113.4020  29.3520       3.11   
10195 1980-04-18   7:09:06.00  1980.297994 -113.4280  29.4560       3.27   
11681 1983-03-16   4:58:03.00  1983.206983 -113.4313  29.2442       4.34   
14184 1988-08-20  15:18:51.00  1988.638092 -113.4313  29.2442       3.12   
14230 1988-10-01  18:02:07.00  1988.751476 -113.4020  29.3520       3.07   
19905 1999-11-11  20:43:00.00  1999.863198 -113.4280  29.4560       3.10   
20736 2001-06-30  11:57:28.00  2001.498244 -113.4020  29.3520       3.29   
24730 2009-04-04   6:11:24.00  2009.257917 -113.4020  29.3520       3.22   
25415 2010-09-27  17:47:14.00  2010.741136 -113.4313  29.2442       3.42   
25508 2010-12-12  22:48:50.00  2010.950584 -113.4280  29.4560       3.05   
27522 2015-01-10   0:41:01.00  2015.028491 -113.4731  29.2678       3.89   
27523 2015-01-10   0:41:20.00  2015.028715 -113.4165  29.2146       3.0

In [280]:
grid_usgs = usgs.groupby('grid_label')['Energy'].sum().reset_index()
fig = go.Figure()

scatter = go.Scatter(
    x=usgs['longitude'],
    y=usgs['latitude'],
    mode='markers',
    marker=dict(color=usgs.groupby('grid_label')['Energy'].transform('sum'), colorscale='Viridis', size=7, colorbar=dict(title='Cumulative Energy')),
)

fig.add_trace(scatter)

fig.update_layout(
    title='Cumulative Energy of Each Grid (USGS)',
    width=800,
    height=800,
    xaxis=dict(dtick=lon_grid_size),
    yaxis=dict(dtick=lat_grid_size),
    xaxis_title='Longitude',
    yaxis_title='Latitude',
)
fig.show()

In [281]:
fig = go.Figure()

# Create a color map for unique colors for each grid
colors = px.colors.qualitative.Pastel

# Iterate through the grid_dataframes and add scatter plots to the figure
for i, (label, grid_df) in enumerate(grid_usgs.items()):
    color = colors[i % len(colors)]  # Cycle through the color map
    scatter = px.scatter(grid_df, x='longitude', y='latitude', color_discrete_sequence=[color])
    fig.add_trace(scatter.data[0])  # Add the trace to the main figure

# Display the combined figure
fig.update_layout(title='Earthquake Grids (USGS)')
fig.update_layout(width=800, height=800)
fig.show()

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['grid_label'] but received: longitude