In [None]:

import pandas as pd

# Load the CSV data.
# The file is read as-is; the code below treats every record as one
# comma-delimited string stored under a single (very long) header column.
try:
  df = pd.read_csv('DATA_1.csv')
except FileNotFoundError as err:
  # Re-raise with a helpful message instead of calling exit(): inside a
  # notebook exit() shuts the kernel down (losing all state) rather than
  # simply stopping this cell.
  raise FileNotFoundError(
      "File 'DATA_1.csv' not found. Please upload the file to the Colab environment."
  ) from err

# Split the string column into multiple columns
df_split = df['        date          ,    lat   ,    lon   , smajax , sminax , strike, q ,  depth ,   unc , q ,  mw  ,  unc , q , s ,   mo  , fac,  mo_auth ,  mpp  ,  mpr  ,  mrr  ,  mrt  ,  mtp  ,  mtt  ,  str1  ,  dip1  , rake1  ,  str2  ,  dip2  , rake2  ,   type ,    eventid'].str.split(',', expand=True)

# Remove leading/trailing whitespace from column values
df_split = df_split.apply(lambda x: x.str.strip())

# Define column names.
# The raw header repeats 'q' and 'unc'; the numbered names (q1/q2/q3,
# unc1/unc2) keep every column label unique, in header order.
column_names = [
    "date", "lat", "lon", "smajax", "sminax", "strike", "q1", "depth", "unc1",
    "q2", "mw", "unc2", "q3", "s", "mo", "fac", "mo_auth", "mpp", "mpr", "mrr",
    "mrt", "mtp", "mtt", "str1", "dip1", "rake1", "str2", "dip2", "rake2", "type", "eventid"
]

# Assign column names to the dataframe (raises if the split produced a
# different number of columns than names listed above)
df_split.columns = column_names

# Convert 'lat' and 'lon' to numeric; unparseable values become NaN
df_split['lat'] = pd.to_numeric(df_split['lat'], errors='coerce')
df_split['lon'] = pd.to_numeric(df_split['lon'], errors='coerce')

# Drop rows where 'lat' or 'lon' are NaN, since they cannot be placed on a map
df_split.dropna(subset=['lat', 'lon'], inplace=True)

In [None]:
# Show the column labels of df_split as a quick sanity check.
df_split.columns

# Do not run the cell below unless it is really needed.

In [None]:
import json
import pandas as pd
import plotly.graph_objects as go

# Plate-boundary geometries (GeoJSON-style: a 'features' list whose items
# carry geometry coordinates).
with open('PB2002_boundaries.json') as f:
    plate_boundaries = json.load(f)

# Magnitudes are needed as numbers for marker size/colour; values that
# cannot be parsed become NaN.
df_split['mw'] = pd.to_numeric(df_split['mw'], errors='coerce')

# Earthquake scatter plot: marker size and colour both encode magnitude.
fig = go.Figure(go.Scattergeo(
    lon=df_split['lon'],
    lat=df_split['lat'],
    text=[f"Magnitude: {mw:.2f}<br>Lat: {lat:.2f}<br>Lon: {lon:.2f}" 
          for lat, lon, mw in zip(df_split['lat'], df_split['lon'], df_split['mw'])],
    mode='markers',
    showlegend=False,  # the colorbar already explains the encoding
    marker=dict(
        size=df_split['mw'],
        opacity=0.8,
        color=df_split['mw'],
        colorscale='Viridis',
        colorbar=dict(title='Magnitude')
    )
))

# Plate boundaries: collect every boundary line into ONE trace instead of
# adding a separate trace per feature. A None entry ends the current line,
# so consecutive boundaries are not joined to each other.
boundary_lons, boundary_lats = [], []
for feature in plate_boundaries['features']:
    geometry = feature['geometry']
    coords = geometry['coordinates']
    # A MultiLineString nests one extra list level; handle a plain
    # LineString as a single-part geometry.
    parts = coords if geometry.get('type') == 'MultiLineString' else [coords]
    for part in parts:
        # Index explicitly so positions with an optional third value still work.
        boundary_lons.extend(point[0] for point in part)
        boundary_lats.extend(point[1] for point in part)
        boundary_lons.append(None)
        boundary_lats.append(None)

fig.add_trace(go.Scattergeo(
    lon=boundary_lons,
    lat=boundary_lats,
    mode='lines',
    showlegend=False,
    line=dict(color='black', width=2),
))


# Customize the layout
fig.update_layout(
    title='Earthquake Locations by Magnitude',
    geo=dict(
        showland=True,
        landcolor="LightGreen",
        showocean=True,
        oceancolor="LightBlue",
        projection_type="natural earth"
    )
)

fig.show()



In [None]:
import yellowbrick
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from yellowbrick.cluster import KElbowVisualizer

# Columns used as clustering features.
feature_cols = ['lat', 'lon', 'mw', 'depth']

# Coerce to numeric BEFORE building the feature matrix. Previously X was
# sliced out first, so the coerced values never reached the scaler.
df_split['depth'] = pd.to_numeric(df_split['depth'], errors='coerce')
df_split['mw'] = pd.to_numeric(df_split['mw'], errors='coerce')

# KMeans rejects NaN input, so remove events missing any clustering feature.
# Dropping them from df_split keeps the labels aligned row-for-row below.
df_split.dropna(subset=feature_cols, inplace=True)

# Standardize the features since they're on different scales
X = df_split[feature_cols]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Use Elbow Method to find optimal k.
# The search model is seeded like the final model so the chosen k is
# reproducible across runs.
model = KMeans(random_state=42)
visualizer = KElbowVisualizer(model, k=(1,10))
visualizer.fit(X_scaled)
optimal_k = visualizer.elbow_value_

# The visualizer reports None when it cannot locate an elbow; fail with a
# clear message instead of an obscure KMeans parameter error.
if optimal_k is None:
    raise ValueError(
        "KElbowVisualizer found no elbow in the tested range of k; "
        "choose n_clusters manually."
    )

# Now perform clustering with the optimal k
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
df_split['cluster'] = kmeans.fit_predict(X_scaled)

# Visualize the clusters on the map


In [None]:
# List the distinct values present in the 'cluster' column.
df_split["cluster"].unique()

In [None]:
import plotly.graph_objects as go

In [None]:
# Hover label for each event: magnitude, depth and its 'cluster' value.
hover_text = [
    f"Magnitude: {magnitude:.2f}<br>Depth: {depth_km:.2f}<br>Cluster: {label}"
    for magnitude, depth_km, label in zip(
        df_split['mw'], df_split['depth'], df_split['cluster']
    )
]

# Marker size follows magnitude; marker colour follows the cluster label.
marker_style = dict(
    size=df_split['mw'],
    opacity=0.8,
    color=df_split['cluster'],
    colorscale='Viridis',
    colorbar=dict(title='Cluster'),
)

# Base-map appearance.
geo_style = dict(
    showland=True,
    landcolor="LightGreen",
    showocean=True,
    oceancolor="LightBlue",
    projection_type="natural earth",
)

# Assemble the figure step by step: points first, then layout.
fig = go.Figure()
fig.add_trace(go.Scattergeo(
    lon=df_split['lon'],
    lat=df_split['lat'],
    text=hover_text,
    mode='markers',
    marker=marker_style,
))
fig.update_layout(
    title='Earthquake Clusters',
    showlegend=False,
    geo=geo_style,
)

fig.show()

In [None]:
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
import numpy as np

# Build the feature matrix and standardize it in one step.
X = df_split[['lat', 'lon', 'mw', 'depth']]
X_scaled = StandardScaler().fit_transform(X)

# 1. DBSCAN clustering on the standardized features; the resulting labels
# are stored alongside the events.
dbscan = DBSCAN(eps=0.3, min_samples=5)
df_split['dbscan_cluster'] = dbscan.fit_predict(X_scaled)

# Hover labels: fall back to "N/A" when magnitude or depth is missing.
hover_text = [
    f"Magnitude: N/A<br>Depth: N/A<br>Cluster: {label}"
    if pd.isnull(magnitude) or pd.isnull(depth_km)
    else f"Magnitude: {float(magnitude):.2f}<br>Depth: {float(depth_km):.2f}<br>Cluster: {label}"
    for magnitude, depth_km, label in zip(
        df_split['mw'], df_split['depth'], df_split['dbscan_cluster']
    )
]

# Marker size follows magnitude; marker colour follows the DBSCAN label.
marker_style = dict(
    size=df_split['mw'],
    opacity=0.8,
    color=df_split['dbscan_cluster'],
    colorscale='Viridis',
    colorbar=dict(title='DBSCAN Cluster'),
)

# Base-map appearance.
geo_style = dict(
    showland=True,
    landcolor="LightGreen",
    showocean=True,
    oceancolor="LightBlue",
    projection_type="natural earth",
)

# Visualize DBSCAN results
fig_dbscan = go.Figure()
fig_dbscan.add_trace(go.Scattergeo(
    lon=df_split['lon'],
    lat=df_split['lat'],
    text=hover_text,
    mode='markers',
    marker=marker_style,
))

# Keep update_layout as the final expression of the cell: it returns the
# figure, which is what the notebook renders as this cell's output.
fig_dbscan.update_layout(
    title='Earthquake Clusters (DBSCAN)',
    showlegend=False,
    geo=geo_style,
)


In [None]:
from sklearn.mixture import GaussianMixture

# Fit a 5-component Gaussian mixture on the standardized features
# (X_scaled is expected to exist already from an earlier cell) and store
# the component label of every event.
gmm = GaussianMixture(n_components=5, random_state=42)
df_split['gmm_cluster'] = gmm.fit_predict(X_scaled)

# Hover labels: fall back to "N/A" when magnitude or depth is missing.
hover_text = [
    f"Magnitude: N/A<br>Depth: N/A<br>Cluster: {label}"
    if pd.isnull(magnitude) or pd.isnull(depth_km)
    else f"Magnitude: {float(magnitude):.2f}<br>Depth: {float(depth_km):.2f}<br>Cluster: {label}"
    for magnitude, depth_km, label in zip(
        df_split['mw'], df_split['depth'], df_split['gmm_cluster']
    )
]

# Marker size follows magnitude; marker colour follows the GMM label.
marker_style = dict(
    size=df_split['mw'],
    opacity=0.8,
    color=df_split['gmm_cluster'],
    colorscale='Viridis',
    colorbar=dict(title='GMM Cluster'),
)

# Base-map appearance.
geo_style = dict(
    showland=True,
    landcolor="LightGreen",
    showocean=True,
    oceancolor="LightBlue",
    projection_type="natural earth",
)

# Visualize GMM results
fig_gmm = go.Figure()
fig_gmm.add_trace(go.Scattergeo(
    lon=df_split['lon'],
    lat=df_split['lat'],
    text=hover_text,
    mode='markers',
    marker=marker_style,
))
fig_gmm.update_layout(
    title='Earthquake Clusters (Gaussian Mixture)',
    showlegend=False,
    geo=geo_style,
)

# Render the GMM figure.
fig_gmm.show()