In [None]:
import pandas as pd
import plotly.express as px

# Load data from the CSV, convert the time column to datetime, and keep only positive-magnitude events

In [None]:
# Read the cleaned earthquake catalogue from the working directory.
df = pd.read_csv("quakes-cleaned.csv")

# Parse timestamps. format='mixed' lets pandas infer the format per element,
# and errors='coerce' turns unparseable values into NaT instead of raising.
df['time'] = pd.to_datetime(df['time'], errors='coerce', format='mixed')

# Keep only rows with a strictly positive magnitude (NaN magnitudes fail the
# comparison and are dropped as well). The explicit .copy() makes `df` an
# independent frame rather than a filtered slice, so later column assignments
# on `df` do not trigger pandas' SettingWithCopyWarning.
df = df[df['mag'] > 0].copy()

# 1. Top 5 High-Risk Zones (Geographic Map)

In [None]:
# Count events per exact (latitude, longitude) pair, then keep the five
# coordinate pairs with the largest counts.
high_risk_zones = (
    df.groupby(['latitude', 'longitude'])
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(5)
)

# World map of those five locations; marker size and colour both encode the
# event count.
high_risk_map = px.scatter_geo(
    high_risk_zones,
    lat='latitude',
    lon='longitude',
    color='count',
    size='count',
    projection="natural earth",
    labels={"count": "Earthquake Count"},
    title="Top 5 High-Risk Zones (Geographic Map)",
)

# Outline the markers, style the base map, and render. Each update call
# returns the figure, so the steps are chained.
(
    high_risk_map
    .update_traces(
        marker={"line": {"width": 2, "color": 'DarkSlateGrey'}},
        selector={"mode": 'markers'},
    )
    .update_layout(
        title_font_size=20,
        geo={"showland": True, "landcolor": "lightgrey", "showcountries": True},
    )
    .show()
)

# 2. Most Seismic Activity in a Local Area (Focus on Most Active Zone)

In [None]:
# First row of `high_risk_zones`, i.e. the coordinate pair with the highest
# event count (the table is sorted by count, descending).
most_seismic_area = high_risk_zones.iloc[0]

# Plot every event in `df`, with marker size scaled by magnitude. The title
# reports the coordinates of the most active location.
most_seismic_area_plot = px.scatter_geo(
    df,
    lat='latitude',
    lon='longitude',
    size='mag',
    labels={"latitude": "Latitude", "longitude": "Longitude", "mag": "Magnitude"},
    title=f"Most Seismic Activity: Lat {most_seismic_area['latitude']}, Lon {most_seismic_area['longitude']}",
)

# Overlay a single red marker at the most active location, then render.
# add_scattergeo returns the figure, so show() is chained onto it.
most_seismic_area_plot.add_scattergeo(
    name="Most Active Zone",
    marker={"size": 15, "color": 'red'},
    lat=[most_seismic_area['latitude']],
    lon=[most_seismic_area['longitude']],
).show()

# 3. Most Affected Places (Bar Chart)

In [None]:
# Ten most frequent values of the 'place' column, as a two-column frame
# ('place', 'count') ordered from most to least frequent.
most_affected_places = (
    df['place']
    .value_counts()
    .head(10)
    .rename_axis('place')
    .reset_index(name='count')
)

# Bar chart of event counts per place, with the count printed on each bar.
most_affected_places_plot = px.bar(
    most_affected_places,
    x='place',
    y='count',
    text='count',
    labels={"place": "Place", "count": "Earthquake Count"},
    title="Top 10 Most Affected Places",
)

# Rotate x-axis labels for better readability, then render.
most_affected_places_plot.update_xaxes(tickangle=45).show()

# 4. Earthquake Frequency by Hour (Temporal Analysis)

In [None]:
# Add an hour-of-day column. assign() builds a new frame instead of writing
# into `df` in place, which avoids SettingWithCopyWarning when `df` is a
# filtered slice and keeps this cell safe to re-run.
df = df.assign(hour=df['time'].dt.hour)

# Count events per hour of day.
# - Rows whose timestamp failed to parse (NaT) have no hour; drop them and
#   cast back to int so the axis shows 0..23 rather than 0.0..23.0.
# - Reindex onto the full 0-23 range so hours with no events appear as an
#   explicit zero instead of being skipped (a line chart would otherwise
#   interpolate straight across the gap).
earthquake_frequency_by_hour = (
    df['hour']
    .dropna()
    .astype(int)
    .value_counts()
    .reindex(range(24), fill_value=0)
    .rename_axis('hour')
    .reset_index(name='frequency')
)

# Line chart of event count against hour of day.
hourly_frequency_plot = px.line(
    earthquake_frequency_by_hour,
    x='hour',
    y='frequency',
    title="Earthquake Frequency by Hour",
    labels={"hour": "Hour of Day (0-23)", "frequency": "Earthquake Count"}
)
hourly_frequency_plot.show()

# 5. Average Magnitude of Earthquakes (Distribution)

In [None]:
# Mean magnitude over all rows of `df`; shown in the title and as a marker line.
avg_magnitude = df['mag'].mean()

# Histogram of magnitudes, with nbins=50 as the requested upper bound on the
# number of bins.
magnitude_distribution_plot = px.histogram(
    df,
    x='mag',
    nbins=50,
    title=f"Distribution of Earthquake Magnitudes (Average: {avg_magnitude:.2f})",
    labels={"mag": "Magnitude", "count": "Frequency"}
)

# `labels` renames data columns; the aggregated y-axis of a histogram is not
# a column of `df`, so set its title explicitly to guarantee it reads
# "Frequency".
magnitude_distribution_plot.update_yaxes(title_text="Frequency")

# Dashed red vertical line at the mean, annotated with its value.
magnitude_distribution_plot.add_vline(
    x=avg_magnitude,
    line_dash="dash",
    line_color="red",
    annotation_text=f"Avg Magnitude: {avg_magnitude:.2f}"
)
magnitude_distribution_plot.show()