In [1]:
import requests
import pandas as pd
import os

# Define the output file path
output_path = r"C:\Users\thoma\Documents\GitHub\USGS\earthquake_data.csv"

# USGS API URL and parameters
url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
min_magnitude = 3.5

# Define the starting and ending year
start_year = 2004
end_year = 2025

# Create an empty list to store data from all years
all_data = []
all_geometries = []  # New list to store geometries

# Loop through each year to pull the data
for year in range(start_year, end_year):
    start_time = f"{year}-01-01"
    end_time = f"{year}-12-31"
    
    params = {
        "format": "geojson",
        "starttime": start_time,
        "endtime": end_time,
        "minmagnitude": min_magnitude,
    }
    
    # Make the request to USGS
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        features = data.get('features', [])
        
        # Extract both properties and geometry
        for feature in features:
            properties = feature['properties']
            geometry = feature['geometry']
            
            # Add coordinates to properties
            properties['latitude'] = geometry['coordinates'][1]
            properties['longitude'] = geometry['coordinates'][0]
            properties['depth'] = geometry['coordinates'][2]
            
            all_data.append(properties)
        print(f"Successfully retrieved data for {start_time} to {end_time}")
    else:
        print(f"Failed to retrieve data for {start_time} to {end_time} (Status Code: {response.status_code})")

# Create DataFrame and perform cleaning
df = pd.DataFrame(all_data)

# Convert timestamps to datetime
df['datetime'] = pd.to_datetime(df['time'], unit='ms')

# Clean location data
df[['distance_km', 'location']] = df['place'].str.extract(r'(\d+)\s*km\s+(.+)')
df.loc[~df['distance_km'].notna(), 'location'] = df.loc[~df['distance_km'].notna(), 'place']
df['distance_km'] = pd.to_numeric(df['distance_km'], errors='coerce')

# Handle missing values
numeric_cols = ['felt', 'cdi', 'mmi', 'nst', 'dmin', 'rms', 'gap']
df[numeric_cols] = df[numeric_cols].fillna(-1)

# Drop redundant columns and rename for clarity
columns_to_drop = ['time', 'updated', 'tz', 'url', 'detail', 'ids', 'sources', 'types']
df_clean = df.drop(columns=columns_to_drop)

# Rename columns for clarity
column_renames = {
    'mag': 'magnitude',
    'cdi': 'community_intensity',
    'mmi': 'modified_mercalli_intensity',
    'sig': 'significance',
    'nst': 'num_stations',
    'dmin': 'min_distance',
    'rms': 'root_mean_square',
    'magType': 'magnitude_type'
}
df_clean = df_clean.rename(columns=column_renames)

# Reorder columns for better organization
first_cols = ['datetime', 'magnitude', 'location', 'distance_km', 'latitude', 'longitude', 'depth']
other_cols = [col for col in df_clean.columns if col not in first_cols]
df_clean = df_clean[first_cols + other_cols]

# Save the cleaned dataset
cleaned_output_path = r"C:\Users\thoma\Documents\GitHub\USGS\earthquake_data_cleaned.csv"
df_clean.to_csv(cleaned_output_path, index=False)

# Display information about the cleaned dataset
print("\nCleaned dataset info:")
df_clean.info()

Successfully retrieved data for 2004-01-01 to 2004-12-31
Failed to retrieve data for 2005-01-01 to 2005-12-31 (Status Code: 400)
Successfully retrieved data for 2006-01-01 to 2006-12-31
Successfully retrieved data for 2007-01-01 to 2007-12-31
Failed to retrieve data for 2008-01-01 to 2008-12-31 (Status Code: 400)
Successfully retrieved data for 2009-01-01 to 2009-12-31
Successfully retrieved data for 2010-01-01 to 2010-12-31
Successfully retrieved data for 2011-01-01 to 2011-12-31
Successfully retrieved data for 2012-01-01 to 2012-12-31
Successfully retrieved data for 2013-01-01 to 2013-12-31
Successfully retrieved data for 2014-01-01 to 2014-12-31
Successfully retrieved data for 2015-01-01 to 2015-12-31
Successfully retrieved data for 2016-01-01 to 2016-12-31
Successfully retrieved data for 2017-01-01 to 2017-12-31
Successfully retrieved data for 2018-01-01 to 2018-12-31
Successfully retrieved data for 2019-01-01 to 2019-12-31
Successfully retrieved data for 2020-01-01 to 2020-12-31
S

In [3]:
import pandas as pd
import numpy as np
import folium
from folium import plugins
import os

# Read the data
df = pd.read_csv(r"C:\Users\thoma\Documents\GitHub\USGS\earthquake_data_cleaned.csv")

# Sample the data (adjust the fraction as needed)
# Let's start with 10% of the data, randomly sampled
sample_size = 0.1
df_sampled = df.sample(frac=sample_size, random_state=42)

# Create base map
center_lat = df_sampled['latitude'].median()
center_lon = df_sampled['longitude'].median()
m = folium.Map(location=[center_lat, center_lon], zoom_start=3)

# Add marker cluster
marker_cluster = plugins.MarkerCluster().add_to(m)

# Add clustered points
for idx, row in df_sampled.iterrows():
    # Create popup text
    popup_text = f"""
    Magnitude: {row['magnitude']:.1f}<br>
    Depth: {row['depth']:.1f} km<br>
    Location: {str(row['location'])}<br>
    Date: {row['datetime']}
    """
    
    # Add to cluster
    folium.CircleMarker(
        location=[float(row['latitude']), float(row['longitude'])],
        radius=float(row['magnitude']),  # Smaller radius
        popup=popup_text,
        color=get_color(float(row['depth'])),
        fill=True,
        fillOpacity=0.7
    ).add_to(marker_cluster)

# Add heatmap layer using all data points
heat_data = [[row['latitude'], row['longitude'], row['magnitude']] for idx, row in df.iterrows()]
plugins.HeatMap(heat_data, min_opacity=0.2, radius=15).add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Add legend
legend_html = '''
<div style="position: fixed; 
            bottom: 50px; right: 50px; width: 150px; height: 90px; 
            border:2px solid grey; z-index:9999; 
            background-color:white;
            opacity:0.8;
            padding: 10px;
            font-size:14px;">
            <b>Depth (km)</b><br>
            <i class="fa fa-circle fa-1x" style="color:red"></i> < 50<br>
            <i class="fa fa-circle fa-1x" style="color:orange"></i> 50-100<br>
            <i class="fa fa-circle fa-1x" style="color:yellow"></i> 100-300<br>
            <i class="fa fa-circle fa-1x" style="color:green"></i> > 300
</div>
'''
m.get_root().html.add_child(folium.Element(legend_html))

# Save the map
map_path = r"C:\Users\thoma\Documents\GitHub\USGS\earthquake_map_optimized.html"
m.save(map_path)

# Print statistics
print(f"\nVisualization created with {len(df_sampled)} points ({sample_size*100:.1f}% of total data)")
print(f"Total earthquakes in dataset: {len(df)}")
print(f"Map saved to: {map_path}")


Visualization created with 30311 points (10.0% of total data)
Total earthquakes in dataset: 303111
Map saved to: C:\Users\thoma\Documents\GitHub\USGS\earthquake_map_optimized.html


In [6]:
import pandas as pd
import numpy as np
import folium
from folium import plugins
import os

# Read the data
df = pd.read_csv(r"C:\Users\thoma\Documents\GitHub\USGS\earthquake_data_cleaned.csv")

# Filter for major earthquakes (magnitude >= 6.0)
major_quakes = df[df['magnitude'] >= 6.0].copy()

# Create base map with a darker style
center_lat = major_quakes['latitude'].median()
center_lon = major_quakes['longitude'].median()
m = folium.Map(location=[center_lat, center_lon], zoom_start=3, 
               tiles='CartoDB dark_matter')

# Add marker cluster
marker_cluster = plugins.MarkerCluster().add_to(m)

# Create a colormap for magnitude
def get_magnitude_color(magnitude):
    if magnitude >= 8.0:
        return '#FF1E1E'  # Bright red
    elif magnitude >= 7.0:
        return '#FF9000'  # Bright orange
    else:
        return '#FFD700'  # Gold

# Add earthquake points to cluster
for idx, row in major_quakes.iterrows():
    # Create popup text
    popup_text = f"""
    <div style="font-family: Arial; width: 200px;">
        <h4 style="margin: 0; color: {'#FF1E1E' if row['magnitude'] >= 8.0 else '#FF9000' if row['magnitude'] >= 7.0 else '#FFD700'}">
            Magnitude {row['magnitude']:.1f}
        </h4>
        <hr style="margin: 4px 0;">
        <b>Date:</b> {row['datetime']}<br>
        <b>Depth:</b> {row['depth']:.1f} km<br>
        <b>Location:</b> {str(row['location'])}
    </div>
    """
    
    # Add circle marker
    folium.CircleMarker(
        location=[float(row['latitude']), float(row['longitude'])],
        radius=float(row['magnitude']) * 1.5,
        popup=folium.Popup(popup_text, max_width=300),
        color=get_magnitude_color(row['magnitude']),
        fill=True,
        fillOpacity=0.7,
        weight=2
    ).add_to(marker_cluster)

# Add heatmap layer
heat_data = [[row['latitude'], row['longitude']] for idx, row in major_quakes.iterrows()]
plugins.HeatMap(
    heat_data,
    min_opacity=0.2,
    radius=25,
    blur=15,
    max_zoom=1,
).add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Add a more stylish legend
legend_html = '''
<div style="position: fixed; 
            bottom: 50px; right: 50px; width: 200px;
            border:2px solid rgba(255, 255, 255, 0.2);
            border-radius: 10px;
            z-index:9999; 
            background-color:rgba(0, 0, 0, 0.7);
            color: white;
            padding: 15px;
            font-family: Arial;">
            <h4 style="margin: 0 0 10px 0;">Major Earthquakes</h4>
            <div style="margin: 5px 0;">
                <i class="fa fa-circle fa-1x" style="color:#FF1E1E"></i> Magnitude ≥ 8.0
            </div>
            <div style="margin: 5px 0;">
                <i class="fa fa-circle fa-1x" style="color:#FF9000"></i> Magnitude 7.0 - 7.9
            </div>
            <div style="margin: 5px 0;">
                <i class="fa fa-circle fa-1x" style="color:#FFD700"></i> Magnitude 6.0 - 6.9
            </div>
</div>
'''
m.get_root().html.add_child(folium.Element(legend_html))

# Save the map
map_path = r"C:\Users\thoma\Documents\GitHub\USGS\major_earthquakes_styled.html"
m.save(map_path)

# Print statistics
print("\nMajor Earthquakes Statistics (Magnitude ≥ 6.0):")
print(f"Total number of major earthquakes: {len(major_quakes)}")
print("\nBreakdown by magnitude range:")
print(f"8.0 or higher: {len(major_quakes[major_quakes['magnitude'] >= 8.0])}")
print(f"7.0 to 7.9: {len(major_quakes[(major_quakes['magnitude'] >= 7.0) & (major_quakes['magnitude'] < 8.0)])}")
print(f"6.0 to 6.9: {len(major_quakes[(major_quakes['magnitude'] >= 6.0) & (major_quakes['magnitude'] < 7.0)])}")
print(f"\nMap saved to: {map_path}")


Major Earthquakes Statistics (Magnitude ≥ 6.0):
Total number of major earthquakes: 2807

Breakdown by magnitude range:
8.0 or higher: 23
7.0 to 7.9: 267
6.0 to 6.9: 2517

Map saved to: C:\Users\thoma\Documents\GitHub\USGS\major_earthquakes_styled.html
