# Webscraping Earthquake Data

In [4]:
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

# Define starttime and endtime
starttime = datetime.now() - timedelta(days=30)  # 30 days ago
endtime = datetime.now()  # Current time

params = {
    "format": "geojson",
    "starttime": starttime.strftime("%Y-%m-%d"),
    "endtime": endtime.strftime("%Y-%m-%d"),    
    "minmagnitude": 4,  
    "maxlatitude": 72.0, 
    "minlatitude": 35.0,  
    "maxlongitude": 40.0,  
    "minlongitude": -25.0
}

# Request data from USGS API
url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
response = requests.get(url, params=params)
data = response.json()

earthquakes = []
for quake in data["features"]:
    properties = quake["properties"]
    geometry = quake["geometry"]["coordinates"]

    earthquakes.append({
        "Time": pd.to_datetime(properties["time"], unit='ms'),  # Convert timestamp
        "Magnitude": properties["mag"],
        "Location": properties["place"],
        "Longitude": geometry[0],
        "Latitude": geometry[1],
        "Depth (km)": geometry[2]
    })

df = pd.DataFrame(earthquakes)

print(df.head())


                     Time  Magnitude                       Location  \
0 2025-04-27 18:09:38.109        4.7  11 km WNW of Doğanyol, Turkey   
1 2025-04-25 17:33:16.771        4.3   19 km S of Kumburgaz, Turkey   
2 2025-04-25 07:20:03.164        4.6       22 km N of Simav, Turkey   
3 2025-04-25 04:23:44.604        4.5       24 km N of Simav, Turkey   
4 2025-04-23 12:12:57.020        5.0     23 km S of Silivri, Turkey   

   Longitude  Latitude  Depth (km)  
0    38.9278   38.3634      12.188  
1    28.4496   40.8476      13.696  
2    28.9825   39.2908      10.000  
3    28.9654   39.3125      10.000  
4    28.2296   40.8591      10.000  


In [5]:
import sys
import os

# Dynamically find the project root and add it to the path
project_path = os.path.abspath("..")  # Fixed!
sys.path.append(project_path)

# Make sure data folder exists
os.makedirs("data", exist_ok=True)

# Now import from your src package
from src import (
    fetch_earthquake_data,
    clean_earthquake_data,
    perform_analysis,
    generate_summary_from_data,
    map_epicenters,
)

# Run the pipeline
fetch_earthquake_data("2024-03-01", "2024-03-31")
df = clean_earthquake_data()
print(f"✅ Cleaned {len(df)} earthquakes.")  # <-- Fixed here
df.head()




✅ Cleaned 488 earthquakes.


Unnamed: 0,time,place,mag,depth,longitude,latitude,type,id,url,region
0,2024-03-30 22:55:52.655,"77 km W of Panguna, Papua New Guinea",5.1,77.27,154.7807,-6.3333,earthquake,us7000m900,https://earthquake.usgs.gov/earthquakes/eventp...,"Panguna, Papua New Guinea"
1,2024-03-30 22:07:30.357,"165 km NNW of Houma, Tonga",4.8,10.0,-175.9989,-19.8295,earthquake,us7000ma6u,https://earthquake.usgs.gov/earthquakes/eventp...,"Houma, Tonga"
2,2024-03-30 21:32:36.907,"186 km NNW of Houma, Tonga",4.5,10.0,-176.178,-19.706,earthquake,us7000ma6q,https://earthquake.usgs.gov/earthquakes/eventp...,"Houma, Tonga"
3,2024-03-30 20:55:08.465,"46 km W of Port-Vila, Vanuatu",4.7,10.0,167.8799,-17.8037,earthquake,us7000ma6p,https://earthquake.usgs.gov/earthquakes/eventp...,"Port-Vila, Vanuatu"
4,2024-03-30 20:04:08.351,Kermadec Islands region,4.6,9.606,-177.8442,-27.8943,earthquake,us7000ma6x,https://earthquake.usgs.gov/earthquakes/eventp...,Kermadec Islands region


In [6]:
import sys
import os

# Dynamically find the project root and add it to the path
project_path = os.path.abspath("..")  # adjust if your notebook is deeper or higher
sys.path.append(project_path)

# Make sure data folder exists
os.makedirs("data", exist_ok=True)

# Now import from your src package
from src import (
    fetch_earthquake_data,
    clean_earthquake_data,
    perform_analysis,
    generate_summary_from_data,
    map_epicenters,
)

# Run the pipeline
fetch_earthquake_data("2024-03-01", "2024-03-31")
df = clean_earthquake_data()
print(f"✅ Cleaned {len(df)} earthquakes.")
df.head()



✅ Cleaned 488 earthquakes.


Unnamed: 0,time,place,mag,depth,longitude,latitude,type,id,url,region
0,2024-03-30 22:55:52.655,"77 km W of Panguna, Papua New Guinea",5.1,77.27,154.7807,-6.3333,earthquake,us7000m900,https://earthquake.usgs.gov/earthquakes/eventp...,"Panguna, Papua New Guinea"
1,2024-03-30 22:07:30.357,"165 km NNW of Houma, Tonga",4.8,10.0,-175.9989,-19.8295,earthquake,us7000ma6u,https://earthquake.usgs.gov/earthquakes/eventp...,"Houma, Tonga"
2,2024-03-30 21:32:36.907,"186 km NNW of Houma, Tonga",4.5,10.0,-176.178,-19.706,earthquake,us7000ma6q,https://earthquake.usgs.gov/earthquakes/eventp...,"Houma, Tonga"
3,2024-03-30 20:55:08.465,"46 km W of Port-Vila, Vanuatu",4.7,10.0,167.8799,-17.8037,earthquake,us7000ma6p,https://earthquake.usgs.gov/earthquakes/eventp...,"Port-Vila, Vanuatu"
4,2024-03-30 20:04:08.351,Kermadec Islands region,4.6,9.606,-177.8442,-27.8943,earthquake,us7000ma6x,https://earthquake.usgs.gov/earthquakes/eventp...,Kermadec Islands region


In [10]:
import matplotlib.pyplot as plt

def plot_magnitude_histogram(df):
    plt.figure(figsize=(8, 4))
    plt.hist(df["mag"], bins=10, edgecolor='black')
    plt.title("Distribution of Earthquake Magnitudes")
    plt.xlabel("Magnitude")
    plt.ylabel("Frequency")
    plt.grid(True)
    plt.tight_layout()
    plt.show()  # 👈 Important!



In [8]:
import seaborn as sns

def plot_top_regions(df):
    top_regions = df["region"].value_counts().head(5)
    plt.figure(figsize=(8, 4))
    sns.barplot(x=top_regions.values, y=top_regions.index)
    plt.title("Top 5 Earthquake Regions")
    plt.xlabel("Count")
    plt.ylabel("Region")
    plt.show()


In [9]:
def plot_magnitude_by_region(df):
    top = df[df["region"].isin(df["region"].value_counts().index[:5])]
    plt.figure(figsize=(10, 5))
    sns.boxplot(data=top, x="region", y="mag")
    plt.title("Magnitude Distribution by Region")
    plt.ylabel("Magnitude")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
