In [27]:
import json
import requests 
import urllib
from pathlib import Path
from datetime import datetime
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

Here we import all the libraries the notebook needs, so that every dependency is declared globally in one place.

In [28]:
def collect_data(start_time,end_time,min_magnitude=5):
    """Query the USGS earthquake event API and return the results as a DataFrame.

    Parameters
    ----------
    start_time : str
        Sent to the API as the ``starttime`` query parameter.
    end_time : str
        Sent to the API as the ``endtime`` query parameter.
    min_magnitude : int or float, default 5
        Sent to the API as the ``minmagnitude`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned feature, indexed 0..n-1, with the columns
        ``eventid``, ``time``, ``latitude``, ``longitude``, ``country``,
        ``depth`` and ``magnitude``. An empty DataFrame (no columns) is
        returned when the response status is not 200 or when the response
        contains no features.
    """
    # URL of the chosen API; results are requested in GeoJSON format
    api_url = "https://earthquake.usgs.gov/fdsnws/event/1/query"

    # Query parameters used to select the events; cleaning is done later
    params = {
        "format": "geojson",
        "starttime": start_time,
        "endtime": end_time,
        "minmagnitude": min_magnitude,
    }

    # A timeout stops a stalled connection from hanging the notebook forever
    api_data = requests.get(api_url, params=params, timeout=60)

    # On failure, report the status code and hand back an empty frame
    if api_data.status_code != 200:
        print("Error, there is an issue with data rretrieval, error code, ", api_data.status_code)
        return pd.DataFrame()

    features = api_data.json()['features']

    # Build all the records first and create the DataFrame once: concatenating
    # a one-row frame per feature re-copies the accumulated data on every
    # iteration and leaves every row with the index label 0.
    records = [
        {
            "eventid": feature['id'],
            "time": feature['properties']['time'],
            # coordinates are read as [longitude, latitude, depth]
            "latitude": feature['geometry']['coordinates'][1],
            "longitude": feature['geometry']['coordinates'][0],
            # "country" is filled from the feature's `place` property
            "country": feature['properties']['place'],
            "depth": feature['geometry']['coordinates'][2],
            "magnitude": feature['properties']['mag'],
        }
        for feature in features
    ]
    return pd.DataFrame(records)


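In the above function, we query the USGS earthquake API for events between `start_time` and `end_time` with a magnitude of at least `min_magnitude`, and return the selected fields of each event as a DataFrame. If the request fails, the status code is printed and an empty DataFrame is returned.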

In [29]:
# Collect one frame per request and concatenate once at the end, rather than
# re-copying the accumulated frame after every request.
collected_frames = []

#Here we loop through the full years 2000-2023, one by one, to prevent any errors from too large of data
for year in range(2000,2024):
    start=f"{year}-01-01"
    # A date-only end time is interpreted by the API as midnight at the start
    # of that day, so ending on "-12-31" would drop every event on 31 December.
    # Ending on 1 January of the following year covers the whole year.
    end=f"{year + 1}-01-01"
    partial_earthquake_data=collect_data(start,end)
    collected_frames.append(partial_earthquake_data)

#As the API collects data till now, we also fetch everything from 2024 up to the current moment.
#A full UTC timestamp is used so that today's events are included (a bare date would stop at midnight).
now_utc = pd.Timestamp.now(tz="UTC").strftime("%Y-%m-%dT%H:%M:%S")
partial_earthquake_data=collect_data("2024-01-01",now_utc)
collected_frames.append(partial_earthquake_data)

# Failed or empty requests come back as empty frames; leave them out of the concat
non_empty_frames = [frame for frame in collected_frames if not frame.empty]
if non_empty_frames:
    full_earthquake_data = pd.concat(non_empty_frames, ignore_index=True)
    # Consecutive windows share their boundary instant (1 January 00:00:00), so an
    # event exactly on the boundary could be returned twice; keep one row per event id.
    full_earthquake_data = (
        full_earthquake_data
        .drop_duplicates(subset="eventid")
        .reset_index(drop=True)
    )
else:
    full_earthquake_data = pd.DataFrame()

# Save the dataset to a CSV file
csv_file = "raw_earthquake_data.csv"
full_earthquake_data.to_csv(csv_file, index=False)

# Output message
print("Data collection complete. Dataset saved as ", csv_file, "to path ", Path.cwd())


Data collection complete. Dataset saved as  raw_earthquake_data.csv to path  d:\University_Work\Data Science
