In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import requests
import tqdm

In [3]:
MIN_TIME = "100-01-01"
START_TIME = '1940-01-01'
END_TIME = '2023-10-01'
MIN_LAT = -90
MAX_LAT = 90
MIN_LON = -180
MAX_LON = 180

In [3]:
def make_params(starttime, endtime, minlatitude, maxlatitude, minlongitude, maxlongitude):
    params = {
        "format": "geojson",
        "starttime": starttime,
        "endtime": endtime,
        "minlatitude": minlatitude,
        "maxlatitude": maxlatitude,
        "minlongitude": minlongitude,
        "maxlongitude": maxlongitude
    }
    return params

In [4]:
def make_datarange(start_time, end_time, min_time):
    datarange = pd.date_range(start_time, end_time, freq='W').tolist()
    datarange = [str(x)[:10] for x in datarange]
    datarange.insert(0, min_time)
    return datarange

In [5]:
def get_earthquake_count(params):
    url = "https://earthquake.usgs.gov/fdsnws/event/1/count"
    response = requests.get(url, params=params)
    return response.json()["count"]

In [6]:
def get_earthquake_data(params):
    url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
    response = requests.get(url, params=params)
    return response

In [7]:
def make_df(resp, params, errors):
    all_eqs = []
    try:
        for eq in resp.json()["features"]:
            prop = list(eq["properties"].values())
            prop.extend(eq["geometry"]["coordinates"])
            all_eqs.append(prop)
        cols = list(resp.json()["features"][0]["properties"].keys())
        cols.extend(["longitude", "latitude", "depth"])
        df = pd.DataFrame(all_eqs, columns=cols)
    except:
        errors.append(params)
        df = pd.DataFrame()
    return df, errors

In [8]:
def download_data(starttime, endtime, minlatitude, maxlatitude, minlongitude, maxlongitude):
    dfs = []
    errors = []
    data_range = make_datarange(starttime, endtime, MIN_TIME)
    for i in tqdm.tqdm(range(len(data_range)-1)):
        params = make_params(data_range[i], data_range[i+1], minlatitude, maxlatitude, minlongitude, maxlongitude)
        resp = get_earthquake_data(params)
        eq_count = get_earthquake_count(params)
        df, errors = make_df(resp, params, errors)
        if len(df) != eq_count or eq_count > 20000:
            errors.append(params)
            print("Error: Dataframe length does not match earthquake count")
            print(params)
        dfs.append(df)
    df = pd.concat(dfs)
    return df, errors

In [9]:
df, errors = download_data(START_TIME, END_TIME, MIN_LAT, MAX_LAT, MIN_LON, MAX_LON)
df

  0%|          | 0/4370 [00:00<?, ?it/s]

100%|██████████| 4370/4370 [2:33:31<00:00,  2.11s/it]  


Unnamed: 0,mag,place,time,updated,tz,url,detail,felt,cdi,mmi,...,nst,dmin,rms,gap,magType,type,title,longitude,latitude,depth
0,5.84,"14 km NE of Eloúnda, Greece",-946270524830,1651001597199,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,,,,,mw,earthquake,"M 5.8 - 14 km NE of Eloúnda, Greece",25.814000,35.373000,15.00
1,,southeast of the Loyalty Islands,-946288579660,1652114853165,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,,,,,,earthquake,M ? - southeast of the Loyalty Islands,171.134000,-22.015000,180.00
2,6.07,"285 km E of Kuril’sk, Russia",-946309460790,1651001591890,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,,,,,mw,earthquake,"M 6.1 - 285 km E of Kuril’sk, Russia",151.498000,45.077000,25.00
3,3.42,"9km S of Borrego Springs, CA",-946394224430,1453944863830,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,3.0,0.8066,0.61,293.0,ml,earthquake,"M 3.4 - 9km S of Borrego Springs, CA",-116.367333,33.173167,6.00
4,3.97,"35km S of San Nicolas Is., CA",-946402749540,1453944629960,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,5.0,1.614,0.66,316.0,ml,earthquake,"M 4.0 - 35km S of San Nicolas Is., CA",-119.442000,32.929333,6.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2547,1.09,"0 km S of Colton, CA",1695514839520,1695656113633,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,50.0,0.09282,0.17,36.0,ml,earthquake,"M 1.1 - 0 km S of Colton, CA",-117.314167,34.071333,16.11
2548,0.14,"84 km NW of Karluk, Alaska",1695514282160,1696540900348,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,11.0,,0.09,82.0,ml,earthquake,"M 0.1 - 84 km NW of Karluk, Alaska",-155.259000,58.195167,2.55
2549,0.7,"30 km N of Sutcliffe, Nevada",1695514246071,1695580950150,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,,,,...,4.0,0.072,0.0215,280.74,ml,earthquake,"M 0.7 - 30 km N of Sutcliffe, Nevada",-119.668800,40.223400,8.40
2550,1.91,"2 km NW of Redwood Valley, CA",1695513995040,1696302470640,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/fdsnws/event/1/que...,1.0,3.1,,...,43.0,0.03224,0.07,50.0,md,earthquake,"M 1.9 - 2 km NW of Redwood Valley, CA",-123.223333,39.279500,4.43


In [10]:
df.to_csv("../data/usgs_data.csv", index=False)

In [11]:
len(errors)

0

In [2]:
df = pd.read_csv("../data/usgs_data.csv")

  df = pd.read_csv("../data/usgs_data.csv")


In [3]:
df.columns

Index(['mag', 'place', 'time', 'updated', 'tz', 'url', 'detail', 'felt', 'cdi',
       'mmi', 'alert', 'status', 'tsunami', 'sig', 'net', 'code', 'ids',
       'sources', 'types', 'nst', 'dmin', 'rms', 'gap', 'magType', 'type',
       'title', 'longitude', 'latitude', 'depth'],
      dtype='object')

In [5]:
df = df[df["type"] == "earthquake"]
df = df[["time", "longitude", "latitude", "depth", "mag"]]

In [7]:
df["time"] = df["time"].apply(lambda x: dt.datetime.fromtimestamp(x/1000))

In [6]:
df.to_csv("../data/usgs_data_small.csv", index=False)

In [7]:
df.isna().sum()

time              0
longitude         0
latitude          0
depth          1606
mag          164502
dtype: int64