# Preparing data on earthquakes

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from os.path import basename, exists

def download(url):
    """Fetch `url` into the working directory unless it is already there.

    The local file name is the last path component of `url`. If a file
    with that name already exists nothing happens; otherwise the file is
    retrieved and a confirmation line is printed.

    url: string URL of the file to retrieve
    """
    target = basename(url)
    if exists(target):
        # Already on disk: skip the transfer entirely.
        return

    # Imported lazily so the module is only loaded when a transfer is needed.
    from urllib.request import urlretrieve
    saved_path, _headers = urlretrieve(url, target)
    print('Downloaded ' + saved_path)

In [3]:
# Fetch the SCSN catalog archive from the SCEDC server; `download` skips the
# transfer when SCSN_catalogs.tar.gz is already in the working directory.
download("https://scedc.caltech.edu/ftp/catalogs/SCSN/SCSN_catalogs.tar.gz")

In [4]:
# Confirm the archive is present and show its size.
!ls -lh SCSN_catalogs.tar.gz

-rw-rw-r-- 1 downey downey 19M Mar  1 19:53 SCSN_catalogs.tar.gz


In [5]:
# Unpack the gzipped tarball into the working directory; the loading cell
# below reads the per-year files from the SCSN/ directory.
!tar -xzf SCSN_catalogs.tar.gz

In [6]:
# Read one fixed-width catalog file per year (1981 through 2022) and collect
# the resulting frames in a list for later concatenation.
quake_dfs = []
for year in range(1981, 2023):
    catalog_path = f'SCSN/{year}.catalog'
    # Skip the first 9 lines of each file and let pandas infer column widths.
    raw = pd.read_fwf(catalog_path, colspecs='infer', skiprows=9)

    # Discard the final row (presumably a trailing non-data line -- confirm
    # against the catalog files). `n` is the row count *before* the drop.
    n = len(raw)
    df = raw.drop(n - 1)

    # Per-year check: year, row count, missing magnitudes, smallest magnitude.
    print(year, n, df['MAG'].isna().sum(), df['MAG'].min())
    quake_dfs.append(df)

1981 10685 0 0.0
1982 14028 0 0.0
1983 14465 0 0.0
1984 17890 0 0.0
1985 18848 0 0.0
1986 17073 0 0.0
1987 13577 0 0.0
1988 11070 0 0.0
1989 11568 0 0.0
1990 11239 0 0.0
1991 10093 0 0.0
1992 51434 0 0.0
1993 22014 0 0.0
1994 28255 0 0.0
1995 24990 0 0.0
1996 19875 0 0.0
1997 15321 0 0.0
1998 13556 0 0.0
1999 21380 0 0.0
2000 20375 0 0.0
2001 18116 0 0.0
2002 11858 0 0.0
2003 11547 0 0.0
2004 12259 0 0.0
2005 13206 0 0.0
2006 11196 0 0.0
2007 11485 0 0.0
2008 14059 0 0.0
2009 16732 0 0.0
2010 41960 0 0.0
2011 15566 0 0.0
2012 17164 0 0.0
2013 18545 0 0.0
2014 14430 0 0.0
2015 15587 0 0.0
2016 16029 0 0.0
2017 16345 0 0.0
2018 20731 0 0.0
2019 63572 0 0.0
2020 35282 0 0.0
2021 23202 0 0.0
2022 16886 0 0.0


In [7]:
# Stack the yearly frames into a single table. ignore_index=True gives the
# result a fresh 0..N-1 index; without it each year's labels restart at 0,
# leaving duplicate index labels in the combined frame.
quake = pd.concat(quake_dfs, ignore_index=True)
quake.shape

(803451, 14)

In [8]:
# Keep only the three date columns and the magnitude, then write them to
# quake.csv without the row index.
columns = ['#YYY', 'MM', 'DD', 'MAG']
quake.loc[:, columns].to_csv(path_or_buf='quake.csv', index=False)

In [9]:
# Confirm the CSV was written and show its size.
!ls -lh quake.csv

-rw-rw-r-- 1 downey downey 12M Mar  1 20:00 quake.csv


*Elements of Data Science*

Copyright 2022 Allen Downey

License: [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/)