In [1]:
import urllib
import urllib.request

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb

In [2]:
# Automated way to download earthquake data!
# USGS FDSN event web service query: events with magnitude >= 4.5 between
# 2021-02-01 and 2022-02-01, returned as CSV and ordered by time.
url = 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=csv&starttime=2021-2-1&endtime=2022-2-1&minmagnitude=4.5&orderby=time'

# Name the local file once so the download target and the file we read can never drift apart.
csv_path = 'query.csv'

# Needs the urllib.request submodule to be imported explicitly (plain `import urllib`
# does not guarantee that urllib.request is loaded).
urllib.request.urlretrieve(url, csv_path)

# Load the downloaded catalog into a DataFrame (one row per earthquake event)
earth = pd.read_csv(csv_path)
print(earth)

                          time  latitude  longitude  depth  mag magType  nst  \
0     2022-01-31T22:18:32.343Z  -20.8636  -175.3691  10.00  4.6      mb  NaN   
1     2022-01-31T22:01:27.405Z   31.9193   104.3384  10.00  4.6      mb  NaN   
2     2022-01-31T21:36:17.102Z  -29.8948   -71.9834   9.43  4.5     mwr  NaN   
3     2022-01-31T20:07:12.636Z  -20.5450  -175.3251  10.00  4.7      mb  NaN   
4     2022-01-31T19:18:23.415Z  -30.0451  -177.5443  20.72  4.8      mb  NaN   
...                        ...       ...        ...    ...  ...     ...  ...   
9117  2021-02-01T05:47:52.716Z   38.8807    25.9761  10.00  4.8      mb  NaN   
9118  2021-02-01T05:46:52.958Z   38.9677    26.0599  10.00  4.8      mb  NaN   
9119  2021-02-01T05:31:49.212Z    4.1646   126.5903  49.04  4.5      mb  NaN   
9120  2021-02-01T05:19:59.928Z  -59.1499   -25.9477  71.80  5.1      mb  NaN   
9121  2021-02-01T00:12:47.586Z  -28.9953  -177.2697  63.01  4.6      mb  NaN   

        gap   dmin   rms  ...          

## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

## Mini lesson on idxmax (idxmin), which is similar to argmax (argmin) in NumPy: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmax.html

## Remember, this type of function gives us the INDEX of the maximum (or minimum) value in the data structure
## We'll also use a similar function in Xarray!

## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

### Let's look for the highest latitude in the Northern Hemisphere where an earthquake event occurred in this dataset!

In [3]:
# Calling max() on the whole DataFrame reduces each numeric column separately
# (axis=0, the default, means "go down the rows of every column").
mlat1 = earth.max(axis=0, numeric_only=True)
print(mlat1)        # one maximum per numeric column
print(type(mlat1))  # the result is a pandas Series...
print(mlat1.index)  # ...whose index holds the column names

latitude            87.3752
longitude          179.9962
depth              664.7400
mag                  8.2000
nst                145.0000
gap                290.0000
dmin                50.1330
rms                  2.8200
horizontalError     22.2000
depthError          31.6100
magError             0.5580
magNst             706.0000
dtype: float64
<class 'pandas.core.series.Series'>
Index(['latitude', 'longitude', 'depth', 'mag', 'nst', 'gap', 'dmin', 'rms',
       'horizontalError', 'depthError', 'magError', 'magNst'],
      dtype='object')


In [4]:
# REMINDERS
# Selecting a single column (dataframe['col name']) hands back a Series,
# so calling max() on it reduces just that one column to a single value.
mlat = earth.loc[:, 'latitude'].max()
print(mlat)  # the largest latitude value in the dataset

87.3752


In [5]:
# idxmax() tells us WHERE the maximum sits (its row index label), not the value itself
mlat_ind = earth.loc[:, 'latitude'].idxmax()
print(mlat_ind)

8210


In [6]:
# Let's confirm...
# Reminder: loc selects by index LABEL, not by positional integer index:
#     earth.loc[<row index label>, <column index label>]
print(earth.loc[mlat_ind, 'latitude'])  # one of several ways to pull out this single value

# Leaving out the column label returns the entire row (all columns) as a Series
print(earth.loc[mlat_ind])

87.3752
time               2021-03-03T13:09:22.048Z
latitude                            87.3752
longitude                          -72.5536
depth                                  10.0
mag                                     5.0
magType                                  mb
nst                                     NaN
gap                                    46.0
dmin                                 10.914
rms                                     0.5
net                                      us
id                               us7000df50
updated            2021-05-13T22:42:18.040Z
place                       Lomonosov Ridge
type                             earthquake
horizontalError                         9.0
depthError                              1.8
magError                              0.028
magNst                                394.0
status                             reviewed
locationSource                           us
magSource                                us
Name: 8210, dtype: object

## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
## Reminders on working with data structures in Pandas: 
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

## What if....I want the magnitudes of earthquakes that occur at a latitude north of 35 deg N?

## Let's break down, step by step, what we'd do

In [7]:
# Element-wise comparison: True for each row where latitude > 35, False otherwise
earth['latitude'].gt(35)

0       False
1       False
2       False
3       False
4       False
        ...  
9117     True
9118     True
9119    False
9120    False
9121    False
Name: latitude, Length: 9122, dtype: bool

In [8]:
# Boolean indexing, step by step:
#   1. earth['latitude'].gt(35) builds a True/False Series with one entry per row
#   2. passing that Series to the ORIGINAL dataframe (which holds ALL earthquake data)
#      keeps only the rows flagged True
# Result: every column, but only the rows whose lat > 35
higher_lat = earth.loc[earth['latitude'].gt(35)]
print(higher_lat)

                          time  latitude  longitude  depth  mag magType   nst  \
6     2022-01-31T17:10:21.129Z   36.8510   -97.7935   7.85  4.5     mww  96.0   
19    2022-01-31T09:30:36.617Z   37.9265    32.0980  10.00  4.5      mb   NaN   
21    2022-01-31T00:49:57.322Z   39.1923    40.1603  10.00  4.7     mwr   NaN   
41    2022-01-30T10:14:38.004Z   53.7360   161.1118  44.13  4.7      mb   NaN   
72    2022-01-29T10:23:00.165Z   53.8399  -164.0710  35.00  4.6      mb   NaN   
...                        ...       ...        ...    ...  ...     ...   ...   
9114  2021-02-01T08:34:45.955Z   37.3523   141.8861  34.92  4.6      mb   NaN   
9115  2021-02-01T07:43:08.929Z   49.9861   -28.9615  10.00  4.6      mb   NaN   
9116  2021-02-01T06:00:05.082Z   38.9080    26.0285  10.00  4.8      mb   NaN   
9117  2021-02-01T05:47:52.716Z   38.8807    25.9761  10.00  4.8      mb   NaN   
9118  2021-02-01T05:46:52.958Z   38.9677    26.0599  10.00  4.8      mb   NaN   

        gap    dmin   rms  

In [9]:
# To look at magnitudes only, select the 'mag' column from the filtered rows
higher_lat.loc[:, 'mag']

6       4.5
19      4.5
21      4.7
41      4.7
72      4.6
       ... 
9114    4.6
9115    4.6
9116    4.8
9117    4.8
9118    4.8
Name: mag, Length: 1075, dtype: float64