In [2]:
import datetime as dt
import numpy as np
import pandas as pd

In [3]:
np.random.seed(0) # set a seed for reproducibility
pd.Series(np.random.rand(5), name='random')

0    0.548814
1    0.715189
2    0.602763
3    0.544883
4    0.423655
Name: random, dtype: float64

In [4]:
np.random.seed() # set seed so result is reproducible

pd.DataFrame(
    {
        'random': np.random.rand(5),
        'text': ['hot', 'warm', 'cool', 'cold', None],
        'truth': [np.random.choice([True, False]) for _ in range(5)]
    },
    index = pd.date_range(
    end=dt.date(2019, 4, 21),
    freq='1D',
    periods=5,
    name='date')
)

Unnamed: 0_level_0,random,text,truth
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-17,0.555251,hot,True
2019-04-18,0.661498,warm,True
2019-04-19,0.138052,cool,True
2019-04-20,0.027943,cold,True
2019-04-21,0.466988,,True


In [5]:
pd.DataFrame([
    {'mag': 5.2, 'place': 'California'},
    {'mag': 1.2, 'place': 'Alaska'},
    {'mag': 0.2, 'place': 'California'},
])

Unnamed: 0,mag,place
0,5.2,California
1,1.2,Alaska
2,0.2,California


In [6]:
list_of_tuples = [(n, n**2, n**3) for n in range(5)]
list_of_tuples

[(0, 0, 0), (1, 1, 1), (2, 4, 8), (3, 9, 27), (4, 16, 64)]

In [7]:
pd.DataFrame(
    list_of_tuples,
    columns=['n', 'n_squared', 'n_cubed']
)

Unnamed: 0,n,n_squared,n_cubed
0,0,0,0
1,1,1,1
2,2,4,8
3,3,9,27
4,4,16,64


In [8]:
pd.DataFrame(
    np.array([
        [0, 0, 0],
        [1, 1, 1],
        [2, 4, 8],
        [3, 9, 27],
        [4, 16, 64]
    ]), columns=['n', 'n_squared', 'n_cubed']
)

Unnamed: 0,n,n_squared,n_cubed
0,0,0,0
1,1,1,1
2,2,4,8
3,3,9,27
4,4,16,64


In [9]:
!wc -l data/earthquakes.csv

9333 data/earthquakes.csv


In [10]:
!ls -lh data | grep earthquakes.csv

-rw-rw-r-- 1 ajay ajay 3.4M Aug 23 20:04 earthquakes.csv


In [11]:
files = !ls -lh data
[file for file in files if 'earthquake' in file]

['-rw-rw-r-- 1 ajay ajay 3.4M Aug 23 20:04 earthquakes.csv']

In [12]:
!head -n 2 data/earthquakes.csv

alert,cdi,code,detail,dmin,felt,gap,ids,mag,magType,mmi,net,nst,place,rms,sig,sources,status,time,title,tsunami,type,types,tz,updated,url
,,37389218,https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=ci37389218&format=geojson,0.008693,,85.0,",ci37389218,",1.35,ml,,ci,26.0,"9km NE of Aguanga, CA",0.19,28,",ci,",automatic,1539475168010,"M 1.4 - 9km NE of Aguanga, CA",0,earthquake,",geoserve,nearby-cities,origin,phase-data,",-480.0,1539475395144,https://earthquake.usgs.gov/earthquakes/eventpage/ci37389218


In [13]:
!awk -F',' '{print NF; exit}' data/earthquakes.csv

26


In [14]:
headers = !head -n 1 data/earthquakes.csv
len(headers[0].split(','))

26

In [15]:
df = pd.read_csv('data/earthquakes.csv')

In [16]:
df = pd.read_csv('https://github.com/stefmolin/Hands-On-Data-Analysis-with-Pandas-2nd-edition/blob/master/ch_02/data/earthquakes.csv?raw=True')

In [17]:
df.to_csv('output.csv', index=False)

In [18]:
import sqlite3

In [19]:
with sqlite3.connect('data/quakes.db') as connection:
    pd.read_csv('data/tsunamis.csv').to_sql(
        'tsunamis', connection,  index=False,
        if_exists='replace'
    )

In [20]:
with sqlite3.connect('data/quakes.db') as connection:
    tsunamis = pd.read_sql('SELECT * FROM tsunamis', connection)

In [21]:
tsunamis.head()

Unnamed: 0,alert,type,title,place,magType,mag,time
0,,earthquake,"M 5.0 - 165km NNW of Flying Fish Cove, Christm...","165km NNW of Flying Fish Cove, Christmas Island",mww,5.0,1539459504090
1,green,earthquake,"M 6.7 - 262km NW of Ozernovskiy, Russia","262km NW of Ozernovskiy, Russia",mww,6.7,1539429023560
2,green,earthquake,"M 5.6 - 128km SE of Kimbe, Papua New Guinea","128km SE of Kimbe, Papua New Guinea",mww,5.6,1539312723620
3,green,earthquake,"M 6.5 - 148km S of Severo-Kuril'sk, Russia","148km S of Severo-Kuril'sk, Russia",mww,6.5,1539213362130
4,green,earthquake,"M 6.2 - 94km SW of Kokopo, Papua New Guinea","94km SW of Kokopo, Papua New Guinea",mww,6.2,1539208835130


In [22]:
import requests

In [23]:
yesterday = dt.date.today() - dt.timedelta(days=1)

api = 'https://earthquake.usgs.gov/fdsnws/event/1/query'

payload = {
    'format': 'geojson',
    'starttime': yesterday - dt.timedelta(days=30),
    'endtime': yesterday
}

response = requests.get(api, params=payload)

In [24]:
response.status_code

200

In [25]:
earthquake_json = response.json()
earthquake_json.keys()

dict_keys(['type', 'metadata', 'features', 'bbox'])

In [26]:
earthquake_json['features'][0]

{'type': 'Feature',
 'properties': {'mag': 0.87,
  'place': '18km ESE of Little Lake, CA',
  'time': 1629469130120,
  'updated': 1629469342245,
  'tz': None,
  'url': 'https://earthquake.usgs.gov/earthquakes/eventpage/ci40016408',
  'detail': 'https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=ci40016408&format=geojson',
  'felt': None,
  'cdi': None,
  'mmi': None,
  'alert': None,
  'status': 'automatic',
  'tsunami': 0,
  'sig': 12,
  'net': 'ci',
  'code': '40016408',
  'ids': ',ci40016408,',
  'sources': ',ci,',
  'types': ',nearby-cities,origin,phase-data,scitech-link,',
  'nst': 15,
  'dmin': 0.08366,
  'rms': 0.15,
  'gap': 65,
  'magType': 'ml',
  'type': 'earthquake',
  'title': 'M 0.9 - 18km ESE of Little Lake, CA'},
 'geometry': {'type': 'Point', 'coordinates': [-117.7163333, 35.8825, 5.48]},
 'id': 'ci40016408'}

In [27]:
earthquake_properties_data = [
    quake['properties']
    for quake in earthquake_json['features']
]