# 04_03_pandas.ipynb - Data tables with Pandas

In [None]:
import requests

import numpy as np
import matplotlib.pyplot as pp
import pandas as pd
import astropy.units

In [None]:
# utility function from the last video

def dump(url, filename, timeout=30):
    """Download `url` and save the raw response body to `filename`.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET request.
    filename : str
        Path of the file to (over)write with the response bytes.
    timeout : float or None, optional
        Seconds to wait for the server before giving up; None waits forever.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    r = requests.get(url, timeout=timeout)

    # fail loudly rather than saving an HTTP error page as if it were the data
    r.raise_for_status()

    # binary mode: write the bytes exactly as received, with no re-encoding
    with open(filename, 'wb') as outfile:
        outfile.write(r.content)

In [None]:
# if the network is unavailable, the cached copy cache/gwtc-20210726.csv can be used instead
dump(url='https://www.gw-openscience.org/eventapi/csv/GWTC',
     filename='gwtc.csv')

In [None]:
# parse the downloaded CSV file into a DataFrame
gwtc = pd.read_csv('gwtc.csv')

In [None]:
# display the table (a notebook shows the value of a cell's last expression)
gwtc

In [None]:
# show only the first rows (head() returns 5 rows by default)
gwtc.head()

In [None]:
# print a summary of the columns: names, non-null counts, and dtypes
gwtc.info()

In [None]:
# descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns
gwtc.describe()

In [None]:
# use the 'id' column as the row index, then order the rows by that index
gwtc_i = gwtc.set_index('id')
gwtc_i = gwtc_i.sort_index()

In [None]:
# first rows of the re-indexed table; 'id' is now the index rather than a column
gwtc_i.head()

In [None]:
# select a single row by its index label
gwtc_i.loc['GW150914-v3']

In [None]:
# slice by index labels; unlike positional slices, label slices include both endpoints
gwtc_i.loc['GW150914-v3':'GW151226-v2']

In [None]:
# a column can be accessed as an attribute when its name is a valid Python identifier
gwtc_i.luminosity_distance

In [None]:
# attach the Mpc unit to the largest luminosity distance, then convert it to light years
(gwtc_i['luminosity_distance'].max() * astropy.units.Mpc).to('lightyear')

In [None]:
# bracket access to a column by name
gwtc_i['total_mass_source']

In [None]:
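# alternative (left disabled): recompute the total for every row, overwriting the values already in the table
# gwtc_i['total_mass_source'] = gwtc_i['mass_1_source'] + gwtc_i['mass_2_source']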

In [None]:
# boolean mask of the rows whose total_mass_source is missing;
# Series.isna() also recognizes None and works for non-float dtypes
missing = gwtc_i['total_mass_source'].isna()

In [None]:
# display the mask: True where total_mass_source is missing
missing

In [None]:
# fill in only the missing totals with the sum of the two component masses
gwtc_i.loc[missing, 'total_mass_source'] = (
    gwtc_i['mass_1_source'].add(gwtc_i['mass_2_source'])[missing]
)

In [None]:
# look at the column again after filling in the missing totals
gwtc_i['total_mass_source']

In [None]:
# luminosity distance against total mass, drawn with matplotlib as one dot per row
pp.plot(gwtc_i['total_mass_source'], gwtc_i['luminosity_distance'], '.')

# label the axes with the quantities and their units
pp.xlabel('total mass (Msun)')
pp.ylabel('luminosity distance (Mpc)')

In [None]:
# the same plot through the pandas plotting interface, which labels the axes itself
gwtc_i.plot.scatter(
    x='total_mass_source',
    y='luminosity_distance',
    s=50,
    figsize=(9, 6),
)

In [None]:
# Bubble chart: marker size follows the network SNR, marker color follows redshift.
# The size and color series are taken from gwtc_i rather than gwtc because they are
# matched to the points by position, and gwtc_i has been re-sorted by id while gwtc
# keeps the original file order.
gwtc_i.plot.scatter('total_mass_source', 'luminosity_distance',
                    s=gwtc_i['network_matched_filter_snr'] * 15,  # some trial-and-error to set sizes just right
                    linewidth=1, edgecolors='w',                  # a white contour helps tell spots apart
                    c=gwtc_i['redshift'], colormap='plasma',      # set colors using colormap
                    sharex=False,    # make sure x label does not disappear (thanks, stackoverflow.com)
                    figsize=(10,6))

In [None]:
# Component masses, with marker size following total mass and color following redshift.
# Sizes and colors come from gwtc_i rather than gwtc: they are matched to the points by
# position (gwtc_i is sorted by id, gwtc is not), and only gwtc_i holds the
# total_mass_source values that were filled in for the rows where it was missing.
gwtc_i.plot.scatter('mass_1_source', 'mass_2_source',
                    s=gwtc_i['total_mass_source'] * 3,
                    linewidth=1, edgecolors='w',
                    c=gwtc_i['redshift'], colormap='plasma',
                    sharex=False, figsize=(10,6))

# dotted guide lines where mass_2 equals mass_1, mass_1/2 and mass_1/5
pp.plot([0,100],[0,100],'k:')
pp.plot([0,100],[0,50],'k:')
pp.plot([0,100],[0,20],'k:')

# fix both axes to the 0-100 range; the trailing semicolon suppresses the cell's text output
pp.axis([0,100,0,100]);