# Getting data into Python

Outline:

- ASCII Files: numpy.loadtxt, astropy.io.ascii, read_csv (pandas)
- Reading/Writing FITS files: astropy.io.fits, fitsio
- IDL .sav files: scipy.io.readsav
- Pandas

# CSV data

In [2]:
import numpy as np
import requests

In [60]:
# Download some CSV data from the SDSS DR12 SkyServer SQL search endpoint
# and save it locally as galaxy_colors.csv for the examples below.
URL = "http://skyserver.sdss.org/dr12/en/tools/search/x_sql.aspx"

# Join the PhotoObj columns (u, g, r, i, z) with the SpecObj class, z and
# zerr columns, keeping the first 1000 rows classified as 'GALAXY' that
# fall inside the u and g ranges.
cmd = """
SELECT TOP 1000
    p.u, p.g, p.r, p.i, p.z, s.class, s.z, s.zerr
FROM
    PhotoObj AS p
JOIN
    SpecObj AS s ON s.bestobjid = p.objid
WHERE
    p.u BETWEEN 0 AND 19.6 AND
    p.g BETWEEN 0 AND 20 AND
    s.class = 'GALAXY'
"""

# Collapse the multi-line query onto a single line before sending it as a
# URL parameter.
cmd = ' '.join(line.strip() for line in cmd.split('\n'))

# The timeout (in seconds) keeps this cell from hanging indefinitely if the
# server does not answer.
response = requests.get(URL, params={'cmd': cmd, 'format': 'csv'}, timeout=60)

# Fail loudly on an HTTP error rather than saving an error page as "CSV".
response.raise_for_status()

# Write with an explicit encoding so the result does not depend on the
# platform's default locale.
with open('galaxy_colors.csv', 'w', encoding='utf-8') as f:
    f.write(response.text)

In [69]:
!ls -lh galaxy_colors.csv

-rw-rw-r-- 1 kyle kyle 74K Sep 22 14:57 galaxy_colors.csv


In [3]:
!more galaxy_colors.csv

#Table1
u,g,r,i,z,class,z1,zerr
19.41061,18.23754,17.58132,17.20153,16.90159,GALAXY,0.03212454,6.06623E-06
19.54964,17.95799,17.02898,16.531,16.13408,GALAXY,0.1213151,2.358919E-05
18.74425,17.37778,16.80538,16.51149,16.28756,GALAXY,0.04876465,1.378529E-05
17.55033,15.75007,15.02809,14.66306,14.34982,GALAXY,0.04028672,1.167005E-05
17.60645,16.16628,15.51308,15.15529,14.87411,GALAXY,0.0254747,1.205017E-05
19.46927,18.18101,17.59062,17.25874,16.94567,GALAXY,0.03616738,8.249292E-06
19.58999,18.23981,17.54194,17.17573,16.92423,GALAXY,0.07254888,1.603681E-05
19.11101,16.80968,15.6026,15.1123,14.74625,GALAXY,0.156453,2.146377E-05
18.52309,16.65203,15.9179,15.47603,15.16455,GALAXY,0.06675781,1.785021E-05
18.7319,17.42271,16.80514,16.47006,16.18039,GALAXY,0.03646222,1.014089E-05
19.40927,18.40509,18.16815,18.03163,17.92519,GALAXY,0.0222007,4.418426E-06
19.51618,18.32554,17.63795,17.25495,17.02355,GALAXY,0.1380212,9.179801E-06
18.82419,17.60955,17.07287,16.79243,16.64695,GALAXY,0.03101574,1.4765

## Using numpy.loadtxt

In [6]:
# Describe every CSV column as a (name, type) pair so that loadtxt returns a
# structured array: five 8-byte float columns, a 10-byte string column for
# the class label, and two more 8-byte float columns.
dtype = [(name, 'f8') for name in ('u', 'g', 'r', 'i', 'z')]
dtype += [('class', 'S10'), ('redshift', 'f8'), ('redshift_err', 'f8')]

# The first two lines of the file (the "#Table1" marker and the column-name
# row) are not data, so they are skipped.
data = np.loadtxt('galaxy_colors.csv', dtype=dtype, delimiter=',', skiprows=2)

In [8]:
data[:10]

array([ (19.41061, 18.23754, 17.58132, 17.20153, 16.90159, b'GALAXY', 0.03212454, 6.06623e-06),
       (19.54964, 17.95799, 17.02898, 16.531, 16.13408, b'GALAXY', 0.1213151, 2.358919e-05),
       (18.74425, 17.37778, 16.80538, 16.51149, 16.28756, b'GALAXY', 0.04876465, 1.378529e-05),
       (17.55033, 15.75007, 15.02809, 14.66306, 14.34982, b'GALAXY', 0.04028672, 1.167005e-05),
       (17.60645, 16.16628, 15.51308, 15.15529, 14.87411, b'GALAXY', 0.0254747, 1.205017e-05),
       (19.46927, 18.18101, 17.59062, 17.25874, 16.94567, b'GALAXY', 0.03616738, 8.249292e-06),
       (19.58999, 18.23981, 17.54194, 17.17573, 16.92423, b'GALAXY', 0.07254888, 1.603681e-05),
       (19.11101, 16.80968, 15.6026, 15.1123, 14.74625, b'GALAXY', 0.156453, 2.146377e-05),
       (18.52309, 16.65203, 15.9179, 15.47603, 15.16455, b'GALAXY', 0.06675781, 1.785021e-05),
       (18.7319, 17.42271, 16.80514, 16.47006, 16.18039, b'GALAXY', 0.03646222, 1.014089e-05)], 
      dtype=[('u', '<f8'), ('g', '<f8'), ('r', '

## Using astropy.io.ascii

In [16]:
from astropy.io import ascii

In [29]:
# Parse the CSV with astropy.io.ascii; the '#' comment marker makes the
# reader ignore the leading "#Table1" line.
data = ascii.read(
    'galaxy_colors.csv',
    comment='#',
    format='csv',
)

In [30]:
type(data)

astropy.table.table.Table

In [26]:
data[:10]

u,g,r,i,z,class,z1,zerr
float64,float64,float64,float64,float64,str192,float64,float64
19.41061,18.23754,17.58132,17.20153,16.90159,GALAXY,0.03212454,6.06623e-06
19.54964,17.95799,17.02898,16.531,16.13408,GALAXY,0.1213151,2.358919e-05
18.74425,17.37778,16.80538,16.51149,16.28756,GALAXY,0.04876465,1.378529e-05
17.55033,15.75007,15.02809,14.66306,14.34982,GALAXY,0.04028672,1.167005e-05
17.60645,16.16628,15.51308,15.15529,14.87411,GALAXY,0.0254747,1.205017e-05
19.46927,18.18101,17.59062,17.25874,16.94567,GALAXY,0.03616738,8.249292e-06
19.58999,18.23981,17.54194,17.17573,16.92423,GALAXY,0.07254888,1.603681e-05
19.11101,16.80968,15.6026,15.1123,14.74625,GALAXY,0.156453,2.146377e-05
18.52309,16.65203,15.9179,15.47603,15.16455,GALAXY,0.06675781,1.785021e-05
18.7319,17.42271,16.80514,16.47006,16.18039,GALAXY,0.03646222,1.014089e-05


## Using pandas

In [31]:
import pandas

In [34]:
# Load the CSV into a DataFrame; comment='#' drops the "#Table1" line so the
# column-name row is used as the header.
data = pandas.read_csv('galaxy_colors.csv', comment='#')

# Display the first ten rows.
data.head(10)

Unnamed: 0,u,g,r,i,z,class,z1,zerr
0,19.41061,18.23754,17.58132,17.20153,16.90159,GALAXY,0.032125,6e-06
1,19.54964,17.95799,17.02898,16.531,16.13408,GALAXY,0.121315,2.4e-05
2,18.74425,17.37778,16.80538,16.51149,16.28756,GALAXY,0.048765,1.4e-05
3,17.55033,15.75007,15.02809,14.66306,14.34982,GALAXY,0.040287,1.2e-05
4,17.60645,16.16628,15.51308,15.15529,14.87411,GALAXY,0.025475,1.2e-05
5,19.46927,18.18101,17.59062,17.25874,16.94567,GALAXY,0.036167,8e-06
6,19.58999,18.23981,17.54194,17.17573,16.92423,GALAXY,0.072549,1.6e-05
7,19.11101,16.80968,15.6026,15.1123,14.74625,GALAXY,0.156453,2.1e-05
8,18.52309,16.65203,15.9179,15.47603,15.16455,GALAXY,0.066758,1.8e-05
9,18.7319,17.42271,16.80514,16.47006,16.18039,GALAXY,0.036462,1e-05


In [35]:
type(data)

pandas.core.frame.DataFrame

# Reading FITS files