In [1]:
# Record when this notebook was run and with which Python version.
import sys
import time

import numpy as np

# Timestamp of this run.
print(time.ctime())
# Keep only the part of the version string that precedes the first '|'.
print(sys.version.split('|')[0])

Thu May 17 15:36:07 2018
3.6.1 


# C: How to read and write files (ASCII and FITS)

This is part of the Python lecture given by Christophe Morisset at IA-UNAM.

Some information is available here: http://www.tutorialspoint.com/python/python_files_io.htm

### Using numpy loadtxt

https://numpy.org/doc/stable/reference/generated/numpy.loadtxt.html

In [2]:
%%writefile data2.dat
# The following data are for test purpose
N    f   x   y type
1   2.3  6   8 star
2   3.5  7   9 galaxy
3  -4.2  5   7 cluster
#4  -10.5  5  7 test

Overwriting data2.dat


In [3]:
# Fast way of reading the file.
# One has to tell loadtxt to skip the first 2 rows (skiprows=2): the leading
# comment line and the line holding the column names.
# dtype gives one type per column; the resulting fields are named f0 ... f4.
# NOTE(review): in the recorded output the text column shows up as "b'star'"
# (the bytes repr ended up inside the string). Newer NumPy releases accept an
# `encoding` keyword that presumably avoids this -- confirm against the
# installed NumPy version.
b = np.loadtxt('data2.dat', skiprows=2, dtype='i4,f, f, f, U10')

In [4]:
print(b)

[(1,  2.29999995,  6.,  8., "b'star'")
 (2,  3.5       ,  7.,  9., "b'galaxy'")
 (3, -4.19999981,  5.,  7., "b'cluster'")]


In [5]:
type(b)

numpy.ndarray

In [6]:
# The names of the columns are f0, f1, f2, etc
b.dtype

dtype([('f0', '<i4'), ('f1', '<f4'), ('f2', '<f4'), ('f3', '<f4'), ('f4', '<U10')])

### Using numpy genfromtxt

https://numpy.org/doc/stable/reference/generated/numpy.genfromtxt.html

In [7]:
# Fast and versatile way to read the file.
# names=True: the column names are taken from the file.
# dtype=None: the type of each column is determined automatically while reading.
# skip_header=1: skip the leading comment line, so the names line comes first.
# NOTE(review): in the recorded output the text column comes back as bytes
# (b'star', dtype 'S7'). Passing encoding='utf-8' (NumPy >= 1.14) should
# return str instead -- confirm against the installed NumPy version.
c = np.genfromtxt('data2.dat', names=True, dtype=None, skip_header=1)

In [8]:
print(c)

[(1,  2.3, 6, 8, b'star') (2,  3.5, 7, 9, b'galaxy')
 (3, -4.2, 5, 7, b'cluster')]


In [9]:
type(c)

numpy.ndarray

In [10]:
c.dtype

dtype([('N', '<i8'), ('f', '<f8'), ('x', '<i8'), ('y', '<i8'), ('type', 'S7')])

In [11]:
c['f']

array([ 2.3,  3.5, -4.2])

### Using recfrom to obtain a record array

In [12]:
# Takes the same keywords as genfromtxt; usecols keeps only the listed columns.
# np.recfromtxt is deprecated since NumPy 2.0. Viewing the genfromtxt result
# as np.recarray is what recfromtxt did, and it still gives attribute access
# to the columns (e.g. f.N).
f = np.genfromtxt('data2.dat', names=True, dtype=None, skip_header=1,
                  usecols=("N", "f", "type")).view(np.recarray)

In [13]:
f

rec.array([(1,  2.3, b'star'), (2,  3.5, b'galaxy'), (3, -4.2, b'cluster')], 
          dtype=[('N', '<i8'), ('f', '<f8'), ('type', 'S7')])

In [14]:
f.N

array([1, 2, 3])

### Pandas Data Frames

https://pandas.pydata.org/pandas-docs/stable/

In [15]:
import pandas as pd

In [16]:
# Columns are separated by runs of whitespace; everything after a '#' is ignored.
# sep=r'\s+' is the equivalent of delim_whitespace=True, which is deprecated
# since pandas 2.2.
df = pd.read_table('data2.dat', comment='#', sep=r'\s+')

In [17]:
df

Unnamed: 0,N,f,x,y,type
0,1,2.3,6,8,star
1,2,3.5,7,9,galaxy
2,3,-4.2,5,7,cluster


In [18]:
df.keys()

Index(['N', 'f', 'x', 'y', 'type'], dtype='object')

In [19]:
df['f']

0    2.3
1    3.5
2   -4.2
Name: f, dtype: float64

In [20]:
df.values # the numpy array

array([[1, 2.3, 6, 8, 'star'],
       [2, 3.5, 7, 9, 'galaxy'],
       [3, -4.2, 5, 7, 'cluster']], dtype=object)

In [21]:
df['f'].values

array([ 2.3,  3.5, -4.2])

In [22]:
# Mean of the numeric columns only. Since pandas 2.0 a bare df.mean() no longer
# drops the string column 'type' silently and raises a TypeError instead.
df.mean(numeric_only=True)

N    2.000000
f    0.533333
x    6.000000
y    8.000000
dtype: float64