# Numpy
http://www.numpy.org/

# Scipy
http://www.scipy.org/

# Astropy
http://www.astropy.org/

## Pandas
http://pandas.pydata.org/


Examples from [gully](https://github.com/gully)
* https://github.com/BrownDwarf/ApJdataFrames

From https://github.com/jonathansick/awesome-astronomy
- [AplPy](http://aplpy.github.io) - a Python module for astronomical imaging data.
- [Astropy](http://astropy.org) - Core package for Astronomy in Python.
- [AstroML](http://www.astroml.org) - Machine learning and data mining for astronomy; companion code to the textbook *Statistics, Data Mining, and Machine Learning in Astronomy*.
- [Emcee](http://dan.iel.fm/emcee/current/) - Seriously kick-ass MCMC.
- [GalPy](https://github.com/jobovy/galpy) - Galactic Dynamics in python.
- [The LSST Stack](https://confluence.lsstcorp.org/display/LSWUG/Using+the+LSST+Stack) - Conda-installable code for working with LSST simulation data and some existing data.

# Organize data

## numpy.ndarray
    simple but powerful
## numpy.ma.MaskedArray
    ndarray with mask
## astropy.table
    designed specifically for astronomical tables
## pandas.DataFrame
    good at data cleaning; suitable for collecting and cleaning real-world data

# Basics of numpy.ndarray

In [1]:
# A plain Python list does not do element-wise arithmetic: `+` on a list means
# concatenation, so adding an int raises TypeError.  The error is the point of
# this cell, so it is caught and printed instead of left unhandled; that keeps
# "Restart Kernel -> Run All" from stopping here.
a = [1, 2, 3]
try:
    b = a + 1
except TypeError as err:
    # Print in the same "ExceptionName: message" form a traceback ends with.
    print("{}: {}".format(err.__class__.__name__, err))

TypeError: can only concatenate list (not "int") to list

In [2]:
import numpy as np
# Unlike a list, an ndarray applies arithmetic element-wise: the scalar 1 is
# added to every element.
a = np.array([1,2,3])
a + 1

array([2, 3, 4])

In [3]:
# arange with a fractional step yields a float array 0, 0.5, ..., 9.5
# (the stop value 10 is excluded).
a = np.arange(0,10,0.5)
print(a)
print(a.dtype)
# Functions such as np.sin are applied to every element ...
print(np.sin(a))
# ... while reductions such as np.mean collapse the array to a single value.
print(np.mean(a))

[ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5  7.
  7.5  8.   8.5  9.   9.5]
float64
[ 0.          0.47942554  0.84147098  0.99749499  0.90929743  0.59847214
  0.14112001 -0.35078323 -0.7568025  -0.97753012 -0.95892427 -0.70554033
 -0.2794155   0.21511999  0.6569866   0.93799998  0.98935825  0.79848711
  0.41211849 -0.07515112]
4.75


In [4]:
# A comparison is evaluated element-wise and yields a boolean array of the same length.
a>3

array([False, False, False, False, False, False, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True], dtype=bool)

In [5]:
# Indexing with a boolean array (a mask) keeps only the elements where the mask is True.
a[a>3]

array([ 3.5,  4. ,  4.5,  5. ,  5.5,  6. ,  6.5,  7. ,  7.5,  8. ,  8.5,
        9. ,  9.5])

In [6]:
# With only a condition, np.where returns a tuple holding the integer positions
# where the condition is True (one index array per dimension).
np.where(a>3)

(array([ 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]),)

In [7]:
# Indexing with those integer positions selects the same elements as the boolean mask.
a[np.where(a>3)]

array([ 3.5,  4. ,  4.5,  5. ,  5.5,  6. ,  6.5,  7. ,  7.5,  8. ,  8.5,
        9. ,  9.5])

In [8]:
# Two ways to combine conditions: the & operator (each comparison must be
# parenthesized) or np.logical_and; both select the same elements.
a[(a>3)&(a<4)], a[np.logical_and(a>3, a<4)]

(array([ 3.5]), array([ 3.5]))

In [9]:
# Cast the float array to integers; the fractional part is dropped
# (0.5 -> 0, 1.5 -> 1, ...).
# The builtin `int` is used instead of the `np.int` alias: the alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
b = a.astype(int)
print(b)
print(b.dtype)

[0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9]
int64


In [10]:
# 24 consecutive integers arranged as 2 blocks x 3 rows x 4 columns.
a = np.arange(24).reshape((2,3,4))
print(a.shape)
a

(2, 3, 4)


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [11]:
# A slice (1:2) keeps its axis while an integer index (2) drops it -> result has shape (2, 1).
print(a[:,1:2,2])
print('')
# Two integer indices leave only the middle axis -> result has shape (3,).
print(a[0,:,1])
print('')
# A bare ':' selects the whole array.
print(a[:])

[[ 6]
 [18]]

[1 5 9]

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


In [12]:
# NOTE(review): `random` is not used in this cell (np.random is used instead);
# the import is kept in case other cells rely on it.
import random
# Field names and dtype codes for a structured (record) array; the two lists
# are paired position by position.
dataHeaders = " id  x     y ra     dec".split()
dataTypes   = "U20 i8 int16 f8 float16".split()
# list(zip(...)) replaces a comprehension that used `type` as its loop
# variable: under Python 2 comprehension variables leak into the enclosing
# scope, which rebinds the builtin `type` for every later cell.
todo = list(zip(dataHeaders, dataTypes))
dataType = np.dtype(todo)
print(todo)

# generate data to read: 10 rows, each with three random integers below 9999
# and two random floats in [0, 1), every value padded to a 10-character column
outputList = []
for i in range(10):
    outputList.append(
        "{:10} {:10d} {:10d} {:10f} {:10f}".format(
            np.random.randint(9999),
            np.random.randint(9999),
            np.random.randint(9999),
            np.random.rand(),
            np.random.rand(),
        )
    )
# One row per line, no trailing newline after the last row.
with open('./temp.dat', 'w') as f:
    f.write('\n'.join(outputList))

[('id', 'U20'), ('x', 'i8'), ('y', 'int16'), ('ra', 'f8'), ('dec', 'float16')]


In [13]:
# Load the file using np.loadtxt: passing the structured dtype converts each
# whitespace-separated column to its field type, giving one record per line.
data = np.loadtxt('./temp.dat', dtype=dataType)
data

array([(u'231', 1174,   38,  0.172004,  0.85693359),
       (u'9142', 1860, 7835,  0.724634,  0.88623047),
       (u'963',  371,  511,  0.814943,  0.86669922),
       (u'2767', 2651, 1679,  0.727648,  0.51513672),
       (u'3734', 4386, 3404,  0.16071 ,  0.76904297),
       (u'8096', 9604, 4940,  0.318443,  0.91552734),
       (u'9601', 8108, 5657,  0.49393 ,  0.80175781),
       (u'8778', 2656, 1943,  0.686984,  0.04711914),
       (u'610', 2702, 6457,  0.409797,  0.2902832 ),
       (u'7534', 2779, 8248,  0.334703,  0.08410645)],
      dtype=[('id', '<U20'), ('x', '<i8'), ('y', '<i2'), ('ra', '<f8'), ('dec', '<f2')])

In [14]:
import pprint
# Parse the file by hand: every line becomes a tuple of its
# whitespace-separated string tokens.
with open('./temp.dat') as f:
    dataList = [tuple(eachline.split()) for eachline in f]
pprint.pprint(dataList)
# Building the array with the structured dtype converts the string tokens
# to each field's type.
data = np.array(dataList, dtype=dataType)
data

[('231', '1174', '38', '0.172004', '0.856956'),
 ('9142', '1860', '7835', '0.724634', '0.886277'),
 ('963', '371', '511', '0.814943', '0.866906'),
 ('2767', '2651', '1679', '0.727648', '0.514914'),
 ('3734', '4386', '3404', '0.160710', '0.769198'),
 ('8096', '9604', '4940', '0.318443', '0.915318'),
 ('9601', '8108', '5657', '0.493930', '0.801699'),
 ('8778', '2656', '1943', '0.686984', '0.047127'),
 ('610', '2702', '6457', '0.409797', '0.290370'),
 ('7534', '2779', '8248', '0.334703', '0.084105')]


array([(u'231', 1174,   38,  0.172004,  0.85693359),
       (u'9142', 1860, 7835,  0.724634,  0.88623047),
       (u'963',  371,  511,  0.814943,  0.86669922),
       (u'2767', 2651, 1679,  0.727648,  0.51513672),
       (u'3734', 4386, 3404,  0.16071 ,  0.76904297),
       (u'8096', 9604, 4940,  0.318443,  0.91552734),
       (u'9601', 8108, 5657,  0.49393 ,  0.80175781),
       (u'8778', 2656, 1943,  0.686984,  0.04711914),
       (u'610', 2702, 6457,  0.409797,  0.2902832 ),
       (u'7534', 2779, 8248,  0.334703,  0.08410645)],
      dtype=[('id', '<U20'), ('x', '<i8'), ('y', '<i2'), ('ra', '<f8'), ('dec', '<f2')])

In [15]:
# A structured dtype can carry arbitrary Python objects in a field declared
# as `object`.
dataHeaders = " id    obj".split()
dataTypes   = "U20 object".split()
# list(zip(...)) replaces a comprehension that used `type` as its loop
# variable: under Python 2 comprehension variables leak into the enclosing
# scope, which rebinds the builtin `type` for every later cell.
todo = list(zip(dataHeaders, dataTypes))
dataType = np.dtype(todo)
print(todo)

# Each record pairs a label with a different kind of Python object.
rawData = [
    ('list', [1,2,3]),
    ('dict', {1:1, 2:2}),
    ('tuple', (1,2,3)),
    ('another ndarray', np.array([1,2,3,4])),
    ('any thing', np),
]

newData = np.array(rawData, dtype=dataType)
newData

[('id', 'U20'), ('obj', 'object')]


array([(u'list', list([1, 2, 3])), (u'dict', {1: 1, 2: 2}),
       (u'tuple', (1, 2, 3)), (u'another ndarray', array([1, 2, 3, 4])),
       (u'any thing', <module 'numpy' from '/usr/local/lib/python2.7/site-packages/numpy/__init__.pyc'>)],
      dtype=[('id', '<U20'), ('obj', 'O')])

In [16]:
# Indexing a structured array by field name returns that column as its own array.
print(newData['id'])
print(newData['obj'])

[u'list' u'dict' u'tuple' u'another ndarray' u'any thing']
[list([1, 2, 3]) {1: 1, 2: 2} (1, 2, 3) array([1, 2, 3, 4])
 <module 'numpy' from '/usr/local/lib/python2.7/site-packages/numpy/__init__.pyc'>]


# Cautions about python3, encoding

In [17]:
# NOTE(review): the output recorded for this cell (u'' prefixes, escaped bytes)
# comes from Python 2, where a plain '...' literal is a byte string and
# u'...' is a unicode string.
a = '你好'
b = u'你好'
# decode: bytes -> unicode.  Under Python 3 `str` has no .decode(), so this
# line raises AttributeError there.
c = a.decode('utf8')
print(a)
print(b)
print(c)
# The tuple repr exposes the difference: a and c.encode('utf8') are the same
# UTF-8 bytes, while b and c are the same unicode text.
print((a,b,c,c.encode('utf8')))

你好
你好
你好
('\xe4\xbd\xa0\xe5\xa5\xbd', u'\u4f60\u597d', u'\u4f60\u597d', '\xe4\xbd\xa0\xe5\xa5\xbd')


# Freestyle tips for learning a Python package

* read the documentation
* experiment in a Jupyter notebook or IPython
* make your own notes or snippets

# Debug

    import pdb
    pdb.set_trace()

    import ipdb
    ipdb.set_trace()