# Summer Research Master Tutorial

## Table of Contents

#### Numpy
#### Pandas
#### MatPlotLib
#### Healpy
#### General Python Tips & Tricks

## Numpy

In [7]:
import numpy as np

Create an array using <b>np.arange()</b>

In [9]:
# np.arange(start, stop, step): values begin at `start` and advance by `step`.
# `stop` itself is excluded, so counting from 10 by fives ends at 35, not 40.
arr_by_fives = np.arange(10, 40, 5)
arr_by_fives

array([10, 15, 20, 25, 30, 35])

In [26]:
# np.arange(start, stop, step) also accepts a fractional step, in which case
# the result is a float array; the stop value (3) is still left out.
float_step = np.arange(0, 3, 0.25)
float_step

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  , 2.25, 2.5 ,
       2.75])

In [25]:
# np.arange(start, stop): when the step is omitted it defaults to 1.
# !! There is no np.arange(stop, step) form: two positional arguments are
# always read as (start, stop) !!
start_stop = np.arange(3, 8)
start_stop

array([3, 4, 5, 6, 7])

In [10]:
# np.arange(stop): with a single argument, counting starts at 0 and runs up
# to, but does not include, stop
simple_arr = np.arange(10)
simple_arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Create an array with a desired number of elements using <b>np.linspace()</b>

In [29]:
# Build evenly spaced bin edges for 10 buckets between 5 and 8.
# np.linspace(start, stop, num) returns `num` points and, unlike np.arange,
# includes `stop`. N buckets need N + 1 edges, hence num = 10 + 1 = 11.
buckets = np.linspace(5, 8, 11)
buckets

array([5. , 5.3, 5.6, 5.9, 6.2, 6.5, 6.8, 7.1, 7.4, 7.7, 8. ])

Learn more about your data using <b>.shape, type(), .size, .ndim, .dtype.name</b>

In [11]:
# .shape: tuple holding the length of each axis; a 1-D array has one entry
arr_by_fives.shape

(6,)

In [31]:
# .ndim: number of axes (dimensions) of the array
float_step.ndim

1

In [32]:
# type() of the array object itself: the container class
type(start_stop)

numpy.ndarray

In [33]:
# type() of a single element: a NumPy scalar type rather than a plain Python
# int (the exact integer width, e.g. int64, can depend on the platform)
type(start_stop[0])

numpy.int64

In [17]:
# .dtype.name: the element type of the array, as a string
simple_arr.dtype.name

'int64'

In [34]:
# .size: total number of elements in the array
buckets.size

11

Matrix math

In [41]:
# Two length-10 operands for the arithmetic examples: `a` holds the integers
# 0..9 and `b` holds 10 evenly spaced floats from 4 to 5 (endpoints included).
a = np.arange(0, 10)
b = np.linspace(4, 5, 10)

In [42]:
# element-wise subtraction: result[i] = a[i] - b[i]
# (an integer array minus a float array yields a float array)
a - b

array([-4.        , -3.11111111, -2.22222222, -1.33333333, -0.44444444,
        0.44444444,  1.33333333,  2.22222222,  3.11111111,  4.        ])

In [43]:
# element-wise multiplication: result[i] = a[i] * b[i]
# (the * operator is NOT matrix multiplication for NumPy arrays)
a * b

array([ 0.        ,  4.11111111,  8.44444444, 13.        , 17.77777778,
       22.77777778, 28.        , 33.44444444, 39.11111111, 45.        ])

In [44]:
# @ is the matrix-multiplication operator; for two 1-D arrays it computes
# the inner (dot) product, sum(a[i] * b[i]), so the result is a single
# number rather than an array
a @ b

211.66666666666669

Creating arrays using <b>np.zeros()</b> and <b>np.ones()</b>

In [54]:
# np.ones(# of elements): 1-D array filled with ones
# elements are floats by default (note the trailing dots in the output)
np.ones(20)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1.])

In [50]:
# np.ones((rows, cols), dtype): pass the shape as a tuple to get a 2-D array.
# dtype=int makes the elements integers instead of the default floats.
ones_2d_arr = np.ones((3, 2), dtype=int)
ones_2d_arr

array([[1, 1],
       [1, 1],
       [1, 1]])

In [57]:
# multiplying by a scalar scales every element, so an array of ones becomes
# an array filled with that scalar (here 8); the result is a new array bound
# to eights_arr
eights_arr = ones_2d_arr * 8
eights_arr

array([[8, 8],
       [8, 8],
       [8, 8]])

In [56]:
# np.zeros(# of elements): 1-D array filled with zeros (floats by default)
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

Using <b>.reshape()</b>

In [61]:
# 1-D array of 20 elements, i.e. shape (20,), used as the input to reshape
to_reshape = np.ones(20)
to_reshape

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1.])

In [62]:
# reshape into a 2-D array with 4 rows of 5 elements each
# (rows * cols must equal the original element count: 4 * 5 = 20);
# note that the name to_reshape is rebound to the reshaped result
to_reshape = to_reshape.reshape((4,5))
to_reshape

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

Useful array operations using <b> np.max(), np.min(), np.median(), np.mean(), np.std(), np.var()</b>, and <b>np.sum()</b>

In [63]:
# Sample data kept as a plain Python list; NumPy's reduction functions accept
# list input directly.
test_arr = [5, 3, 21, 7, -2, 8]
# largest element
np.max(test_arr)

21

In [64]:
# smallest element
np.min(test_arr)

-2

In [75]:
# middle value of the sorted data; with an even number of elements it is the
# mean of the two middle values: (5 + 7) / 2
np.median(test_arr)

6.0

In [76]:
# arithmetic mean: sum / count = 42 / 6
np.mean(test_arr)

7.0

In [77]:
# standard deviation: square root of np.var; by default NumPy divides by N
# (population form, ddof=0), not N - 1
np.std(test_arr)

7.047458170621991

In [78]:
# variance: mean squared deviation from the mean, divided by N by default
# (ddof=0): 298 / 6
np.var(test_arr)

49.666666666666664

In [65]:
# total of all elements
np.sum(test_arr)

42

Make a histogram using <b>np.histogram()</b>

In [70]:
# np.histogram returns a pair: the per-bin counts and the bin edges.
# With no `bins` argument it uses 10 equal-width bins spanning the data's
# minimum to maximum (0 to 9 for this sample).
hist_data = [0, 2, 0, 5, 0, 9, 0, 1]
my_histogram, bin_edges = np.histogram(hist_data)

In [74]:
# counts per bin, left to right: four 0s, one 1, one 2, one 5, one 9
# (the last bin includes its right edge, which is why 9 is counted there)
my_histogram

array([4, 1, 1, 0, 0, 1, 0, 0, 0, 1])

In [73]:
# number of bin edges is (# of buckets) + 1: the 10 default bins give 11
# edges, evenly spaced from the smallest to the largest data value
bin_edges

array([0. , 0.9, 1.8, 2.7, 3.6, 4.5, 5.4, 6.3, 7.2, 8.1, 9. ])

Use <b>np.correlate()</b> for auto and cross correlation

In [95]:
# Two equal-length sequences to correlate.
c = [0, 1, 2, 3, 4, 5]
d = [5, 6, 7, 8, 9, 10]
# Cross-correlation. np.correlate defaults to mode='valid', which for
# equal-length inputs returns only the fully overlapping (zero-lag) term,
# sum(c[i] * d[i]); pass mode='full' to get every lag.
np.correlate(c, d)

array([130])

In [100]:
# auto-correlation: correlate d with itself; with the default mode ('valid')
# this is only the zero-lag term, sum(d[i] ** 2)
np.correlate(d,d)

array([355])