# Introuction to NumPy 

## NumPy : Numeric computing library
`NumPy` (Numerical Python) is one of the core packages for numerical computing in Python. Pandas, Matplotlib, Statmodels and many other Scientific libraries rely on NumPy.

<img src="https://user-images.githubusercontent.com/7065401/39118381-910eb0c2-46e9-11e8-81f1-a5b897401c23.jpeg"
    style="width:300px; float: right; margin: 0 40px 40px 40px;"></img>

NumPy major contributions are:

* Efficient numeric computation with C primitives
* Efficient collections with vectorized operations
* An integrated and natural Linear Algebra API
* A C API for connecting NumPy with libraries written in C, C++, or FORTRAN.

In Python, everything is an object, which means that even simple `ints` are also objects, with all the required machinery to make object work. We call them "Boxed Ints". In contrast, NumPy uses primitive numeric types (floats, ints) which makes storing and computation efficient.


## Hands on!

In [1]:
import numpy as np
import sys

## Basic NumPy arrays

In [124]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [3]:
a = np.array([1,2,3,4])

In [4]:
b = np.array([0, 0.5,1.0, 1.5, 2.0])

In [5]:
a[0], a[3]

(1, 4)

In [6]:
a[0:]

array([1, 2, 3, 4])

In [7]:
a[1:3]

array([2, 3])

In [8]:
a[1:-1]

array([2, 3])

In [9]:
a[::2]

array([1, 3])

In [10]:
b

array([0. , 0.5, 1. , 1.5, 2. ])

In [11]:
b[1:]

array([0.5, 1. , 1.5, 2. ])

In [12]:
b[0], b[2], b[-1]

(0.0, 1.0, 2.0)

In [13]:
b[[0, 2, -1]]

array([0., 1., 2.])

## Array types

In [14]:
a

array([1, 2, 3, 4])

In [15]:
a.dtype

dtype('int32')

In [16]:
b

array([0. , 0.5, 1. , 1.5, 2. ])

In [17]:
b.dtype

dtype('float64')

In [18]:
np.array([1,2,3,4], dtype = np.float)

array([1., 2., 3., 4.])

In [19]:
np.array([1,2,3,4], dtype = np.int8)

array([1, 2, 3, 4], dtype=int8)

In [20]:
c = np.array(['a', 'b', 'c'])
c.dtype

dtype('<U1')

## Dimensions and shapes

In [125]:
A = np.array([[1,2,3],[4,5,6]])
A

array([[1, 2, 3],
       [4, 5, 6]])

In [126]:
A.shape

(2, 3)

In [127]:
A.ndim

2

In [128]:
A.size

6

In [25]:
B = np.array([
    [
        [1,2,3],
        [5,6,7],
    ],
    [
        [1,2,3],
        [4,5,6]
    ]
])

In [26]:
B

array([[[1, 2, 3],
        [5, 6, 7]],

       [[1, 2, 3],
        [4, 5, 6]]])

In [27]:
B.shape

(2, 2, 3)

In [28]:
B.size

12

In [29]:
B.dtype

dtype('int32')

In [30]:
B.ndim

3

## Indexing and slicing of matrices

In [130]:
# Square matrix
A = np.array([
    [1,2,3],
    [4,5,6],
    [7,8,9]
])
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [32]:
A[1]

array([4, 5, 6])

In [33]:
A[1][0]

4

In [34]:
A[1,0]

4

In [35]:
A[0:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [131]:
A[:,:2] # A[rows, col(0,1)]

array([[1, 2],
       [4, 5],
       [7, 8]])

In [37]:
A[:2,:2]

array([[1, 2],
       [4, 5]])

In [38]:
A[1] = np.array([11,11,11])
A

array([[ 1,  2,  3],
       [11, 11, 11],
       [ 7,  8,  9]])

## Summary statistics

In [39]:
arr = np.array([1,2,3,4,5])

In [40]:
arr.sum()

15

In [41]:
arr.argmax()

4

In [42]:
arr.mean()

3.0

In [43]:
arr.std()

1.4142135623730951

In [44]:
arr.var()

2.0

In [45]:
A = np.array([
              [1,2,3,4],
              [5,6,7,8],
              [9,10,11,12]
             ])
A

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [46]:
A.mean()

6.5

In [47]:
A.size

12

In [48]:
A.std()

3.452052529534663

In [49]:
A.ndim

2

In [50]:
A.sum()

78

In [51]:
A.argmax()

11

In [52]:
A.sum(axis = 0) #Column-wise

array([15, 18, 21, 24])

In [53]:
A.sum(axis = 1) #Row-wise

array([10, 26, 42])

In [54]:
A.mean(axis = 0)

array([5., 6., 7., 8.])

In [55]:
A.std(axis = 1)

array([1.11803399, 1.11803399, 1.11803399])

And [many more](https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.ndarray.html#array-methods)...

## Broadcasting and Vectorized operations

In [56]:
a = np.arange(4)

In [57]:
a

array([0, 1, 2, 3])

In [58]:
a + 10

array([10, 11, 12, 13])

In [59]:
a * 10

array([ 0, 10, 20, 30])

In [60]:
a

array([0, 1, 2, 3])

In [61]:
a += 100

In [62]:
a

array([100, 101, 102, 103])

In [63]:
a = np.arange(4)
b = np.array([10,10,10,10])

In [64]:
a+b

array([10, 11, 12, 13])

In [65]:
a*b

array([ 0, 10, 20, 30])

## Boolean arrays

In [66]:
a = np.arange(4)

In [67]:
a[0], a[-1]

(0, 3)

In [68]:
a[[0, -1]]

array([0, 3])

In [69]:
a[[True, False, False, True]]

array([0, 3])

In [70]:
a >= 2

array([False, False,  True,  True])

In [71]:
a[a>=2]

array([2, 3])

In [72]:
a.mean()

1.5

In [73]:
a[a > a.mean()]

array([2, 3])

In [74]:
a[~(a > a.mean())]

array([0, 1])

In [75]:
a[(a == 0) | (a == 1)]

array([0, 1])

In [76]:
a[(a%2 == 0) & (a <= 2)]

array([0, 2])

In [77]:
A = np.random.random(3)

In [78]:
A

array([0.97271995, 0.89714298, 0.98284994])

In [79]:
np.random.normal(0)

0.6996155242643963

In [80]:
np.random.seed(1313)
A = np.random.randint(50, size = (3,3))
A

array([[48, 32, 42],
       [33,  6, 43],
       [12, 25,  8]])

In [81]:
np.random.rand(2,4)

array([[0.49741385, 0.27774076, 0.88546349, 0.65512953],
       [0.83098086, 0.06078681, 0.59002568, 0.95418147]])

In [82]:
A > 30

array([[ True,  True,  True],
       [ True, False,  True],
       [False, False, False]])

In [83]:
A[A>30]

array([48, 32, 42, 33, 43])

In [84]:
A

array([[48, 32, 42],
       [33,  6, 43],
       [12, 25,  8]])

In [85]:
A[A>30]

array([48, 32, 42, 33, 43])

In [86]:
A.flatten()

array([48, 32, 42, 33,  6, 43, 12, 25,  8])

In [87]:
A.diagonal()

array([48,  6,  8])

In [88]:
A.reshape(9,1)

array([[48],
       [32],
       [42],
       [33],
       [ 6],
       [43],
       [12],
       [25],
       [ 8]])

## Linear Algebra

In [89]:
# 
A = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]
             ])

In [90]:
B = np.array([[1,2],
              [3,4],
              [5,6]])

In [91]:
A.dot(B) #dot product (Result => size(3,2))

array([[ 22,  28],
       [ 49,  64],
       [ 76, 100]])

In [92]:
A @ B #dot product

array([[ 22,  28],
       [ 49,  64],
       [ 76, 100]])

In [93]:
B.T #Transpose

array([[1, 3, 5],
       [2, 4, 6]])

In [94]:
B.T @ A

array([[48, 57, 66],
       [60, 72, 84]])

<img src="https://docs.google.com/drawings/d/e/2PACX-1vTkDtKYMUVdpfVb3TTpr_8rrVtpal2dOknUUEOu85wJ1RitzHHf5nsJqz1O0SnTt8BwgJjxXMYXyIqs/pub?w=726&h=396" />

## Size of objects in memory
### Int, floats

In [132]:
sys.getsizeof(1)

28

In [133]:
sys.getsizeof(10**1000)

468

In [97]:
# Numpy size is much smaller
np.dtype(int).itemsize

4

In [98]:
# Numpy size is much smaller
np.dtype(np.int8).itemsize

1

In [99]:
np.dtype(float).itemsize

8

### Lists are even larger

In [100]:
sys.getsizeof([1])

64

In [101]:
np.array([1]).nbytes

4

### Performance comparison

In [134]:
l = list(range(10000000))

In [142]:
# a = np.arange(10000000, dtype = 'int64')
a = np.arange(100000000)

In [143]:
%time np.sum(a**2)

Wall time: 259 ms


-1452071552

In [137]:
%time sum([x**2 for x in l])

Wall time: 23 ms


333328333350000

## Useful NumPy functions
### `random`

In [106]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [107]:
np.arange(5,10)

array([5, 6, 7, 8, 9])

In [108]:
np.arange(0,1,.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

---
### `reshape`

In [109]:
np.arange(10).reshape(5,2)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [110]:
np.arange(10).reshape(2,5)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

---
### `linspace`

In [111]:
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [112]:
a = np.linspace(0,1,20)
print(len(a))
a

20


array([0.        , 0.05263158, 0.10526316, 0.15789474, 0.21052632,
       0.26315789, 0.31578947, 0.36842105, 0.42105263, 0.47368421,
       0.52631579, 0.57894737, 0.63157895, 0.68421053, 0.73684211,
       0.78947368, 0.84210526, 0.89473684, 0.94736842, 1.        ])

In [113]:
a = np.linspace(0,1,20, False)

In [114]:
a

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

---
### `zeros`, `ones

In [115]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [116]:
np.zeros((3,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [117]:
np.zeros((3,4), dtype = np.int)

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]])

In [118]:
np.ones(3)

array([1., 1., 1.])

In [119]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

### `identity`, `eye`

In [120]:
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [121]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [122]:
np.eye(4,4, k=1)

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 0.]])

In [123]:
np.eye(5,5,k=-2)

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.]])