# Numpy 

## Import Packages

In [2]:
import numpy as np 

## Basics

### First steps 

In [3]:
a = np.arange(5) # 1-D array holding the integers 0 through 4

In [4]:
a

array([0, 1, 2, 3, 4])

In [5]:
a.shape # tuple giving the length of the array along each dimension

(5,)

In [6]:
a.dtype # default integer dtype is platform dependent: int64 here, typically int32 on 32-bit builds

dtype('int64')

In [13]:
# Quick overview of types: print one value per line
overview = (
    np.float64(31),    # NumPy double-precision float
    bool(0),           # zero converts to False
    bool(42),          # any non-zero number converts to True
    a.dtype.itemsize,  # number of bytes taken by one element of a
)
for value in overview:
    print(value)

31.0
False
True
8


In [8]:
# Element-wise sum of two arrays of the same length
b = np.arange(5)
s = np.add(a, b) # the ufunc behind the + operator
s

array([0, 2, 4, 6, 8])

### Slicing 

In [17]:
# Indexing a one-dimensional array: build 0..8, then read position 2
a = np.arange(0, 9)
a[2]

2

In [18]:
a[2:7] # elements at positions 2 through 6; the stop index is excluded

array([2, 3, 4, 5, 6])

In [20]:
a[::2] # step slicing

array([0, 2, 4, 6, 8])

In [21]:
a[::-1] # a negative step walks the array backwards, i.e. reverses it

array([8, 7, 6, 5, 4, 3, 2, 1, 0])

### Manipulating shapes 

In [23]:
b = a.reshape(3,3) # new 3x3 array object (a view of a when possible); a itself keeps its 1-D shape
b # an assignment alone displays nothing, so show the reshaped array explicitly

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [26]:
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [27]:
a.resize(3,3) # changes the shape of a in place; the call itself returns None

In [28]:
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [24]:
b.shape

(3, 3)

In [25]:
b.ravel() # flatten the array

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

### Stacking array 

In [40]:
# Build a 4x4 matrix and its double, then join them side by side
a = np.arange(16).reshape((4, 4))
b = a * 2
col_stack = np.concatenate((a, b), axis=1) # same as hstack((a, b))
col_stack

array([[ 0,  1,  2,  3,  0,  2,  4,  6],
       [ 4,  5,  6,  7,  8, 10, 12, 14],
       [ 8,  9, 10, 11, 16, 18, 20, 22],
       [12, 13, 14, 15, 24, 26, 28, 30]])

In [41]:
# Join the two matrices one on top of the other
row_stack = np.concatenate((a, b), axis=0) # same as vstack((a, b))
row_stack

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [ 0,  2,  4,  6],
       [ 8, 10, 12, 14],
       [16, 18, 20, 22],
       [24, 26, 28, 30]])

### Splitting array

In [45]:
np.hsplit(col_stack, 4) # four equal blocks of columns; same as split(col_stack, 4, axis=1)

[array([[ 0,  1],
        [ 4,  5],
        [ 8,  9],
        [12, 13]]), array([[ 2,  3],
        [ 6,  7],
        [10, 11],
        [14, 15]]), array([[ 0,  2],
        [ 8, 10],
        [16, 18],
        [24, 26]]), array([[ 4,  6],
        [12, 14],
        [20, 22],
        [28, 30]])]

In [46]:
np.vsplit(row_stack, 2) # two equal blocks of rows; same as split(row_stack, 2, axis=0)

[array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]]), array([[ 0,  2,  4,  6],
        [ 8, 10, 12, 14],
        [16, 18, 20, 22],
        [24, 26, 28, 30]])]

### Array attributes

In [48]:
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [49]:
a.ndim # number of dimensions (axes) of the array

2

In [50]:
a.size # size (nb elements of the array)

16

In [51]:
a.itemsize # number of bytes used by each element

8

In [54]:
a.reshape(4,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [53]:
a.reshape(4,4).T # transpose: rows and columns are swapped

array([[ 0,  4,  8, 12],
       [ 1,  5,  9, 13],
       [ 2,  6, 10, 14],
       [ 3,  7, 11, 15]])

In [55]:
a.tolist() # convert the array to a (nested) Python list

[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]

## Basic Data Analysis with numpy 

## Tips and Tricks 

### Learning to avoid unnecessary array copies

Avoiding unnecessary array copies makes code faster and reduces memory usage.

In [2]:
# Copy and memory insights

# a gets a fresh buffer of 100 zeros; b is an independent duplicate of that buffer
a = np.zeros((100,))
b = np.copy(a)

In [3]:
# Get numpy id 
def id_numpy(x):
    """Return the memory address at which the data buffer of array ``x`` starts.

    The value comes from the array interface, whose ``'data'`` entry is an
    ``(address, read_only_flag)`` pair. Two arrays reporting the same address
    start at the same location in memory.
    """
    address, _read_only = x.__array_interface__['data']
    return address

In [4]:
id_numpy(a) == id_numpy(b) # False 

False

In [5]:
id_numpy(a), id_numpy(a[1:]) # the slice's buffer starts 8 bytes (one element) after a's

(4298858432, 4298858440)

In [6]:
??aid

Object `aid` not found.


In [10]:
a = np.arange(10)
aid = id(a) # remember which Python object the name a refers to
np.multiply(a, 2, out=a) # in place, like a *= 2; b = a * 2 would allocate a new array instead
aid == id(a)

True

In [14]:
b = np.arange(10,10) # start equals stop, so the result is an empty array

In [15]:
b

array([], dtype=int64)

In [13]:
a

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [23]:
c = np.random.rand(3,3) # 3x3 array of random floats

In [24]:
c

array([[ 0.12432581,  0.32510353,  0.18580317],
       [ 0.69057854,  0.02411758,  0.81901501],
       [ 0.44248262,  0.54533385,  0.71953285]])

In [25]:
e = c.ravel() # flatten to 1-D; returns a view when it can, whereas flatten() always copies

In [26]:
e

array([ 0.12432581,  0.32510353,  0.18580317,  0.69057854,  0.02411758,
        0.81901501,  0.44248262,  0.54533385,  0.71953285])

In [31]:
# n rows, d columns. These names must match the ones used in the call below:
# the cell previously bound i, j and then read the undefined n, d, which
# raises NameError on a fresh kernel.
n, d = 100000, 100
a = np.random.random_sample((n, d)) # n x d matrix of random floats
aid = id(a) # identity of the array object, for later in-place checks

In [29]:
a.shape

(100000, 100)

In [32]:
# np.random.random_sample(size) returns floats drawn uniformly from the half-open interval [0.0, 1.0)

In [34]:
a[10000]

array([ 0.21901508,  0.61956116,  0.79695437,  0.99268274,  0.43051421,
        0.7431374 ,  0.26974219,  0.726856  ,  0.7248749 ,  0.70549282,
        0.2976876 ,  0.29840003,  0.49665092,  0.91112655,  0.4207265 ,
        0.08243329,  0.45939381,  0.31160615,  0.14605972,  0.90025253,
        0.68054478,  0.93962258,  0.85799518,  0.59557884,  0.00684147,
        0.76342033,  0.93523988,  0.69109801,  0.61796816,  0.93541026,
        0.47722073,  0.9879511 ,  0.19510311,  0.72683171,  0.070411  ,
        0.36043908,  0.95585217,  0.95433029,  0.27852615,  0.83695488,
        0.86838693,  0.54752769,  0.43808096,  0.89693089,  0.04080786,
        0.84350648,  0.8441064 ,  0.94219044,  0.31862449,  0.43192861,
        0.69014774,  0.05666424,  0.54137644,  0.46277258,  0.72341757,
        0.4063484 ,  0.93828304,  0.84715568,  0.66563593,  0.29931358,
        0.76506626,  0.75173035,  0.71917824,  0.52403388,  0.88337655,
        0.65383641,  0.50603911,  0.08895481,  0.67556346,  0.96

In [40]:
a[::10].shape # every tenth row via a strided slice; faster than fancy indexing with a[np.arange(0, n, 10)] (n = number of rows)

(10000, 100)

### Fancy indexing