# NumPy
Pour Numerical Python. La librairie tourne autour d'un type d'objet : ARRAY. Array qui signifie tableau, NumPy nous permet de générer des arrays à N-dimensions (1D, 2D, 3D etc.). Les arrays (à 1 dimension) sont très semblables aux listes et nous allons voir que les méthodes d'indexing et slicing sont similaires quelque soit la dimension de nos arrays.
<br>Le format array permet des exécutions plus rapides avec de meilleurs calculs scientifiques. De plus ce format est indispensable pour le machine learning : vos données doivent être "présentées" aux modèles sous ce format la plupart du temps.

## Installer et import NumPy

In [1]:
# !pip install numpy
import numpy as np

## 1. Les Arrays
Comprendre la construction des Arrays (tableaux)

In [2]:
#1D
arr_1d = np.array([120, 170, 75, 160, 80, 165, 50])
arr_1d

np.array([120, 170.0, 75, 160, 80, 165, 50])

#2D
arr_2d = np.array([[120, 170, 75, 160, 80, 165, 50],
                   [130, 180, 85, 170, 90, 175, 60]])
arr_2d

#3D
arr_3d = np.array([[[120, 170, 75, 160, 80, 165, 50],
                   [130, 180, 85, 170, 90, 175, 60]],
                   [[120, 170, 75, 160, 80, 165, 50],
                   [130, 180, 85, 170, 90, 175, 60]]])
arr_3d

array([[[120, 170,  75, 160,  80, 165,  50],
        [130, 180,  85, 170,  90, 175,  60]],

       [[120, 170,  75, 160,  80, 165,  50],
        [130, 180,  85, 170,  90, 175,  60]]])

In [3]:
#size shape ndim dtype
for arr in [arr_1d, arr_2d, arr_3d]:
    print(arr.size)
    print(arr.shape)
    print(arr.ndim)
    print(arr.dtype)
    print('---')

7
(7,)
1
int32
---
14
(2, 7)
2
int32
---
28
(2, 2, 7)
3
int32
---


Un plus grand nombre de bits pour nos données permet d'être plus précis cependant moins rapide en calculs. A l'inverse un plus petit nombre de bits permet d'être plus rapide en étant moins précis.

In [4]:
np.array([1, 2, 3], dtype=np.int8).dtype

dtype('int8')

In [5]:
np.zeros((2, 3, 7))
np.ones((2, 3))
np.full((2, 2), 99)

array([[99, 99],
       [99, 99]])

In [6]:
np.arange(3, 10)
np.linspace(2, 10, 120)

array([ 2.        ,  2.06722689,  2.13445378,  2.20168067,  2.26890756,
        2.33613445,  2.40336134,  2.47058824,  2.53781513,  2.60504202,
        2.67226891,  2.7394958 ,  2.80672269,  2.87394958,  2.94117647,
        3.00840336,  3.07563025,  3.14285714,  3.21008403,  3.27731092,
        3.34453782,  3.41176471,  3.4789916 ,  3.54621849,  3.61344538,
        3.68067227,  3.74789916,  3.81512605,  3.88235294,  3.94957983,
        4.01680672,  4.08403361,  4.1512605 ,  4.21848739,  4.28571429,
        4.35294118,  4.42016807,  4.48739496,  4.55462185,  4.62184874,
        4.68907563,  4.75630252,  4.82352941,  4.8907563 ,  4.95798319,
        5.02521008,  5.09243697,  5.15966387,  5.22689076,  5.29411765,
        5.36134454,  5.42857143,  5.49579832,  5.56302521,  5.6302521 ,
        5.69747899,  5.76470588,  5.83193277,  5.89915966,  5.96638655,
        6.03361345,  6.10084034,  6.16806723,  6.23529412,  6.30252101,
        6.3697479 ,  6.43697479,  6.50420168,  6.57142857,  6.63

### Random

In [7]:
np.random.rand(12, 2, 3)

array([[[0.87114381, 0.5887113 , 0.15186172],
        [0.98199645, 0.41459061, 0.03518515]],

       [[0.50355046, 0.89510676, 0.860037  ],
        [0.09368977, 0.72080686, 0.06178475]],

       [[0.04305536, 0.53337073, 0.21971323],
        [0.18857169, 0.54590905, 0.2932466 ]],

       [[0.72690018, 0.55487708, 0.82194901],
        [0.92220603, 0.34248427, 0.02653625]],

       [[0.6687318 , 0.00623722, 0.47293309],
        [0.63674116, 0.55196412, 0.78749713]],

       [[0.83025006, 0.31313236, 0.74444217],
        [0.37557955, 0.63773811, 0.96169695]],

       [[0.61045419, 0.23218187, 0.58371161],
        [0.26799383, 0.68403473, 0.4301562 ]],

       [[0.36079373, 0.48820115, 0.50557889],
        [0.49470291, 0.92682704, 0.61619725]],

       [[0.47446503, 0.78562399, 0.91758229],
        [0.46172898, 0.28920662, 0.72406176]],

       [[0.10209425, 0.47200186, 0.32887197],
        [0.77039381, 0.36707099, 0.78297379]],

       [[0.46450054, 0.8716701 , 0.5729209 ],
        [0.229

In [8]:
np.random.randint(12, size=13)
np.random.randint(4, 16, size=(4, 5))

array([[ 5,  9, 12,  5, 15],
       [15,  4, 12, 10,  5],
       [10, 12,  5,  7, 15],
       [ 7, 14, 13,  4, 11]])

In [9]:
np.random.randn(4, 4)

array([[ 0.09512762, -0.49110123, -0.13584203, -1.48515846],
       [ 0.89790545, -0.29797446, -0.31939117,  0.67278612],
       [-0.3452536 ,  0.15386222, -0.33777647, -0.49168985],
       [ 2.47540206,  1.54717301, -0.10935833,  0.76643302]])

In [10]:
np.random.random_sample((2, 6))

array([[0.88745879, 0.77982387, 0.26418012, 0.23336208, 0.74038394,
        0.89419087],
       [0.80489264, 0.24085294, 0.87167647, 0.59812531, 0.14765958,
        0.25441759]])

In [11]:
arr_sh = np.random.randint(2, 8, 8)
print(arr_sh)

np.random.shuffle(arr_sh)
arr_sh

[3 7 3 7 5 6 7 4]


array([5, 7, 4, 7, 7, 3, 6, 3])

In [12]:
from numpy import random

In [13]:
random.randint(2, 8, 8)

array([6, 7, 2, 4, 4, 2, 4, 3])

In [14]:
from numpy.random import randint

In [15]:
randint(2, 8, 8)

array([5, 5, 2, 5, 5, 6, 2, 6])

## 2. Assemblage & Manipulations

In [16]:
arr_1d = np.array([120, 170, 75, 160, 80, 165, 50])
arr_1d2 = np.array([130, 180, 85, 170, 90, 175, 60])

In [17]:
np.vstack([arr_1d, arr_1d2])
np.hstack([arr_1d, arr_1d2])

np.vstack(arr_1d)

arr_hs = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
display(arr_hs)

np.hstack(arr_hs)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [18]:
display(arr_1d)
display(arr_1d2)

array([120, 170,  75, 160,  80, 165,  50])

array([130, 180,  85, 170,  90, 175,  60])

In [19]:
np.column_stack([arr_1d, arr_1d2])
np.row_stack([arr_1d, arr_1d2])

array([[120, 170,  75, 160,  80, 165,  50],
       [130, 180,  85, 170,  90, 175,  60]])

In [20]:
arr_rsh = np.random.randint(1, 12, 8)
display(arr_rsh)
display(arr_rsh.reshape(4, 2))
display(arr_rsh.reshape(2, 4))
display(arr_rsh.reshape(2, 2, 2))
display(arr_rsh.reshape(4, 3))

array([4, 1, 1, 2, 3, 5, 3, 9])

array([[4, 1],
       [1, 2],
       [3, 5],
       [3, 9]])

array([[4, 1, 1, 2],
       [3, 5, 3, 9]])

array([[[4, 1],
        [1, 2]],

       [[3, 5],
        [3, 9]]])

ValueError: cannot reshape array of size 8 into shape (4,3)

In [21]:
np.ravel(arr_rsh.reshape(2, 2, 2))

array([4, 1, 1, 2, 3, 5, 3, 9])

In [22]:
display(arr_sh.reshape(4, 2))
np.transpose(arr_sh.reshape(4, 2))

array([[5, 7],
       [4, 7],
       [7, 3],
       [6, 3]])

array([[5, 4, 7, 6],
       [7, 7, 3, 3]])

In [23]:
np.sort(arr_rsh)

array([1, 1, 2, 3, 3, 4, 5, 9])

In [24]:
display(arr_2d)
display(np.sort(arr_2d, axis=1))
display(np.sort(arr_2d, axis=0))

array([[120, 170,  75, 160,  80, 165,  50],
       [130, 180,  85, 170,  90, 175,  60]])

array([[ 50,  75,  80, 120, 160, 165, 170],
       [ 60,  85,  90, 130, 170, 175, 180]])

array([[120, 170,  75, 160,  80, 165,  50],
       [130, 180,  85, 170,  90, 175,  60]])

In [25]:
arr_2d2 = np.array([[130, 180,  85, 170,  90, 175,  60],
                    [120, 170,  75, 160,  80, 165,  50]])
np.sort(arr_2d2, axis=0)

array([[120, 170,  75, 160,  80, 165,  50],
       [130, 180,  85, 170,  90, 175,  60]])

## 3. Indexing & Slicing

### 1D

In [26]:
arr_1d

array([120, 170,  75, 160,  80, 165,  50])

In [27]:
arr_1d[1]
arr_1d[-2]
arr_1d[2:-1]
arr_1d[1:4]

array([170,  75, 160])

### 2D

In [28]:
arr_2d

array([[120, 170,  75, 160,  80, 165,  50],
       [130, 180,  85, 170,  90, 175,  60]])

In [29]:
arr_2d[1, 2]
arr_2d[:, 2]
arr_2d[1, 3:6]

array([170,  90, 175])

### 3D

In [30]:
arr_3d

array([[[120, 170,  75, 160,  80, 165,  50],
        [130, 180,  85, 170,  90, 175,  60]],

       [[120, 170,  75, 160,  80, 165,  50],
        [130, 180,  85, 170,  90, 175,  60]]])

In [31]:
arr_3d[0, 0, 0]
arr_3d[:, 1, 3]
arr_3d[1, 0, 2:5]

array([ 75, 160,  80])

### Remplacement

In [32]:
#Remplace tout le slicing
arr_1d[:2] = 99
arr_1d

arr_2d[1, 3:6] = 88
arr_2d

array([[120, 170,  75, 160,  80, 165,  50],
       [130, 180,  85,  88,  88,  88,  60]])

In [33]:
#Remplacement détaillé => doit être de la même dimension
arr_2d[:, 2:5] = [[11, 22, 33], [44, 55, 66]]
arr_2d

array([[120, 170,  11,  22,  33, 165,  50],
       [130, 180,  44,  55,  66,  88,  60]])

### Boolean indexing

In [34]:
display(arr_1d)
arr_1d < 80

array([ 99,  99,  75, 160,  80, 165,  50])

array([False, False,  True, False, False, False,  True])

In [35]:
arr_1d[arr_1d < 80]

array([75, 50])

In [36]:
mask = arr_1d < 80
arr_1d[mask]

array([75, 50])

In [37]:
#Plusieurs conditions possibles and or not
arr_1d[(arr_1d < 80) & (arr_1d > 65)]

array([75])

In [38]:
arr_1d[mask] = 111
arr_1d

array([ 99,  99, 111, 160,  80, 165, 111])

# 4. Statistiques et Mathématiques

In [39]:
arr_stat_1d = np.array([90, 160, 45, 155, 55, 145])
arr_stat_2d = np.array([[90, 160, 45, 155, 55, 145],
                        [80, 170, 35, 145, 45, 135]])

In [40]:
np.sum(arr_stat_1d)
arr_stat_1d.sum()

650

In [41]:
display(arr_stat_2d)
display(np.sum(arr_stat_2d))
display(np.sum(arr_stat_2d, axis=0))
display(np.sum(arr_stat_2d, axis=1))
display(np.sum(arr_stat_2d, axis=1, keepdims=True))

array([[ 90, 160,  45, 155,  55, 145],
       [ 80, 170,  35, 145,  45, 135]])

1260

array([170, 330,  80, 300, 100, 280])

array([650, 610])

array([[650],
       [610]])

In [42]:
np.round(np.array([1.54, 1.45]))
np.floor(np.array([1.54, 1.45]))
np.ceil(np.array([1.54, 1.45]))

array([2., 2.])

In [43]:
np.mean(arr_stat_1d)
np.average(arr_stat_1d)
np.median(arr_stat_1d)
np.var(arr_stat_1d)
np.std(arr_stat_1d)
np.cumsum(arr_stat_1d)
np.prod(arr_stat_1d)

2145082944

In [44]:
arr_stat_2d

array([[ 90, 160,  45, 155,  55, 145],
       [ 80, 170,  35, 145,  45, 135]])

In [45]:
np.mean(arr_stat_2d, axis=0)
np.average(arr_stat_2d, axis=0)
np.median(arr_stat_2d, axis=0)
np.var(arr_stat_2d, axis=0)
np.std(arr_stat_2d, axis=0)
np.cumsum(arr_stat_2d, axis=0)
np.prod(arr_stat_2d, axis=0)

array([ 7200, 27200,  1575, 22475,  2475, 19575])

In [46]:
np.mean(arr_stat_2d, axis=1, keepdims=True)
np.average(arr_stat_2d, axis=1, keepdims=True)
np.median(arr_stat_2d, axis=1, keepdims=True)
np.var(arr_stat_2d, axis=1, keepdims=True)
np.std(arr_stat_2d, axis=1, keepdims=True)
np.cumsum(arr_stat_2d, axis=1)
np.prod(arr_stat_2d, axis=1, keepdims=True)

array([[ 2145082944],
       [-1610295008]])

In [47]:
np.prod(np.array([ 80, 170,  35, 145,  45, 135], dtype=np.int64))

419296500000

In [48]:
display(np.percentile(arr_stat_1d, 10))
display(np.quantile(arr_stat_1d, .1))

50.0

50.0

In [49]:
for qp in [10, 15, 34, 88]:
    display(np.percentile(arr_stat_1d, qp))
    display(np.quantile(arr_stat_1d, qp/100))
    print('---')

50.0

50.0

---


52.5

52.5

---


79.5

79.5

---


157.0

157.0

---


In [50]:
display(arr_stat_2d)
np.corrcoef(arr_stat_2d)

array([[ 90, 160,  45, 155,  55, 145],
       [ 80, 170,  35, 145,  45, 135]])

array([[1.       , 0.9919353],
       [0.9919353, 1.       ]])

In [51]:
display(np.random.randint(40, 200, (5, 3)))
np.corrcoef(np.random.randint(40, 200, (5, 3)))

array([[ 45, 101,  94],
       [ 41, 151,  84],
       [197,  56, 177],
       [ 60, 176, 111],
       [ 49,  79, 105]])

array([[ 1.        ,  0.36066937, -0.53663867, -0.60273518,  0.93098354],
       [ 0.36066937,  1.        ,  0.59346917, -0.96162296, -0.00471301],
       [-0.53663867,  0.59346917,  1.        , -0.34986154, -0.80764481],
       [-0.60273518, -0.96162296, -0.34986154,  1.        , -0.26983916],
       [ 0.93098354, -0.00471301, -0.80764481, -0.26983916,  1.        ]])

In [52]:
np.unique(arr_stat_1d)
display(arr_stat_2d)
np.unique(arr_stat_2d, axis=0)
np.unique(arr_stat_2d, axis=1)

array([[ 90, 160,  45, 155,  55, 145],
       [ 80, 170,  35, 145,  45, 135]])

array([[ 45,  55,  90, 145, 155, 160],
       [ 35,  45,  80, 135, 145, 170]])

In [53]:
np.unique(arr_stat_1d, return_counts=True)

(array([ 45,  55,  90, 145, 155, 160]), array([1, 1, 1, 1, 1, 1], dtype=int64))

In [54]:
np.sqrt(arr_stat_2d)
np.exp(arr_stat_2d)
np.log(arr_stat_2d)

np.log(np.array([0, 1, 2, 3]))
display(np.log1p(np.array([0, 1, 2, 3])))
display(np.log(np.array([1, 2, 3, 4])))

  np.log(np.array([0, 1, 2, 3]))


array([0.        , 0.69314718, 1.09861229, 1.38629436])

array([0.        , 0.69314718, 1.09861229, 1.38629436])

In [58]:
arr_neg = np.random.randint(-25, 25, (4, 3))
display(arr_neg)

np.abs(arr_neg)

array([[-19,  14, -18],
       [ 17,   8,  23],
       [-11,   0,  20],
       [-18, -23,  -9]])

array([[19, 14, 18],
       [17,  8, 23],
       [11,  0, 20],
       [18, 23,  9]])

In [67]:
display(arr_stat_1d)
np.add(arr_stat_1d, 10) # +
np.subtract(arr_stat_1d, 10) # -
np.multiply(arr_stat_1d, 10) # *
np.divide(arr_stat_1d, 10) # /

arr_stat_1d // 9
arr_stat_1d % 9

np.power(arr_stat_1d, 2) # **

array([ 90, 160,  45, 155,  55, 145])

array([ 8100, 25600,  2025, 24025,  3025, 21025], dtype=int32)

In [69]:
np.add(arr_stat_2d, 10)
arr_stat_2d % 9

array([[0, 7, 0, 2, 1, 1],
       [8, 8, 8, 1, 0, 0]], dtype=int32)

In [70]:
np.multiply(arr_stat_1d, arr_stat_1d)

array([ 8100, 25600,  2025, 24025,  3025, 21025])

In [73]:
display(arr_stat_2d)
display(arr_stat_1d)
np.multiply(arr_stat_2d, arr_stat_1d)

array([[ 90, 160,  45, 155,  55, 145],
       [ 80, 170,  35, 145,  45, 135]])

array([ 90, 160,  45, 155,  55, 145])

array([[ 8100, 25600,  2025, 24025,  3025, 21025],
       [ 7200, 27200,  1575, 22475,  2475, 19575]])

In [75]:
np.add(arr_stat_2d, arr_stat_1d)
np.add(arr_stat_1d, arr_stat_2d)

array([[180, 320,  90, 310, 110, 290],
       [170, 330,  80, 300, 100, 280]])

In [87]:
np.add(arr_stat_2d, np.array([23, 33, 43]))

ValueError: operands could not be broadcast together with shapes (2,6) (3,) 

### NaN : Not a Number

In [81]:
np.nan
arr_nan = np.array([np.nan, 1, 23, np.nan])
arr_nan
np.mean(arr_nan)
np.nanmean(arr_nan)

np.percentile(arr_nan, 20)
np.nanpercentile(arr_nan, 20)

5.4

In [86]:
np.isnan(arr_nan)
np.isnan(arr_nan).sum()
np.sum(np.isnan(arr_nan))
np.isnan(arr_nan).mean() *100

50.0