In [1]:
import numpy as np
import pandas as pd

## Creating Numpy Arrays from Python Lists

In [2]:
np.array([1, 2, 3, 4])

array([1, 2, 3, 4])

In [3]:
[3.14, 4, 2, 3]

[3.14, 4, 2, 3]

In [4]:
np.array([3.14, 4, 2, 3])

array([3.14, 4.  , 2.  , 3.  ])

In [5]:
np.array([1, 2, 3, 4], dtype = 'float32')

array([1., 2., 3., 4.], dtype=float32)

In [6]:
a1 = np.array([1, 2, 3, 4])

In [7]:
type(a1)

numpy.ndarray

In [9]:
a2 = np.array([[1,2,3],
             [4,5,6]])

In [10]:
type(a2)

numpy.ndarray

In [11]:
a2.shape

(2, 3)

In [12]:
a2.ndim

2

In [13]:
a2.dtype

dtype('int32')

In [14]:
a2.size

6

## Creating Numpy Arrays from Scratch

### 'zeros', 'ones', 'full', 'arange', 'linspace'

In [16]:
np.zeros([2,4], dtype = int)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [18]:
np.ones([3,5], dtype = int)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [19]:
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [22]:
np.full([3, 5], 6.9)

array([[6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9],
       [6.9, 6.9, 6.9, 6.9, 6.9]])

In [23]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

### 'random'

In [25]:
#Seed for reproducibility
np.random.seed(0)

np.random.random([2,3])

array([[0.5488135 , 0.71518937, 0.60276338],
       [0.54488318, 0.4236548 , 0.64589411]])

In [26]:
np.random.normal(0, 1, [3, 3])

array([[ 0.95008842, -0.15135721, -0.10321885],
       [ 0.4105985 ,  0.14404357,  1.45427351],
       [ 0.76103773,  0.12167502,  0.44386323]])

In [27]:
np.random.randint(0, 10, [4, 5])

array([[2, 3, 8, 1, 3],
       [3, 3, 7, 0, 1],
       [9, 9, 0, 4, 7],
       [3, 2, 7, 2, 0]])

In [28]:
np.random.rand(3, 5) #Similar to random, but the dimensions are passed as separate arguments (no square brackets needed)

array([[0.0187898 , 0.6176355 , 0.61209572, 0.616934  , 0.94374808],
       [0.6818203 , 0.3595079 , 0.43703195, 0.6976312 , 0.06022547],
       [0.66676672, 0.67063787, 0.21038256, 0.1289263 , 0.31542835]])

## Array Indexing & Slicing

### One-dimensional subarray

In [29]:
x1 = np.random.randint(20, size = 6)

In [30]:
x1

array([12, 10, 11,  4,  6,  4])

In [31]:
x1[4], x1[0], x1[-1]

(6, 12, 4)

### Multi-dimensional subarray

In [32]:
x2 = np.random.randint(10, size = [3,4])

In [33]:
x2

array([[4, 3, 4, 4],
       [8, 4, 3, 7],
       [5, 5, 0, 1]])

In [34]:
x2[1,2]

3

In [35]:
x2[1,3] = 1

In [36]:
x2

array([[4, 3, 4, 4],
       [8, 4, 3, 1],
       [5, 5, 0, 1]])

### Slicing
'x[start:stop:step]'

In [37]:
x1

array([12, 10, 11,  4,  6,  4])

In [38]:
x1[0:3]

array([12, 10, 11])

In [40]:
x1[2:4]

array([11,  4])

In [41]:
x1[::2]

array([12, 11,  6])

In [42]:
x2

array([[4, 3, 4, 4],
       [8, 4, 3, 1],
       [5, 5, 0, 1]])

In [43]:
x2[:2, :3]

array([[4, 3, 4],
       [8, 4, 3]])

In [44]:
x2[:2]

array([[4, 3, 4, 4],
       [8, 4, 3, 1]])

In [45]:
x2[:, :2]

array([[4, 3],
       [8, 4],
       [5, 5]])

## Reshaping of Arrays & Transpose

In [47]:
grid = np.arange(1, 10)
grid.shape

(9,)

In [48]:
grid.reshape((3,3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [49]:
x = np.array([[1, 2, 3], [4, 5, 6]])

In [50]:
x

array([[1, 2, 3],
       [4, 5, 6]])

In [51]:
x.T

array([[1, 4],
       [2, 5],
       [3, 6]])

## Array Concatenation & Splitting

In [52]:
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])

In [53]:
np.concatenate((x, y))

array([1, 2, 3, 4, 5, 6])

In [54]:
grid = np.array([[1,2,3],
                [4,5,6]])
grid

array([[1, 2, 3],
       [4, 5, 6]])

In [55]:
np.concatenate((grid, grid)) #default axis = 0: stacks the rows vertically (the result has more rows)

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [56]:
np.concatenate((grid, grid), axis = 1) #axis = 1: joins side by side horizontally (the result has more columns)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [57]:
#vertical stack: np.vstack()
x = np.array([1, 2, 3])

grid = np.array([[4,5,6],
                [7,8,9]])

np.vstack((x, grid))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [58]:
#horizontal stack: np.hstack()
y = np.array([[99],
             [99]])

np.hstack((y, grid))

array([[99,  4,  5,  6],
       [99,  7,  8,  9]])

### Splitting of arrays

In [59]:
x = np.array([1,2,3,96,69,3,2,1])

In [60]:
x1, x2, x3 = np.split(x, [3, 5])

In [61]:
x1

array([1, 2, 3])

In [62]:
x2

array([96, 69])

In [63]:
x3

array([3, 2, 1])

## Broadcasting and Vectorized operations

In [66]:
a = np.arange(3)
a

array([0, 1, 2])

In [67]:
a + 5 #broadcasting

array([5, 6, 7])

In [68]:
b = np.ones([3, 3], dtype = int)
b

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [69]:
a + b

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [70]:
a * b

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])

In [71]:
c = np.arange(3).reshape(3,1)
c

array([[0],
       [1],
       [2]])

In [72]:
a + c

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

## Manipulating & Comparing Arrays

### Aggregation
Aggregation = performing the same operation on a number of things

In [73]:
list_number = [1,2,3]

In [74]:
ll = np.array(list_number)

In [75]:
ll

array([1, 2, 3])

In [76]:
sum(ll) #Python sum()

6

In [77]:
np.sum(ll) #Numpy sum() --> Faster

6

In [78]:
#Create a massive Numpy array
massive_array = np.random.random(10000)
massive_array[:5]

array([0.77834548, 0.94957105, 0.66252687, 0.01357164, 0.6228461 ])

In [79]:
%timeit sum(massive_array) #Python built-in function sum()
%timeit np.sum(massive_array) #Numpy's np.sum()

1.88 ms ± 414 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
21 µs ± 2.52 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [80]:
np.mean(massive_array)

0.5014384851964334

In [81]:
np.min(massive_array)

2.0027530115096503e-05

In [82]:
np.max(massive_array)

0.9999709462497284

In [84]:
#Build the heights array directly from the literal values
dog_height = np.array([600, 470, 170, 430, 300])

np.std(dog_height)

147.32277488562318

In [85]:
np.var(dog_height)

21704.0

## Sorting Arrays

By default, np.sort uses a quicksort algorithm

In [86]:
x = np.array([2,1,4,3,5])

In [87]:
np.sort(x)

array([1, 2, 3, 4, 5])

In [88]:
#A related function is 'argsort', which instead returns the indices of the sorted elements
np.argsort(x) # --> Returns the indices that would sort the data

array([1, 0, 3, 2, 4], dtype=int64)

### Sorting along rows or columns
A useful feature of NumPy's sorting algorithms is the ability to sort along specific rows or columns of a multidimensional array using the axis argument

In [89]:
np.random.seed(42)

MatA = np.random.randint(0, 10, size = (4, 6))

In [90]:
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [91]:
np.sort(MatA, axis = 0) #Sort the values within each column (along axis 0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [92]:
np.sort(MatA, axis = 1) #Sort the values within each row (along axis 1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

## Linear Algebra (Đại số tuyến tính)

In [93]:
A = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])

In [94]:
B = np.array([[6,5],
              [4,3],
              [2,1]])

In [95]:
#Dot product (matrix product) of A and B
#A (3x3) dot product B (3x2)
A.dot(B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [96]:
A @ B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [97]:
#B (3x2) dot product A (3x3) --> Not possible: the inner dimensions (2 and 3) do not match
#B.T (2x3) dot product A (3x3) --> Works: the inner dimensions (3 and 3) match

B.T @ A

array([[36, 48, 60],
       [24, 33, 42]])

### Dot Product Example

In [98]:
#Numbers of jars sold
np.random.seed(0)

sales_amounts = np.random.randint(20, size = (5, 3))

In [99]:
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [100]:
#Create the weekly_sales dataframe from the sales_amounts array,
#labelling each row with a weekday and each column with a butter type
weekly_sales = pd.DataFrame(
    sales_amounts,
    index = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri'], #Row labels
    columns = ['Almond Butter', 'Peanut Butter', 'Cashew Butter'], #Column labels
)

In [101]:
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tue,3,3,7
Wed,9,19,18
Thu,4,6,12
Fri,1,6,7


In [102]:
#Create a price array

prices = np.array([10,8,12])

In [105]:
butter_prices = pd.DataFrame(prices, index = ['Almond Butter', 'Peanut Butter', 'Cashew Butter'], 
                                                columns = ['Price'])

In [106]:
butter_prices

Unnamed: 0,Price
Almond Butter,10
Peanut Butter,8
Cashew Butter,12


In [107]:
weekly_sales.shape, butter_prices.shape

((5, 3), (3, 1))

In [108]:
total_prices = weekly_sales.dot(butter_prices)

In [109]:
total_prices

Unnamed: 0,Price
Mon,240
Tue,138
Wed,458
Thu,232
Fri,142


In [110]:
weekly_sales['Total Prices'] = total_prices
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter,Total Prices
Mon,12,15,0,240
Tue,3,3,7,138
Wed,9,19,18,458
Thu,4,6,12,232
Fri,1,6,7,142
