# NumPy: Numeric computing library
NumPy (Numerical Python) is one of the core packages for numerical computing in Python. Pandas, Matplotlib, Statsmodels and many other scientific libraries rely on NumPy.

NumPy's major contributions are:

- Efficient numeric computation with C primitives
- Efficient collections with vectorized operations
- An integrated and natural Linear Algebra API
- A C API for connecting NumPy with libraries written in C, C++, or FORTRAN.

Let's elaborate on efficiency. In Python, everything is an object, which means that even simple ints are objects, with all the machinery required to make objects work. We call them "Boxed Ints". In contrast, NumPy uses primitive numeric types (floats, ints), which makes storage and computation efficient.



In [1]:
import numpy as np
import sys

# Create numpy arrays from list

In [2]:
# Build a 1-D NumPy array from a plain Python list.
np.array([1,2,3,5,6,7])

array([1, 2, 3, 5, 6, 7])

In [3]:
[3.458,1,3,5] # a Python list may hold several data types at once (a float and ints here)

[3.458, 1, 3, 5]

In [4]:
np.array([3.458,1,3,5]) # an array, by contrast, converts every element to one common type

array([3.458, 1.   , 3.   , 5.   ])

In [8]:
np.array([3.0,1,3,5]).dtype # this example shows that one float is enough to convert every element to float


dtype('float64')

In [7]:
np.array([3.458,1,3,5], dtype="float32") # force every element to one explicitly chosen dtype

array([3.458, 1.   , 3.   , 5.   ], dtype=float32)

In [14]:
# Build a 2-D array (2 rows, 3 columns) from nested lists, stored as float64.
arr=np.array([[3.43,75,673], [2837,7.4,-842]],dtype="float64")

In [20]:
# Print the array contents, then show the Python type of the object.
print(arr)
type(arr)

[[   3.43   75.    673.  ]
 [2837.      7.4  -842.  ]]


numpy.ndarray

In [16]:
arr.shape # returns the dimensions of the array as a tuple (rows, columns)

(2, 3)

In [17]:
arr.size # total number of elements

6

In [18]:
arr.ndim # number of dimensions

2

In [19]:
arr.dtype # data type of the elements stored in the array

dtype('float64')

# Create numpy arrays from Scratch

## zeros, ones, full, arange, linspace

In [21]:
np.zeros((3,3),dtype="int32") # create a 3x3 array filled with 0, with dtype int32

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [22]:
np.ones((3,3),dtype="int32") # create a 3x3 array filled with 1, with dtype int32

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [25]:
# Create an evenly spaced 1-D array.
np.arange(-100,109,1) # (start, stop, step); the stop value itself is not included

array([-100,  -99,  -98,  -97,  -96,  -95,  -94,  -93,  -92,  -91,  -90,
        -89,  -88,  -87,  -86,  -85,  -84,  -83,  -82,  -81,  -80,  -79,
        -78,  -77,  -76,  -75,  -74,  -73,  -72,  -71,  -70,  -69,  -68,
        -67,  -66,  -65,  -64,  -63,  -62,  -61,  -60,  -59,  -58,  -57,
        -56,  -55,  -54,  -53,  -52,  -51,  -50,  -49,  -48,  -47,  -46,
        -45,  -44,  -43,  -42,  -41,  -40,  -39,  -38,  -37,  -36,  -35,
        -34,  -33,  -32,  -31,  -30,  -29,  -28,  -27,  -26,  -25,  -24,
        -23,  -22,  -21,  -20,  -19,  -18,  -17,  -16,  -15,  -14,  -13,
        -12,  -11,  -10,   -9,   -8,   -7,   -6,   -5,   -4,   -3,   -2,
         -1,    0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
         10,   11,   12,   13,   14,   15,   16,   17,   18,   19,   20,
         21,   22,   23,   24,   25,   26,   27,   28,   29,   30,   31,
         32,   33,   34,   35,   36,   37,   38,   39,   40,   41,   42,
         43,   44,   45,   46,   47,   48,   49,   

In [28]:
# The array produced by arange is one-dimensional.
np.arange(-100,109,1).ndim

1

In [26]:
np.full((3,3),13,dtype="float32") # like ones/zeros, but fills the array with any chosen value (13 here)

array([[13., 13., 13.],
       [13., 13., 13.],
       [13., 13., 13.]], dtype=float32)

In [30]:
np.linspace(0,1,11) # linspace = linear space; also builds an evenly spaced array, from (start, stop, number of elements), with stop included

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

## random

In [33]:
np.random.random((3,3)) #Return random floats in the half-open interval [0.0, 1.0)

array([[0.53935084, 0.59235725, 0.84807666],
       [0.32796835, 0.13842076, 0.71996916],
       [0.46661705, 0.79325138, 0.93334361]])

In [44]:
np.random.rand(3,3) # twin of random.random; the dimensions are passed as separate arguments instead of a tuple

array([[0.16130952, 0.65310833, 0.2532916 ],
       [0.46631077, 0.24442559, 0.15896958],
       [0.11037514, 0.65632959, 0.13818295]])

In [36]:
# Seed for reproducibility.
np.random.seed(0) # fix the seed so the same random numbers are generated on every run
np.random.random((3,3))

array([[0.5488135 , 0.71518937, 0.60276338],
       [0.54488318, 0.4236548 , 0.64589411],
       [0.43758721, 0.891773  , 0.96366276]])

In [39]:
np.random.normal(0,1,(3,3)) # (mean, standard deviation, output shape)

array([[ 0.37692697,  0.03343893,  0.68056724],
       [-1.56349669, -0.56669762, -0.24214951],
       [ 1.51439128, -0.3330574 ,  0.04736482]])

In [42]:
np.random.randint(0,10,(3,3),dtype="int32") # (low, high, size, dtype); low is inclusive, high is exclusive

array([[0, 3, 5],
       [9, 4, 4],
       [6, 4, 4]])

# Array indexing and slicing

## one dimensional subarray

In [10]:
# 1-D sample array: nine random integers with low=0 (inclusive) and high=100 (exclusive).
ar1=np.random.randint(0,100,size=9)

In [11]:
ar1

array([75, 55, 92, 88, 63, 36, 18, 43, 84])

## multi-dimensional subarray

In [16]:
# 2-D sample array: a 5x5 grid of random integers with low=0 (inclusive) and high=100 (exclusive).
ar2=np.random.randint(0,100,size=(5,5))

In [17]:
ar2

array([[90, 91, 76, 89, 47],
       [94, 29, 97, 86, 90],
       [49, 67, 84,  6, 83],
       [24, 91, 37, 86, 76],
       [51, 96, 78, 34, 67]])

In [18]:
# Chained indexing: take row 2 first, then element 2 of that row.
ar2[2][2]

84

In [19]:
ar2[2,2] # just another way to address the same element (row 2, column 2)

84

In [20]:
ar2[2,2]=55 # overwrite the value at row 2, column 2 in place

In [21]:
ar2

array([[90, 91, 76, 89, 47],
       [94, 29, 97, 86, 90],
       [49, 67, 55,  6, 83],
       [24, 91, 37, 86, 76],
       [51, 96, 78, 34, 67]])

## Slicing

### X[start:stop:step]

In [12]:
ar1

array([75, 55, 92, 88, 63, 36, 18, 43, 84])

In [13]:
# Start at index 0, stop before index 8, step by 3 -> picks indices 0, 3 and 6.
ar1[0:8:3]

array([75, 88, 18])

In [14]:
ar1[::2] # every element at an even index: 0, 2, 4, 6, ...

array([75, 92, 63, 18, 84])

In [23]:
ar2

array([[90, 91, 76, 89, 47],
       [94, 29, 97, 86, 90],
       [49, 67, 55,  6, 83],
       [24, 91, 37, 86, 76],
       [51, 96, 78, 34, 67]])

In [22]:
# First two rows, first three columns.
ar2[:2,:3]

array([[90, 91, 76],
       [94, 29, 97]])

In [25]:
ar2[:,::2] # all rows, every even-indexed column (0, 2, 4)

array([[90, 76, 47],
       [94, 97, 90],
       [49, 55, 83],
       [24, 37, 76],
       [51, 78, 67]])

# Reshaping arrays and Transpose

In [26]:
# 1-D array holding the integers 1 through 9 (the stop value 10 is excluded).
grid=np.arange(1,10)

In [27]:
grid

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [29]:
grid.shape

(9,)

In [30]:
grid.reshape((3,3)) # rearrange the 9 elements into shape (3, 3); the result is not assigned back to grid here

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [31]:
x=np.array([1,2,3])

In [32]:
x.shape

(3,)

In [35]:
x=x.reshape((1,3)) # convert from 1 dimension to 2 dimensions: shape (3,) becomes (1, 3)

In [36]:
x.shape

(1, 3)

In [37]:
x

array([[1, 2, 3]])

In [38]:
# 3x3 matrix of random integers (low=1 inclusive, high=10 exclusive) used to demonstrate the transpose.
y=np.random.randint(1,10,(3,3))

In [39]:
y

array([[3, 8, 6],
       [6, 4, 4],
       [3, 4, 8]])

In [42]:
y.T # transposed matrix: rows and columns are swapped

array([[3, 6, 3],
       [8, 4, 4],
       [6, 4, 8]])

# Array concatenation and splitting

In [48]:
x=np.array([1,2,4])
y=np.array([1,3,9])
z=np.concatenate((x,y)) # join the two arrays end to end
# Signature: concatenate((a1, a2, ...), axis=0, out=None, dtype=None, casting="same_kind") -- join a sequence of arrays along an existing axis.

In [46]:
z

array([1, 2, 4, 1, 3, 9])

In [49]:
# 2x3 matrix used for the 2-D concatenation examples.
x1=np.array([[1,2,3],
             [4,5,6]])


In [50]:
x2=np.concatenate((x1,x1)) # default axis=0: the arrays are stacked vertically (rows are appended), giving shape (4, 3)

In [51]:
x2

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [52]:
x3=np.concatenate((x1,x1),axis=1) # axis=1: the arrays are joined side by side (columns are appended), giving shape (2, 6)

In [53]:
x3

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [54]:
# vstack stacks arrays vertically (row on top of row).
x=np.array([1,2,3])
grid=np.array([[4,5,6],
               [7,8,9]])


In [56]:
np.vstack((x,grid))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [57]:
# hstack stacks arrays horizontally (side by side); both inputs here have 2 rows.
y=np.array([[1,2],
           [3,4]])
z=np.array([[5,6,7],
           [8,9,0]])
np.hstack((y,z))

array([[1, 2, 5, 6, 7],
       [3, 4, 8, 9, 0]])

## Splitting arrays

In [58]:
# 20 random integers (low=1 inclusive, high=100 exclusive) used to demonstrate splitting.
x=np.random.randint(1,100,size=20)

In [59]:
x

array([84, 55, 33, 95, 17, 31, 33, 56, 38, 74, 39, 93, 62, 81, 57, 59, 25,
       24,  2, 61])

In [62]:
sub1,sub2,sub3,sub4=np.split(x,[5,10,15])  # the indices in the list are the stop points of each sub-array; three split points give four pieces

In [63]:
sub1,sub2,sub3,sub4

(array([84, 55, 33, 95, 17]),
 array([31, 33, 56, 38, 74]),
 array([39, 93, 62, 81, 57]),
 array([59, 25, 24,  2, 61]))

# Broadcasting and Vectorized operations
### Broadcasting is simply a set of rules for applying binary ufuncs (addition, subtraction, multiplication, etc.) on arrays of different sizes

![image.png](attachment:image.png)

In [64]:
a=np.arange(3)

In [65]:
a

array([0, 1, 2])

In [66]:
a+5 # broadcasting: the scalar 5 is added to every element of a

array([5, 6, 7])

In [67]:
a*10

array([ 0, 10, 20])

In [75]:
b=np.ones((3,3))

In [76]:
b,a

(array([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]),
 array([0, 1, 2]))

In [77]:
# a has shape (3,) and b has shape (3, 3): a is added to each row of b.
b+a

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [78]:
b*a

array([[0., 1., 2.],
       [0., 1., 2.],
       [0., 1., 2.]])

In [79]:
a*b

array([[0., 1., 2.],
       [0., 1., 2.],
       [0., 1., 2.]])

In [80]:
# Column vector of shape (3, 1) holding 0, 1, 2.
c=np.arange(3).reshape((3,1))

In [82]:
a,c

(array([0, 1, 2]),
 array([[0],
        [1],
        [2]]))

In [81]:
# Shape (3,) plus shape (3, 1): both operands are broadcast to (3, 3) before adding.
a+c

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [83]:
a*c # note: this is NOT matrix multiplication, so a*c == c*a; both operands are broadcast to the same shape, then multiplied element by element

array([[0, 0, 0],
       [0, 1, 2],
       [0, 2, 4]])

# Manipulating & Comparing Arrays 

## Aggregation

In [84]:
list_num=[1,3,6,7]

In [85]:
ll=np.array(list_num)

In [86]:
np.sum(ll)

17

In [87]:
# Create a large array of 100000 random floats to compare summation speed.
mass=np.random.random(100000)

In [89]:
%timeit sum(mass) # time Python's built-in sum

11.6 ms ± 945 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [90]:
%timeit np.sum(mass) # time NumPy's sum

62.5 µs ± 2.46 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


Conclusion: the NumPy function runs much faster (about 62.5 µs versus 11.6 ms in the timings above).

In [91]:
np.mean(mass) # mean (average) value

0.5007151643282508

In [92]:
np.std(mass) # standard deviation: a measure of how spread out a group of numbers is from the mean
# standard deviation = square root of the variance

0.28871394916892246

In [93]:
np.max(mass)

0.9999982936211771

In [94]:
np.min(mass)

2.0822423753674713e-05

# Statistics

In [96]:
# Small sample of five values used for the mean / variance / standard deviation examples.
dog_height=np.array([600,470,170,430,300])

In [97]:
np.mean(dog_height)

394.0

In [98]:
np.var(dog_height) # variance

21704.0

In [99]:
np.std(dog_height) # standard deviation

147.32277488562318

In [100]:
 np.sqrt(np.var(dog_height)) # the variance is the square of the standard deviation, so its square root equals np.std

147.32277488562318

# Sort

In [101]:
# np.sort uses quicksort as its default algorithm.
x=np.random.randint(1,100,size=9)

In [103]:
x

array([49, 35, 82, 87, 21, 34, 34, 18, 23])

In [104]:
np.sort(x) # return the values in ascending order

array([18, 21, 23, 34, 34, 35, 49, 82, 87])

In [105]:
np.argsort(x) # return the indices that would sort the array (position of the smallest value first)

array([7, 4, 8, 5, 6, 1, 0, 2, 3], dtype=int64)

## Sorting along rows and columns
A useful feature of NumPy's sorting algorithms is the ability to sort along specific rows or columns of a multi-dimensional array using the `axis` argument.

In [106]:
# Seed the generator so the 4x6 matrix of random integers (low=0 inclusive, high=10 exclusive) is reproducible.
np.random.seed(42)
MatA=np.random.randint(0,10,size=(4,6))

In [107]:
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [108]:
np.sort(MatA,axis=0) # axis=0: sort each column independently (values ascend from top to bottom)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [109]:
np.sort(MatA,axis=1) # axis=1: sort each row independently (values ascend from left to right)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

# Linear Algebra

In [110]:
# Operands for the matrix-product examples: MatA is 3x3 and MatB is 3x2.
MatA=np.array([[1,2,3],
               [4,5,6],
               [7,8,9]])
MatB=np.array([[6,5],
               [4,3],
               [2,1]])

In [111]:
# MatA (3x3) dot MatB (3x2): matrix multiplication, giving a 3x2 result.
MatA.dot(MatB)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [114]:
1*6+2*4+3*2 # how one entry of the product is computed: row 0 of MatA times column 0 of MatB gives entry [0, 0]

20

In [113]:
MatB.T.dot(MatA) # (B.T)*A: MatB transposed (2x3) times MatA (3x3) gives a 2x3 result

array([[36, 48, 60],
       [24, 33, 42]])

# Dot product example

In [115]:
# Reproducible 5x3 matrix of random integers below 20, used as sample sales data.
np.random.seed(0)
sale_amounts=np.random.randint(20,size=(5,3))

In [116]:
sale_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [117]:
# Wrap the sales matrix in a labelled DataFrame: one row per weekday, one column per butter type.
# NOTE(review): "Thus" and "Alomnd" look like typos for "Thu" and "Almond"; they are left unchanged
# because the recorded outputs and the later price table use the same labels.
import pandas as pd
weekly_sale=pd.DataFrame(sale_amounts,index=["Mon","Tue","Wed","Thus","Fri"],
                         columns=["Alomnd Butter","Peanut Butter","Cashew Butter"])


In [118]:
weekly_sale

Unnamed: 0,Alomnd Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tue,3,3,7
Wed,9,19,18
Thus,4,6,12
Fri,1,6,7


In [119]:
# Three prices; the next cell labels them with the butter column names, in this order.
price=np.array([10,8,12])


In [122]:
# Reshape the prices to a single row (1x3) and label it with the same column names as weekly_sale.
butter_price=pd.DataFrame(price.reshape((1,3)),index=["Price"],columns=["Alomnd Butter","Peanut Butter","Cashew Butter"])

In [123]:
butter_price

Unnamed: 0,Alomnd Butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [125]:
# (5x3) dot (3x1): multiply each day's quantities by the matching prices and sum them, giving one total per day.
# The transpose turns the butter names into the row index so they line up with weekly_sale's columns.
total_price=weekly_sale.dot(butter_price.T)

In [126]:
# Store the per-day totals as a new column of the sales table.
weekly_sale["Total_price"]=total_price

In [127]:
weekly_sale

Unnamed: 0,Alomnd Butter,Peanut Butter,Cashew Butter,Total_price
Mon,12,15,0,240
Tue,3,3,7,138
Wed,9,19,18,458
Thus,4,6,12,232
Fri,1,6,7,142
