In [2]:
import numpy as np
import pandas as pd

## Creating Numpy Arrays from Python lists

In [2]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [4]:
# Build a 2x3 integer array from a nested Python list (one inner list per row).
a1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

In [5]:
type(a1)

numpy.ndarray

In [6]:
a1.shape

(2, 3)

In [9]:
a1.ndim

2

In [10]:
a1.dtype

dtype('int32')

In [13]:
a1.size

6

## Creating Numpy Arrays from Scratch
## zeros, ones, full, arange, linspace

In [4]:
np.zeros([2,4], dtype = int)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [5]:
np.ones([3,5], dtype = int)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [6]:
np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [12]:
np.full([2,3], 9)

array([[9, 9, 9],
       [9, 9, 9]])

#### random

In [16]:
# Seed NumPy's global random state so the values drawn below are the same each
# time the cells are run in order from a fresh kernel.
np.random.seed(0)
# Draw a 4x4 array of random floats.
np.random.random((4,4))

array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
       [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
       [0.96366276, 0.38344152, 0.79172504, 0.52889492],
       [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])

In [17]:
np.random.normal(0, 1, [3,3])

array([[ 0.44386323,  0.33367433,  1.49407907],
       [-0.20515826,  0.3130677 , -0.85409574],
       [-2.55298982,  0.6536186 ,  0.8644362 ]])

In [20]:
np.random.randint(0, 100, [2,2])

array([[42, 58],
       [31,  1]])

## Array Indexing & Slicing
### One-dimensional subarray

In [19]:
x1 = np.random.randint(20, size = 6)

In [21]:
x1

array([ 5,  6,  8, 17, 15,  4])

In [23]:
x1[4], x1[0]

(15, 5)

### Multi-dimensional array

In [25]:
x2 = np.random.randint(10, size = (3,4))

In [26]:
x2

array([[1, 7, 9, 9],
       [3, 6, 7, 2],
       [0, 3, 5, 9]])

In [27]:
# Index both axes in a single step (row 1, column 2). The chained form x2[1][2]
# gives the same value but first builds an intermediate row view.
x2[1, 2]

7

### Slicing
`x[start:stop:step]`

In [28]:
x1

array([ 5,  6,  8, 17, 15,  4])

In [30]:
x1[::2]

array([ 5,  8, 15])

In [31]:
x2

array([[1, 7, 9, 9],
       [3, 6, 7, 2],
       [0, 3, 5, 9]])

In [32]:
x2[:2,:3]

array([[1, 7, 9],
       [3, 6, 7]])

In [33]:
x2[:,:2]

array([[1, 7],
       [3, 6],
       [0, 3]])

## Reshaping of Arrays & Transpose

In [34]:
grid = np.arange(1,10)
grid.shape

(9,)

In [35]:
grid.reshape((3,3))

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [37]:
x = np.array([1,2,3])
x.shape

(3,)

In [39]:
x.reshape(1,3).shape

(1, 3)

In [42]:
# 2x2 float matrix; dtype=float gives the same float64 values as writing 1., 2., ...
x = np.array([[1, 2],
              [3, 4]], dtype=float)

In [43]:
x

array([[1., 2.],
       [3., 4.]])

In [44]:
x.T

array([[1., 3.],
       [2., 4.]])

## Array Concatenation and Splitting

In [46]:
x = np.array([1,2,3])
y = np.array([3,2,1])

In [47]:
np.concatenate((x,y))

array([1, 2, 3, 3, 2, 1])

In [49]:
grid = np.array([[1,2,3],
                 [4,5,6]])
grid

array([[1, 2, 3],
       [4, 5, 6]])

In [50]:
np.concatenate((grid,grid))

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [51]:
np.concatenate((grid,grid), axis = 1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [52]:
# Inputs for the vertical-stacking (vstack) example: a length-3 row and a 2x3 block.
x = np.array([1, 2, 3])
grid = np.array([
    [9, 8, 7],
    [6, 5, 4],
])


In [53]:
np.vstack((x,grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [54]:
# Horizontal stacking with hstack: a 2x1 column of 99s is placed to the left of grid.
y = np.full((2, 1), 99)
np.hstack([y, grid])

array([[99,  9,  8,  7],
       [99,  6,  5,  4]])

In [55]:
x = np.array([1,2,3,99,6,1,3,2,1])

In [58]:
# Cut x at indices 3 and 5, giving three pieces: x[:3], x[3:5] and x[5:].
# NOTE(review): this rebinds x1 and x2, which earlier cells used for other arrays.
x1, x2, x3 = np.split(x, [3,5])

In [59]:
x1

array([1, 2, 3])

In [60]:
x2

array([99,  6])

In [61]:
x3

array([1, 3, 2, 1])

## Broadcasting and Vectorized operations

In [62]:
a = np.arange(3)

In [63]:
a

array([0, 1, 2])

In [64]:
a + 5 # broadcasting

array([5, 6, 7])

In [65]:
b = np.ones((3,3))

In [66]:
b

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [68]:
a.shape, b.shape

((3,), (3, 3))

In [69]:
a + b

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [70]:
a*b

array([[0., 1., 2.],
       [0., 1., 2.],
       [0., 1., 2.]])

In [71]:
# Column vector holding 0, 1, 2 (shape (3, 1)), made by adding a trailing axis.
c = np.arange(3)[:, np.newaxis]

In [72]:
c

array([[0],
       [1],
       [2]])

In [73]:
a + c

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

## Manipulating & Comparing Arrays
### Aggregation
Aggregation means applying one operation across many values to produce a summary (for example a sum, a mean, or a standard deviation).

In [74]:
list_number = [1,2,3]

In [76]:
ll = np.array(list_number)

In [77]:
ll

array([1, 2, 3])

In [78]:
sum(ll)

6

In [79]:
np.sum(ll)

6

In [81]:
# Draw 10,000 random floats so there is a large array for the timing comparison.
massive_array = np.random.random(size=10_000)
massive_array.shape

(10000,)

In [82]:
%timeit sum(massive_array) # Python's built-in function
%timeit np.sum(massive_array) # NumPy's function

953 µs ± 60.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
8.8 µs ± 292 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [83]:
np.mean(massive_array)

0.4972911879968765

In [85]:
# Heights as a NumPy array, built in one step from the raw measurements.
dog_height = np.array([600, 470, 170, 430, 300])

# Population standard deviation (NumPy's default, ddof=0).
np.std(dog_height)

147.32277488562318

In [86]:
np.var(dog_height)

21704.0

In [87]:
np.sqrt(np.var(dog_height))

147.32277488562318

## Sorting Arrays
By default, `np.sort` uses a quicksort algorithm and returns a sorted copy, leaving the input array unchanged.

In [88]:
x = np.array([2,1,4,3,5])

In [89]:
np.sort(x)

array([1, 2, 3, 4, 5])

In [90]:
# A related function is argsort, which instead returns indices of the sorted elements:
np.argsort(x)

array([1, 0, 3, 2, 4], dtype=int64)

### Sorting along rows or columns
A useful feature of NumPy's sorting functions is the ability to sort along specific rows or columns of a multidimensional array using the `axis` argument.

In [92]:
# Fix the global seed so the same 4x6 matrix of single digits is generated on every run.
np.random.seed(42)
MatA = np.random.randint(low=0, high=10, size=(4, 6))

In [93]:
MatA

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [94]:
np.sort(MatA, axis = 0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [95]:
np.sort(MatA)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

## Linear Algebra

In [96]:
# 3x3 left-hand operand for the matrix-product examples.
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [97]:
# 3x2 right-hand operand for the matrix-product examples.
B = np.array([
    [6, 5],
    [4, 3],
    [2, 1],
])

In [98]:
# Matrix product of A (3x3) and B (3x2); the result is 3x2.
np.dot(A, B)

array([[20, 14],
       [56, 41],
       [92, 68]])

In [99]:
A @ B

array([[20, 14],
       [56, 41],
       [92, 68]])

In [100]:
# B transposed (2x3) times A (3x3) gives a 2x3 result. The transpose is needed
# because B itself is 3x2, so its inner dimension (2) would not match A's rows (3).
B.T @ A

array([[36, 48, 60],
       [24, 33, 42]])

### Dot Product Example
### Ứng dụng của tích ma trận

In [105]:
# Number of jars sold: a 5x3 table of random counts below 20, seeded for repeatability.
np.random.seed(0)
sales_amounts = np.random.randint(0, 20, size=(5, 3))

In [106]:
sales_amounts 

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]])

In [109]:
# Label the sales matrix: one row per weekday, one column per nut-butter product.
# pandas is imported in the notebook's top import cell rather than here, so every
# dependency is declared in one place.
# NOTE(review): "Almon Butter" looks like a typo for "Almond Butter"; it is left as is
# because the price table uses the same labels and DataFrame.dot aligns on them.
weekly_sales = pd.DataFrame(
    sales_amounts,
    index=["Mon", "Tues", "Wed", "Thurs", "Fri"],
    columns=["Almon Butter", "Peanut Butter", "Cashew Butter"],
)

In [110]:
weekly_sales

Unnamed: 0,Almon Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thurs,4,6,12
Fri,1,6,7


In [111]:
# Unit price per product, listed in the same order as the sales table's columns.
prices = np.array([10, 8, 12])

In [112]:
# One-row price table whose columns carry the same product labels as the sales table.
butter_prices = pd.DataFrame(
    prices.reshape(1, 3),
    index=["Price"],
    columns=["Almon Butter", "Peanut Butter", "Cashew Butter"],
)

In [113]:
butter_prices

Unnamed: 0,Almon Butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [114]:
weekly_sales.shape, butter_prices.shape

((5, 3), (1, 3))

In [116]:
# (5x3) sales times (3x1) transposed prices gives one revenue total per day.
total_prices = weekly_sales @ butter_prices.T

In [120]:
total_prices

Unnamed: 0,Price
Mon,240
Tues,138
Wed,458
Thurs,232
Fri,142


In [121]:
# Attach the per-day totals as a new column. This modifies weekly_sales in place.
# NOTE(review): after this runs, weekly_sales has four columns, so re-running the
# dot-product cell above would presumably fail to align with the three price labels.
weekly_sales["Total Price"] = total_prices
weekly_sales

Unnamed: 0,Almon Butter,Peanut Butter,Cashew Butter,Total Price
Mon,12,15,0,240
Tues,3,3,7,138
Wed,9,19,18,458
Thurs,4,6,12,232
Fri,1,6,7,142
