# Numpy

* It's fast: the core is written in C <br>
* Vectorization via broadcasting (avoiding explicit Python loops) <br>
* Backbone of other Python scientific packages <br>

In [1]:
import numpy as np
import pandas as pd
from matplotlib.image import imread

## 1. Datatypes and Attributes

In [2]:
# NumPy's main datatype is ndarray (n-dimensional array)
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [3]:
# Confirm that np.array() produced an ndarray
type(a1)

numpy.ndarray

In [10]:
# 2-D array: the input mixes ints and floats, so the whole array is stored as floats
a2 = np.array([
    [1, 2.0, 3],
    [4.6, 5, 7],
])

# 3-D array: two stacked 3x3 blocks of integers
a3 = np.array([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[11, 12, 13], [14, 15, 16], [17, 18, 19]],
])

print("a2: \n", a2)
print("\na3: \n", a3)

a2: 
 [[1.  2.  3. ]
 [4.6 5.  7. ]]

a3: 
 [[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]]

 [[11 12 13]
  [14 15 16]
  [17 18 19]]]


In [11]:
# a1 is 1-D: a single axis holding 3 elements
a1.shape

(3,)

In [12]:
# a2 is 2-D: 2 rows x 3 columns
a2.shape

(2, 3)

In [13]:
# a3 is 3-D: 2 blocks, each with 3 rows x 3 columns
a3.shape

(2, 3, 3)

In [14]:
# Number of dimensions (axes) of each array
a1.ndim, a2.ndim, a3.ndim

(1, 2, 3)

In [15]:
# Element type of each array; a2 is a float array because its input contained floats (2.0, 4.6)
a1.dtype, a2.dtype, a3.dtype

(dtype('int64'), dtype('float64'), dtype('int64'))

In [16]:
# Total number of elements in each array (the product of its shape)
a1.size, a2.size, a3.size

(3, 6, 18)

In [17]:
# All three are ndarrays, regardless of how many dimensions they have
type(a1), type(a2), type(a3)

(numpy.ndarray, numpy.ndarray, numpy.ndarray)

In [18]:
# Create a DataFrame from a NumPy array: each row of a2 becomes a DataFrame row
# (pandas is imported as pd in the imports cell at the top of the notebook)
df = pd.DataFrame(a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.6,5.0,7.0


## 2. Creating arrays

In [19]:
# 2x3 array filled with ones
ones = np.ones(shape=(2, 3))
ones

array([[1., 1., 1.],
       [1., 1., 1.]])

In [20]:
# 2x3 array filled with zeros
zeros = np.zeros(shape=(2, 3))
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [22]:
# Evenly spaced integers from 0 (inclusive) up to 10 (exclusive) in steps of 2
range_array = np.arange(0, 10, 2)
range_array

array([0, 2, 4, 6, 8])

In [28]:
# 10 random integers drawn from [0, 10)
random_array1 = np.random.randint(low=0, high=10, size=10)
random_array1

array([0, 5, 7, 9, 5, 4, 2, 6, 8, 9], dtype=int32)

In [29]:
# 10 random integers drawn from [90, 100)
random_array2 = np.random.randint(low=90, high=100, size=10)
random_array2

array([98, 96, 98, 99, 93, 99, 94, 90, 93, 95], dtype=int32)

In [30]:
# 3x5 grid of random integers drawn from [10, 20)
random_array3 = np.random.randint(low=10, high=20, size=(3, 5))
random_array3

array([[18, 17, 17, 12, 17],
       [14, 14, 16, 17, 11],
       [11, 12, 14, 16, 16]], dtype=int32)

In [33]:
# 2x3x5 array of random floats drawn from [0.0, 1.0)
random_array4 = np.random.random(size=(2, 3, 5))
random_array4

array([[[0.69719931, 0.85609822, 0.32470819, 0.22768897, 0.38174607],
        [0.80219228, 0.78083138, 0.04793414, 0.92430371, 0.71818856],
        [0.93713883, 0.08503536, 0.60258168, 0.51276706, 0.6142923 ]],

       [[0.19456533, 0.94360238, 0.08607082, 0.45426962, 0.1644924 ],
        [0.93959812, 0.0019664 , 0.70901663, 0.73871516, 0.38709276],
        [0.91748371, 0.40681408, 0.81992761, 0.22867355, 0.96046   ]]])

In [47]:
# NumPy's random numbers are pseudo-random: fixing the seed makes the
# generated sequence repeat every time this cell is run.
# The keyword form np.random.seed(seed=69) is equivalent to the call below.
np.random.seed(69)

# 5x3 grid of random integers drawn from [0, 10)
random_array5 = np.random.randint(low=0, high=10, size=(5, 3))
random_array5

array([[6, 9, 7],
       [4, 1, 6],
       [9, 8, 1],
       [6, 0, 8],
       [5, 4, 5]], dtype=int32)

## 3. Viewing arrays and matrices

In [48]:
# Distinct values that occur in random_array5, returned in sorted order
np.unique(random_array5)

array([0, 1, 4, 5, 6, 7, 8, 9], dtype=int32)

In [66]:
# Two length-3 operands: b1 is random 0/1 values, b2 is fixed
b1 = np.random.randint(2, size=3)
b2 = np.array([1, 2, 3])

# Arithmetic operators work element by element on same-shaped arrays
print("b1: \t", b1)
print("b1 + b2: ", b1 + b2)
print("b1 - b2: ", b1 - b2)
print("b1 * b2: ", b1 * b2)
print("b1 / b2: ", b1 / b2)  # true division, so the result is a float array

b1: 	 [1 0 0]
b1 + b2:  [2 2 3]
b1 - b2:  [ 0 -2 -3]
b1 * b2:  [1 0 0]
b1 / b2:  [1. 0. 0.]


In [67]:
# NumPy's own sum, intended for NumPy arrays
np.sum(b2)

np.int64(6)

In [68]:
# Python's built-in sum gives the same total here
sum(b2)

np.int64(6)

In [72]:
# 10,000 random floats; only the first ten are displayed
massive_array = np.random.random(size=10_000)
massive_array[:10]

array([0.22270233, 0.37343058, 0.32576902, 0.94736749, 0.97986627,
       0.11608845, 0.08068402, 0.49789766, 0.47914073, 0.74672875])

In [73]:
# Time Python's built-in sum() against NumPy's np.sum() on the same 10,000-element array
%timeit sum(massive_array) # Python function : sum()
%timeit np.sum(massive_array) # Numpy function : np.sum()

627 μs ± 18.5 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
5.76 μs ± 545 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [74]:
# Largest element of b2
np.max(b2)

np.int64(3)

In [75]:
# Smallest element of b2
np.min(b2)

np.int64(1)

In [76]:
# Standard deviation: a measure of how spread out the array's values are.
# By default it is computed over the flattened array; pass `axis` to compute it along one axis.
np.std(b2)

np.float64(0.816496580927726)

In [77]:
# Variance: the mean of the squared deviations from the mean
np.var(b2)

np.float64(0.6666666666666666)

In [78]:
# Standard deviation is the square root of the variance,
# so this reproduces the value returned by np.std(b2)
np.sqrt(np.var(b2))

np.float64(0.816496580927726)

In [79]:
# Two samples of six values each: one widely spread, one tightly clustered
high_var_array = np.array([1, 100, 200, 700, 2000, 7000])
low_var_array = np.array([1, 2, 4, 7, 9, 15])

# NOTE(review): "varience" in the labels is a typo for "variance"; it is left
# unchanged so the printed text still matches the recorded output.
print("high varience: ", high_var_array.var())
print("low varience: ", low_var_array.var())

high varience:  6145000.138888889
low varience:  22.555555555555557


In [80]:
# Standard deviation of both samples, for comparison with the variances above
np.std(high_var_array), np.std(low_var_array)

(np.float64(2478.911079262201), np.float64(4.749268949591669))

In [81]:
# Arithmetic mean of each sample
np.mean(high_var_array), np.mean(low_var_array)

(np.float64(1666.8333333333333), np.float64(6.333333333333333))

In [82]:
# Median of each sample; both have an even number of values, so it is the average of the two middle ones
np.median(high_var_array), np.median(low_var_array)

(np.float64(450.0), np.float64(5.5))

In [83]:
# Recall the 2-D array defined earlier
a2

array([[1. , 2. , 3. ],
       [4.6, 5. , 7. ]])

In [84]:
# Current shape of a2, before reshaping
a2.shape

(2, 3)

In [88]:
# Same 6 values rearranged to shape (2, 3, 1): a trailing axis of length 1 is added
a2.reshape(2,3,1)

array([[[1. ],
        [2. ],
        [3. ]],

       [[4.6],
        [5. ],
        [7. ]]])

In [92]:
# Broadcasting: a2_reshape has shape (2, 3, 1) and a3 has shape (2, 3, 3),
# so the length-1 trailing axis is stretched to length 3 for the multiplication
a2_reshape = a2.reshape(2, 3, 1)
a2_reshape * a3

array([[[  1. ,   2. ,   3. ],
        [  8. ,  10. ,  12. ],
        [ 21. ,  24. ,  27. ]],

       [[ 50.6,  55.2,  59.8],
        [ 70. ,  75. ,  80. ],
        [119. , 126. , 133. ]]])

In [94]:
# Transpose: swaps the axes, turning the (2, 3) array into a (3, 2) array
a2.T

array([[1. , 4.6],
       [2. , 5. ],
       [3. , 7. ]])

In [95]:
# Fixed seed so the generated sales figures are the same on every run
np.random.seed(0)

# Number of jars sold: a 5x3 grid of random integers drawn from [0, 20)
sales_amounts = np.random.randint(low=0, high=20, size=(5, 3))
sales_amounts

array([[12, 15,  0],
       [ 3,  3,  7],
       [ 9, 19, 18],
       [ 4,  6, 12],
       [ 1,  6,  7]], dtype=int32)

In [96]:
# Wrap the raw sales counts in a DataFrame: one row per weekday, one column per product
# NOTE(review): the "Thursh" label looks like a typo for "Thurs"; left as-is because the recorded outputs use it
weekly_sales = pd.DataFrame(sales_amounts,
                            index=["Mon", "Tues", "Wed", "Thursh", "Fri"],
                            columns=["Almond Butter", "Peanut Butter", "Cashew Butter"])

weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Mon,12,15,0
Tues,3,3,7
Wed,9,19,18
Thursh,4,6,12
Fri,1,6,7


In [97]:
# Price per jar, in the same order as the product columns:
# Almond Butter, Peanut Butter, Cashew Butter
prices = np.array([10, 8, 12])
prices

array([10,  8, 12])

In [98]:
# Create the butter prices DataFrame: the 1-D prices array is reshaped to a single
# 1x3 row so that it lines up with the three product columns
butter_prices = pd.DataFrame(prices.reshape(1, 3),
                             index=["Price"],
                             columns=["Almond Butter", "Peanut Butter", "Cashew Butter"])
butter_prices

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter
Price,10,8,12


In [100]:
# Total takings per day = (jars sold per product) dot (price per jar).
# The inner dimensions must match, so the 1x3 price row is transposed to 3x1.
# Selecting the product columns explicitly keeps this cell re-runnable even after
# extra columns (such as a totals column) have been added to weekly_sales.
total_sales = weekly_sales[butter_prices.columns].dot(butter_prices.T)
total_sales

Unnamed: 0,Price
Mon,240
Tues,138
Wed,458
Thursh,232
Fri,142


In [102]:
# Attach the per-day totals as a new column.
# Note: this mutates weekly_sales in place, so the frame has four columns from here on.
weekly_sales["Total (₹)"] = total_sales
weekly_sales

Unnamed: 0,Almond Butter,Peanut Butter,Cashew Butter,Total (₹)
Mon,12,15,0,240
Tues,3,3,7,138
Wed,9,19,18,458
Thursh,4,6,12,232
Fri,1,6,7,142


In [105]:
# Returns a sorted copy, sorting along the last axis (within each row) by default;
# the rows of a2 are already in ascending order, so the result looks unchanged
np.sort(a2)

array([[1. , 2. , 3. ],
       [4.6, 5. , 7. ]])

## Practical Example: Converting image into data

![](panda.png)

In [118]:
# Turn an image into a NumPy array
# (imread is imported in the imports cell at the top of the notebook)
panda = imread("panda.png")

# Show only the type: the array holds millions of values, so printing it in full is impractical
print(type(panda))

<class 'numpy.ndarray'>


In [115]:
# Shape, total element count and number of dimensions of the image array
# (presumably height x width x colour channels; verify against the image)
panda.shape, panda.size, panda.ndim

((2330, 3500, 3), 24465000, 3)

## Car Photo

![](car-photo.png)

In [119]:
# Load the car image as an array and inspect its element type and dimensions
# NOTE(review): the last axis has length 4 here (3 for the panda image), presumably an extra alpha channel; confirm
car = imread("car-photo.png")
car.dtype, car.shape, car.size, car.ndim

(dtype('float32'), (431, 575, 4), 991300, 3)

## Dog Photo

![](dog-photo.png)

In [121]:
# Load the dog image as an array
dog = imread("dog-photo.png")

# Element type, shape, total element count and number of dimensions
dog.dtype, dog.shape, dog.size, dog.ndim

(dtype('float32'), (432, 575, 4), 993600, 3)