In [2]:
import numpy as np

## Data Types & Attributes

In [3]:
# Numpy's main data type is the ndarray

arr1 = np.array([
    [4, 2, 5],
    [3, 6, 2],
    [7, 4.4, 7.3]
])
arr1

array([[4. , 2. , 5. ],
       [3. , 6. , 2. ],
       [7. , 4.4, 7.3]])

In [4]:
arr1.shape

(3, 3)

In [5]:
type (arr1) # Result is numpy.ndarray which means n-dimenstional array

numpy.ndarray

In [6]:
arr1.ndim

2

In [7]:
arr1.dtype # The type of array elements

dtype('float64')

In [8]:
arr1.size  # The number of elements

9

## Combination with Pandas

In [9]:
arr1

array([[4. , 2. , 5. ],
       [3. , 6. , 2. ],
       [7. , 4.4, 7.3]])

In [10]:
import pandas as pd
df = pd.DataFrame(arr1)
df

Unnamed: 0,0,1,2
0,4.0,2.0,5.0
1,3.0,6.0,2.0
2,7.0,4.4,7.3


## Creating Arrays

In [11]:
array_ones = np.ones((2, 3), 'int') # Creates a 2x3 np-arr populated with ones. Try np.ones((2, 3), dtype='int', order='C')
array_ones

array([[1, 1, 1],
       [1, 1, 1]])

In [12]:
arr_zeros = np.zeros((3, 3))
arr_zeros

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [13]:
range_arr = np.arange(0, 11, 2) # Args are 'start', 'stop', 'step'
range_arr

array([ 0,  2,  4,  6,  8, 10])

In [14]:
random_arr = np.random.randint(2, 6, size=(3, 3)) # Create a 3x3 np-arr with random elements between 2 and 6 (6 non-inclusive)
random_arr

array([[5, 3, 3],
       [4, 2, 5],
       [3, 2, 3]])

In [15]:
random_arr2 = np.random.random((2, 3))  # Creates a 2x3 array with elements between 0 and 1 (both non-inclusive)
random_arr2

array([[0.85826378, 0.9446245 , 0.68443507],
       [0.14659732, 0.60778344, 0.72657431]])

In [16]:
# Numpy actually applies a seed, but it's a random seed.
random_arr3 = np.random.rand(2, 3, 4)   # Create a 2x3x4 array with elements between 0 and 1 (both non-inclusive)
random_arr3

array([[[0.03861861, 0.24363558, 0.11792135, 0.76119159],
        [0.51810022, 0.91624202, 0.14216411, 0.46497221],
        [0.62126547, 0.29789662, 0.00813099, 0.65160115]],

       [[0.64438012, 0.12720667, 0.04911451, 0.36017427],
        [0.62210202, 0.70154253, 0.39969274, 0.64118519],
        [0.99709703, 0.79120416, 0.49451638, 0.24652094]]])

In [17]:
np.random.seed() # Produces random values only once. Now these random values can be shared/reproduced
random_arr4 = np.random.randint(1, 10, (2, 2)) # Create a 2x2 matrix between random values between 1 and 10
random_arr4

array([[1, 8],
       [2, 4]])

## Viewing Arrays and Matrices

In [18]:
np.unique(random_arr4)            # Returns the unique numbers in a numpy array.

array([1, 2, 4, 8])

## Manipulating / Comparing Arrays

### Arithmetic

In [19]:
arr1 = np.array([4, 2, 5])
arr1

array([4, 2, 5])

In [20]:
ones_arr = np.ones(3, 'int')
ones_arr

array([1, 1, 1])

In [21]:
ones_arr + arr1             # Adds elements position by position

array([5, 3, 6])

### Use python functions on python data types and numpy methods on numpy data types

In [22]:
np.sum([2, 3, 7])           # Sums the elements of an array. Similar to sum([2, 3, 7])

12

In [23]:
new_random = np.random.randint(1, 1000, (50, 10))
new_random

array([[520, 423, 122, 253, 967, 863, 830, 493, 885, 759],
       [147, 421, 635, 576, 172, 378, 428, 638, 519, 423],
       [638, 596, 796, 100, 306, 470, 720, 952, 344, 473],
       [ 36, 626, 235,  38, 740, 530, 468, 111, 673, 124],
       [533, 221, 224, 681, 977, 341, 577, 397, 172,  15],
       [828, 522, 495, 323, 271, 329, 307, 979, 154, 674],
       [840, 700, 432, 558, 650, 334, 462, 895, 565, 424],
       [393, 547, 765, 631, 971, 434,  16, 660, 815, 768],
       [209, 446, 880, 274,  66, 434, 418, 678, 902, 214],
       [883, 816, 157, 932, 528, 794, 860, 794, 566, 855],
       [241, 382,  31, 396, 291, 299, 282, 917, 135, 566],
       [367,  16, 542,  92, 610, 457,  50, 888, 522, 410],
       [133, 587,  57, 416, 848, 845, 258, 753, 402, 739],
       [476, 824, 155, 834,  92, 245,  87, 571, 271, 739],
       [ 92, 810,  77, 696, 300, 625, 250, 239, 407, 849],
       [ 89, 105, 132, 439, 521, 136, 774, 855, 694,  86],
       [516, 342, 266, 442, 400, 455, 242,  96, 913, 892

In [24]:
%timeit sum(new_random)               # Measuring the time python's sum() takes to execute
%timeit np.sum(new_random)            # Measuring the time numpy.sum() takes to execute

65.9 µs ± 1.83 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
10 µs ± 184 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [25]:
np.mean(new_random), np.max(new_random), np.std(new_random), np.var(new_random) # np.std is standard dev, np.var is variance

(485.19, 998, 284.33729600599355, 80847.6979)

### Reshaping and Transposing

In [26]:
new_arr = np.array([[1, 2, 3], [4, 5, 6]])
new_arr

array([[1, 2, 3],
       [4, 5, 6]])

In [27]:
new_arr.reshape(3, 2)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [28]:
new_arr.T # Transpose

array([[1, 4],
       [2, 5],
       [3, 6]])

In [51]:
# Dot product
arr4 = np.random.randint(4, size=(3, 4))
new_arr.dot(arr4)

array([[11, 12,  6, 16],
       [29, 27, 18, 40]])

# Comparison Operators

In [55]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([[1, 2.1, 3.3], [4.2, 5.1, 6]])

arr1, arr2

(array([1, 2, 3]),
 array([[1. , 2.1, 3.3],
        [4.2, 5.1, 6. ]]))

In [56]:
arr1 > arr2

array([[False, False, False],
       [False, False, False]])

# Sorting Arrays

In [64]:
np.random.seed(2)
random_arr = np.random.randint(1, 19, size=(3, 4))
random_arr

array([[ 9, 16, 14,  9],
       [12, 12,  9,  8],
       [ 3, 18, 12, 16]])

In [65]:
np.sort(random_arr)              # Sorts elements in each row, within each row. Returns a sorted copy of the array

array([[ 9,  9, 14, 16],
       [ 8,  9, 12, 12],
       [ 3, 12, 16, 18]])

In [66]:
np.argsort(random_arr)           # Sorts but returns the initial indices of elements before sorting took place

array([[0, 3, 2, 1],
       [3, 2, 0, 1],
       [0, 2, 3, 1]], dtype=int64)

In [69]:
np.argmin(random_arr)            # Resolves to 8. The 9th element (at index 8) is the minimum number

8

<img src="img/panda.png"/>

### Turn image into a Numpy Array

In [76]:
from matplotlib.image import imread

panda_img_arr = imread('img/panda.png')
panda_img_arr

array([[[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       [[0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        [0.05490196, 0.10588235, 0.06666667],
        ...,
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765],
        [0.16470589, 0.12941177, 0.09411765]],

       ...,

       [[0.13333334, 0.07450981, 0.05490196],
        [0.12156863, 0.0627451 , 0.04313726],
        [0.10980392, 0

In [85]:
print(type(panda_img_arr))
panda_img_arr.size, panda_img_arr.shape, 

<class 'numpy.ndarray'>


(24465000, (2330, 3500, 3))

<img src="img/car-photo.png" />

In [93]:
car_img_arr = imread('img/car-photo.png')
car_img_arr.size, car_img_arr.shape

(991300, (431, 575, 4))