# NumPy Tutorial: Your First Steps Into Data Science in Python

In [1]:
import numpy as np

In [2]:
# Target mean that curve() shifts the class average toward.
CURVE_CENTER = 80
# Eight raw scores used as the input for the curving example.
grades = np.array([72, 35, 64, 88, 51, 90, 74, 12])

In [3]:
def curve(grades, center=None):
    """Shift grades so their mean moves toward a target, never lowering a grade.

    Parameters
    ----------
    grades : array_like
        Original scores. Lists and tuples are accepted as well as arrays.
    center : float, optional
        Mean to curve toward. When omitted, the notebook-level
        ``CURVE_CENTER`` constant is used.

    Returns
    -------
    numpy.ndarray
        The shifted scores, clipped element-wise to the range
        ``[original grade, 100]``.
    """
    grades = np.asarray(grades)
    if grades.size == 0:
        # mean() of an empty array is NaN (plus a RuntimeWarning); nothing to curve.
        return grades.astype(float)
    if center is None:
        center = CURVE_CENTER
    change = center - grades.mean()
    new_grades = grades + change
    # The lower bound is each student's own original grade, so the curve can
    # only help; the upper bound caps curved scores at 100.
    return np.clip(new_grades, grades, 100)

In [4]:
# Apply the curve to the sample grades; the two highest scores end up capped at 100.
curve(grades)

array([ 91.25,  54.25,  83.25, 100.  ,  70.25, 100.  ,  93.25,  31.25])

In [5]:
# Twelve readings laid out as 2 blocks, each with 2 rows of 3 values.
temperatures = np.reshape(
    [
        29.3, 42.1, 18.8, 16.1, 38.0, 12.5,
        12.6, 49.9, 38.6, 31.3, 9.2, 22.2,
    ],
    (2, 2, 3),
)

In [6]:
# Confirm the layout requested by reshape: 2 blocks of 2 rows by 3 columns.
temperatures.shape

(2, 2, 3)

In [7]:
# Display the full 3-D array; the blank line in the output separates the two blocks.
temperatures

array([[[29.3, 42.1, 18.8],
        [16.1, 38. , 12.5]],

       [[12.6, 49.9, 38.6],
        [31.3,  9.2, 22.2]]])

In [8]:
# Exchange axes 1 and 2: each 2x3 block is shown as 3x2 (shape (2, 2, 3) -> (2, 3, 2)).
np.swapaxes(temperatures, 1, 2)

array([[[29.3, 16.1],
        [42.1, 38. ],
        [18.8, 12.5]],

       [[12.6, 31.3],
        [49.9,  9.2],
        [38.6, 22.2]]])

In [9]:
# 4x4 integer grid used for the axis-wise max() examples.
table = np.array(
    [
        [5, 3, 7, 1],
        [2, 6, 7, 9],
        [1, 1, 1, 1],
        [4, 3, 2, 0],
    ]
)

In [10]:
table.max()  # No axis given: largest value in the whole array

9

In [11]:
table.max(axis=0)  # Reduce along axis 0 (down the rows): largest value in each column

array([5, 6, 7, 9])

In [12]:
table.max(axis=1)  # Reduce along axis 1 (across the columns): largest value in each row

array([7, 9, 1, 4])

Arrays can be broadcast against each other if, for every axis, their sizes either match or one of the arrays has a size of 1 along that axis.

In [13]:
# 32 consecutive integers shaped (4, 1, 8); the length-1 middle axis can be stretched during broadcasting.
A = np.arange(32).reshape(4, 1, 8)
A

array([[[ 0,  1,  2,  3,  4,  5,  6,  7]],

       [[ 8,  9, 10, 11, 12, 13, 14, 15]],

       [[16, 17, 18, 19, 20, 21, 22, 23]],

       [[24, 25, 26, 27, 28, 29, 30, 31]]])

In [14]:
# 48 consecutive integers shaped (1, 6, 8): a length-1 leading axis and the same trailing length (8) as A.
B = np.arange(48).reshape(1, 6, 8)
B

array([[[ 0,  1,  2,  3,  4,  5,  6,  7],
        [ 8,  9, 10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 29, 30, 31],
        [32, 33, 34, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 45, 46, 47]]])

In [15]:
# Shapes (4, 1, 8) and (1, 6, 8) broadcast to (4, 6, 8): each 8-element row of A is added to every row of B.
A + B

array([[[ 0,  2,  4,  6,  8, 10, 12, 14],
        [ 8, 10, 12, 14, 16, 18, 20, 22],
        [16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54]],

       [[ 8, 10, 12, 14, 16, 18, 20, 22],
        [16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62]],

       [[16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62],
        [56, 58, 60, 62, 64, 66, 68, 70]],

       [[24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62],
        [56, 58, 60, 62, 64, 66, 68, 70],
        [64, 66, 68, 70, 72, 74, 76, 78]]])

In [16]:
# 4x4 grid whose rows, columns and 2x2 quadrants are each checked to sum to 34.
square = np.array(
    [
        [16, 3, 2, 13],
        [5, 10, 11, 8],
        [9, 6, 7, 12],
        [4, 15, 14, 1],
    ]
)

In [17]:
# Every column (square[:, i]) and every row (square[i, :]) must total 34.
for i in range(4):
    assert square[:, i].sum() == 34
    assert square[i, :].sum() == 34

In [18]:
# Each 2x2 quadrant must also total 34.
assert square[:2, :2].sum() == 34  # top-left
assert square[2:, :2].sum() == 34  # bottom-left
assert square[:2, 2:].sum() == 34  # top-right
assert square[2:, 2:].sum() == 34  # bottom-right

## Masking

In [19]:
# 24 evenly spaced values from 5 to 50 (both endpoints included), converted to integers,
# then laid out as 4 rows; the -1 lets reshape infer the 6 columns.
numbers = np.linspace(5, 50, 24, dtype=int).reshape(4, -1)
numbers

array([[ 5,  6,  8, 10, 12, 14],
       [16, 18, 20, 22, 24, 26],
       [28, 30, 32, 34, 36, 38],
       [40, 42, 44, 46, 48, 50]])

In [20]:
# Boolean array with the same shape as numbers: True where the value is divisible by 4.
mask = numbers % 4 == 0
mask

array([[False, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False]])

In [21]:
# Indexing with the boolean mask returns the selected values as a flat 1-D array.
numbers[mask]

array([ 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48])

In [22]:
by_four = numbers[numbers % 4 == 0]  # Same selection, with the condition written inline
by_four

array([ 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48])

In [23]:
from numpy.random import default_rng

In [24]:
# NOTE(review): default_rng() is called without a seed, so the samples -- and every
# number derived from them in the following cells -- change on each run.
rng = default_rng()
# 10,000 draws from the standard normal distribution (mean 0, standard deviation 1).
values = rng.standard_normal(10000)
values[:5]  # Peek at the first five samples only

array([ 0.30879936, -0.79240605, -1.03264633,  1.26777262,  0.06777724])

In [25]:
# Standard deviation of the drawn samples (NumPy's default, ddof=0).
std = values.std()
std

1.0061769860884635

In [26]:
# Keep the samples strictly between -2*std and +2*std; & combines the two boolean
# masks element-wise (the parentheses are required around each comparison).
filtered = values[(values > -2 * std) & (values < 2 * std)]
filtered.size

9548

In [27]:
# Total number of samples, for comparison with filtered.size.
values.size

10000

In [28]:
# Fraction of samples kept by the filter; for normally distributed data this is about 95%.
filtered.size / values.size

0.9548

## Transposing, Sorting, and Concatenating

In [29]:
# Integers 1 through 6 arranged in two columns; the -1 lets reshape infer the 3 rows.
a = np.arange(1,7).reshape(-1, 2)
a

array([[1, 2],
       [3, 4],
       [5, 6]])

In [30]:
# .T is the transposed array: shape (3, 2) becomes (2, 3).
a.T

array([[1, 3, 5],
       [2, 4, 6]])

In [31]:
# transpose() with no arguments reverses the axes, giving the same result as .T.
a.transpose()

array([[1, 3, 5],
       [2, 4, 6]])

In [32]:
# 3x3 unsorted integers used as the input for the np.sort examples.
data = np.array(
    [
        [7, 1, 4],
        [8, 6, 5],
        [1, 2, 3],
    ]
)

In [33]:
np.sort(data)  # Default axis=-1: returns a copy with the values sorted within each row

array([[1, 4, 7],
       [5, 6, 8],
       [1, 2, 3]])

In [34]:
np.sort(data, axis=None)  # axis=None: flatten the array first, then sort all values

array([1, 1, 2, 3, 4, 5, 6, 7, 8])

In [35]:
np.sort(data, axis=0)  # Sort along axis 0: each column is sorted from top to bottom

array([[1, 1, 3],
       [7, 2, 4],
       [8, 6, 5]])

In [36]:
 a = np.array([
     [4, 8],
     [6, 1]
     ])

In [37]:
# Second 2x2 operand for the stacking/concatenation examples.
b = np.array(
    [
        [3, 5],
        [7, 2],
    ]
)

In [38]:
np.hstack((a, b))  # Horizontal stack: a's columns followed by b's columns, row by row

array([[4, 8, 3, 5],
       [6, 1, 7, 2]])

In [39]:
np.vstack((b, a))  # Vertical stack: b's rows first, then a's rows

array([[3, 5],
       [7, 2],
       [4, 8],
       [6, 1]])

In [40]:
np.concatenate((a, b))  # Default axis=0: rows of a followed by rows of b

array([[4, 8],
       [6, 1],
       [3, 5],
       [7, 2]])

In [41]:
np.concatenate((a, b), axis=None)  # axis=None: flatten both inputs, then join them

array([4, 8, 6, 1, 3, 5, 7, 2])

In [42]:
# dtype=str yields a fixed-width Unicode dtype sized to the longest element: '<U3' here.
names = np.array(["bob", "amy", "han"], dtype=str)
names

array(['bob', 'amy', 'han'], dtype='<U3')

In [43]:
more_names = np.array(["bobo", "jehosephat"])
# Concatenation builds a new array wide enough for the longest string ('<U10');
# names itself keeps its '<U3' dtype.
np.concatenate((names, more_names))

array(['bob', 'amy', 'han', 'bobo', 'jehosephat'], dtype='<U10')

In [44]:
# Assigning into the existing '<U3' array keeps only the first 3 characters:
# "alexander" is stored as 'ale', and no error is raised.
names[2] = "alexander"
names

array(['bob', 'amy', 'ale'], dtype='<U3')

A NumPy string array cannot store a string longer than the fixed width of its dtype; longer values are silently truncated on assignment.

## Structured Arrays

In [45]:
# Structured array: each record has a name (string of up to 10 characters),
# an age (integer) and a power (integer), as declared by the dtype list.
data = np.array([
    ("joe", 32, 6),
    ("mary", 15, 20),
    ("felipe", 80, 100),
    ("beyonce", 38, 9001),
    ], dtype=[("name", str, 10), ("age", int), ("power", int)])

In [46]:
# An integer index selects one whole record.
data[0]

('joe', 32, 6)

In [47]:
# A field name selects that field from every record.
data["name"]

array(['joe', 'mary', 'felipe', 'beyonce'], dtype='<U10')

In [48]:
# Filter records with a boolean mask on one field, then read another field of the matches.
data[data["power"] > 9000]["name"]

array(['beyonce'], dtype='<U10')

In [49]:
# Keep the records with age > 20, sort them by ascending power, then list their names.
np.sort(data[data["age"] > 20], order="power")["name"]

array(['joe', 'felipe', 'beyonce'], dtype='<U10')

## Manipulating Images With Matplotlib

In [50]:
import matplotlib.image as mpimg

In [51]:
# Load the image file as a NumPy array.
img = mpimg.imread("kitty.jpg")
print(type(img))
# Shape printed below is (1299, 1920, 3); the last axis holds the R, G and B channels.
print(img.shape)

<class 'numpy.ndarray'>
(1299, 1920, 3)


In [52]:
# Work on a copy so img itself stays untouched for the other image cells.
output = img.copy()
# Zero channels 0 and 1 (red and green) for every pixel, leaving only the blue channel.
output[:, :, :2] = 0
mpimg.imsave("blue.jpg", output)

In [55]:
# Naive grayscale: unweighted mean over the channel axis, giving one value per pixel.
averages = img.mean(axis=2)  # Take the average of each R, G, and B
# cmap="gray" renders the resulting 2-D array in shades of gray.
mpimg.imsave("bad-gray.jpg", averages, cmap="gray")

In [54]:
# Per-channel weights for R, G and B; they sum to 1.0, with green weighted most heavily.
weights = np.array([0.3, 0.59, 0.11])
# Matrix product over the last axis: each pixel becomes the weighted sum of its three channels.
grayscale = img @ weights
mpimg.imsave("good-gray.jpg", grayscale, cmap="gray")