# NumPy (Numerical Python)

In [1]:
# Five sample heights as a plain Python list
# (presumably metres, given the BMI calculation further down — TODO confirm units)
height = [1.73, 1.68, 1.71, 1.89, 1.79]
height

[1.73, 1.68, 1.71, 1.89, 1.79]

In [2]:
# Five sample weights as a plain Python list, same length as `height`
# (presumably kilograms, given the BMI calculation further down — TODO confirm units)
weight = [65.4, 59.2, 63.6, 88.4, 68.7]
weight

[65.4, 59.2, 63.6, 88.4, 68.7]

In [4]:
import numpy as np

In [3]:
# Wrap the Python list in a NumPy array so arithmetic on it applies element-wise
np_height = np.array(height)
np_height

array([1.73, 1.68, 1.71, 1.89, 1.79])

In [5]:
# Wrap the Python list in a NumPy array so arithmetic on it applies element-wise
np_weight = np.array(weight)
np_weight

array([65.4, 59.2, 63.6, 88.4, 68.7])

In [6]:
# Element-wise calculation over both arrays. `**` binds tighter than `/`,
# so each weight is divided by the square of the corresponding height.
bmi = np_weight / np_height ** 2
bmi

array([21.85171573, 20.97505669, 21.75028214, 24.7473475 , 21.44127836])

NumPy Array

In [9]:
# Import the numpy package as np
import numpy as np

# Sample values held in a regular Python list
baseball = [180, 215, 210, 210, 188, 176, 209, 200]

# Create a numpy array from baseball: np_baseball
np_baseball = np.array(baseball)

# Show the list, the array, and the array's type, one item per line
print(baseball, np_baseball, type(np_baseball), sep="\n")

[180, 215, 210, 210, 188, 176, 209, 200]
[180 215 210 210 188 176 209 200]
<class 'numpy.ndarray'>


NumPy Boolean

In [10]:
# Element-wise comparison: produces a boolean array, True where the value exceeds 180
np_baseball > 180

array([False,  True,  True,  True,  True, False,  True,  True])

Filtering a NumPy Array with a Boolean Condition

In [11]:
# Boolean-mask indexing: keep only the elements for which the comparison is True
np_baseball[np_baseball > 180]

array([215, 210, 210, 188, 209, 200])

In [13]:
# Import numpy
import numpy as np

# Heights as a regular list, then as a numpy array: np_height_in
height_in = [74, 74, 72, 75, 75, 73]
np_height_in = np.array(height_in)

# Convert np_height_in to m in one vectorised step: np_height_m
np_height_m = 0.0254 * np_height_in

# Print np_height_in followed by np_height_m, one per line
print(np_height_in, np_height_m, sep="\n")

[74 74 72 75 75 73]
[1.8796 1.8796 1.8288 1.905  1.905  1.8542]


# NumPy Side Effects

**numpy** is great for doing vector arithmetic. If you compare its functionality with regular Python lists, however, some things have changed.

First of all, **numpy** arrays cannot contain elements with different types. Second, the typical arithmetic operators, such as +, -, * and / have a different meaning for regular Python lists and numpy arrays.

Some lines of code have been provided for you. Try these out and select the one that would match this:

np.array([True, 1, 2]) + np.array([3, 4, False])  
The numpy package is already imported as np.

In [14]:
# Lists mixing booleans and integers. A NumPy array holds a single element type,
# so the booleans are coerced to numbers (True -> 1, False -> 0).
numpy_array = np.array([True, 1, 2])
numpy_array1 = np.array([3, 4, False])

In [15]:
# `+` on NumPy arrays adds element by element (for Python lists it would concatenate)
result = numpy_array + numpy_array1
result

array([4, 5, 2])

In [16]:
# The same element-wise sum written as one expression, without intermediate names
result = np.array([True, 1, 2]) + np.array([3, 4, False])
result

array([4, 5, 2])

# Subsetting NumPy Arrays

Subsetting (using the square bracket notation on lists or arrays) works exactly the same with both lists and arrays.

This exercise already has two lists, height_in and weight_lb, loaded in the background for you. These contain the height and weight of the MLB players as regular lists. It also has two numpy arrays, np_weight_lb and np_height_in, prepared for you.

In [22]:
import numpy as np

# Sample data as regular Python lists (ten entries each)
weight_lb = [71, 68, 69, 70, 79, 80, 81, 68, 72, 76]
height_in = [73, 69, 70, 71, 73, 75, 76, 69, 73, 75]

# NumPy versions of the same data
np_weight_lb = np.array(weight_lb)
np_height_in = np.array(height_in)

# Print out the weight at index 5
print(np_weight_lb[5])

# Print out sub-array of np_height_in: index 5 up to (but not including) index 10.
# A slice's stop index is exclusive, so this selects indices 5 through 9,
# which are the last five elements of this ten-element array.
print(np_height_in[5:10])

80
[75 76 69 73 75]


# 2D NumPy Arrays

In [23]:
# 2D array built from a list of two equal-length lists; each inner list becomes one row
np_2d = np.array([[1.73, 1.68, 1.71, 1.89, 1.79],
                  [65.4, 59.2, 63.6, 88.4, 68.7]])
np_2d

array([[ 1.73,  1.68,  1.71,  1.89,  1.79],
       [65.4 , 59.2 , 63.6 , 88.4 , 68.7 ]])

In [24]:
# Dimensions of the array as a (rows, columns) tuple
np_2d.shape

(2, 5)

In [25]:
# A single integer index on a 2D array selects a whole row (here the first one)
np_2d[0]

array([1.73, 1.68, 1.71, 1.89, 1.79])

In [27]:
# Chained indexing: take row 1 first, then element 3 of that row
np_2d[1][3]

np.float64(88.4)

In [28]:
# Same element selected in one step with a (row, column) index pair
np_2d[1, 3]

np.float64(88.4)

In [29]:
# All rows (`:`), columns 2 and 3 (the stop index 4 is exclusive)
np_2d[:, 2:4]

array([[ 1.71,  1.89],
       [63.6 , 88.4 ]])

In [30]:
# Row 1 across all columns
np_2d[1, :]

array([65.4, 59.2, 63.6, 88.4, 68.7])

2D NumPy Array Practice

In [31]:
import numpy as np

# Nested list: one two-element inner list per row
baseball = [[180, 78.4], [215, 102.7], [210, 98.5], [188, 75.2]]

# Create a 2D numpy array from baseball: np_baseball
np_baseball = np.array(baseball)

# Print out the type of np_baseball, then its shape, on separate lines
print(type(np_baseball), np_baseball.shape, sep="\n")

<class 'numpy.ndarray'>
(4, 2)


Baseball data in 2D form

In [32]:
import numpy as np

# Create a 2D numpy array from baseball: np_baseball
# NOTE: `baseball` is not defined in this cell; it must already exist from an
# earlier cell, so this cell fails on a fresh kernel if run on its own.
np_baseball = np.array(baseball)

# Print out the shape of np_baseball
print(np_baseball.shape)

(4, 2)


# Subsetting 2D NumPy Arrays

If your 2D numpy array has a regular structure, i.e. each row and column has a fixed number of values, complicated ways of subsetting become very easy. Have a look at the code below where the elements "a" and "c" are extracted from a list of lists.

NumPy:  
import numpy as np  
x = [["a", "b"], ["c", "d"]]  
np_x = np.array(x)  
np_x[:, 0]  
<br>
The indexes before the comma refer to the rows, while those after the comma refer to the columns. The : is for slicing; in this example, it tells Python to include all rows.

In [35]:
import numpy as np

# Synthetic data set: the same four 3-column rows stacked 125 times (500 rows),
# so row i holds the values of sample row i % 4.
np_baseball = np.tile(
    [[74, 180, 25],
     [72, 210, 30],
     [69, 200, 27],
     [71, 195, 24]],
    (125, 1),
)

# Print out the 50th row of np_baseball (index 49)
print(np_baseball[49])

# Select the entire second column of np_baseball: np_weight_lb
np_weight_lb = np_baseball[:, 1]

# Print out height of 124th player (row index 123, column 0)
print(np_baseball[123, 0])

[ 72 210  30]
71


2D Arithmetic
<br>


In [37]:
import numpy as np

# Four rows, three numeric columns each
np_baseball = np.array([[74, 180, 25], [72, 210, 30], [69, 200, 27], [71, 195, 24]])

# Example data for updated (changes in height, weight, and age); same shape as np_baseball
updated = np.array([[1, 5, 0], [-1, -10, 1], [0, 15, 2], [2, -5, 0]])

# Create numpy array: conversion (one factor per column)
conversion = np.array([0.0254, 0.453592, 1])

# Print out addition of np_baseball and updated (element-wise, shapes match)
print(np_baseball + updated)

# Print out product of np_baseball and conversion
# (the length-3 array is applied to every row)
print(np_baseball * conversion)

[[ 75 185  25]
 [ 71 200  31]
 [ 69 215  29]
 [ 73 190  24]]
[[ 1.8796  81.64656 25.     ]
 [ 1.8288  95.25432 30.     ]
 [ 1.7526  90.7184  27.     ]
 [ 1.8034  88.45044 24.     ]]


# NumPy: Basic Statistics

Average versus median

In [38]:
import numpy as np

# Create np_height_in from np_baseball: an independent copy of column 0
np_height_in = np_baseball[:, 0].copy()

# Print out the mean, then the median, of np_height_in (one per line)
print(np_height_in.mean(), np.median(np_height_in), sep="\n")

71.5
71.5


Explore the baseball data

In [39]:
# Print mean height (column 0)
avg = np.mean(np_baseball[:, 0])
print(f"Average: {avg!s}")

# Print median height
med = np.median(np_baseball[:, 0])
print(f"Median: {med!s}")

# Print out the standard deviation on height
stddev = np.std(np_baseball[:, 0])
print(f"Standard Deviation: {stddev!s}")

# Print out correlation between first and second column
# (np.corrcoef returns a 2x2 matrix; the off-diagonal entries are the coefficient)
corr = np.corrcoef(np_baseball[:, 0], np_baseball[:, 1])
print(f"Correlation: {corr!s}")

Average: 71.5
Median: 71.5
Standard Deviation: 1.8027756377319946
Correlation: [[ 1.         -0.54443572]
 [-0.54443572  1.        ]]


# Practice

In [41]:
# Slicing a list builds a new list, so editing y leaves x unchanged
x = ["e", "a", "b"]
y = x[1:]
y[0] = "g"

# Show both lists, one per line
print(x, y, sep="\n")

['e', 'a', 'b']
['g', 'b']
