In [None]:
import numpy as np

NumPy is useful to us mainly as a way to efficiently do math on collections of data. The core tool we will use is the `np.array` function, which builds NumPy's array type, `np.ndarray`.

In [None]:
# 1-D array: five integers taken from a flat Python list.
list_of_nums = np.array([1, 2, 3, 4, 5])

# 2-D array: each inner list becomes one row of a 3x3 matrix.
matrix_of_nums = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [None]:
list_of_nums

In [None]:
matrix_of_nums

We can perform mathematical operations over an entire array.

In [None]:
# The scalar 4 is added to every element of the array.
list_of_nums + 4

In [None]:
# ** works element by element: every entry is squared (this is not a matrix product).
matrix_of_nums ** 2

We can even perform operations with multiple arrays.

In [None]:
# Arrays of the same shape are added position by position.
matrix_of_nums + matrix_of_nums

In [None]:
# Unlike Python lists, + on arrays adds element-wise instead of concatenating.
list_of_nums + list_of_nums

In [None]:
matrix_of_nums

In [None]:
# What is happening here?
# The right-hand array has shape (3, 1). NumPy broadcasts that single column
# across all three columns of the matrix, so 1 is added to every entry of
# row 0, 2 to every entry of row 1, and 3 to every entry of row 2.
matrix_of_nums + np.array([[1], 
                           [2], 
                           [3]])

In [None]:
# What is happening here?
# The right-hand array has shape (3, 2). Its trailing dimension (2) is neither
# equal to the matrix's trailing dimension (3) nor 1, so the two shapes cannot
# be broadcast together and NumPy raises ValueError. The error is the point of
# this cell, so it is caught and its message shown instead of halting a
# Restart & Run All.
try:
    total = matrix_of_nums + np.array([[1, 2],
                                       [2, 3],
                                       [3, 4]])
except ValueError as error:
    print("ValueError:", error)
else:
    # Only reached if matrix_of_nums has been changed to a compatible shape.
    print(total)

In [None]:
# What is happening here?
# The 1-D array of length 3 is treated as a single row and added to every row
# of the matrix: column 0 gets +1, column 1 gets +2, column 2 gets +3.
matrix_of_nums + np.array([1, 2, 3])

What happens when we try to add things of different dimensions?

In [None]:
# matrix_of_nums has shape (3, 3) and list_of_nums has shape (5,). Broadcasting
# compares the trailing dimensions (3 vs 5); they differ and neither is 1, so
# NumPy raises ValueError. The error is the point of this cell, so it is caught
# and its message shown instead of halting a Restart & Run All.
try:
    total = matrix_of_nums + list_of_nums
except ValueError as error:
    print("ValueError:", error)
else:
    # Only reached if the arrays have been changed to compatible shapes.
    print(total)

We can get information about the array.

In [None]:
# One value per line, in this order:
#   size  - total number of elements
#   ndim  - number of axes (dimensions)
#   shape - length of the array along each axis
#   dtype - type of the stored elements
print(
    matrix_of_nums.size,
    matrix_of_nums.ndim,
    matrix_of_nums.shape,
    matrix_of_nums.dtype,
    sep="\n",
)

In [None]:
# Specifying type: with no dtype argument, NumPy infers one from the values
# (an integer dtype for these integer literals).
np.array([1, 2, 3]).dtype

In [None]:
# Passing dtype explicitly stores the same values as floating-point numbers.
np.array([1, 2, 3], dtype="float")

# Creating different arrays

Often, we want to create arrays of random numbers, or arrays filled with a single number, or maybe we don't even know what the numbers will be yet, but we do know the size of the array.

In [None]:
# The argument is the shape: a 4x4 array filled with floating-point zeros.
np.zeros((4, 4))

In [None]:
# Shapes can have any number of axes: this is a 3-D array of ones, shape (3, 2, 3).
np.ones((3, 2, 3))

In [None]:
# 2x5 array of floats drawn uniformly from [0.0, 1.0); values differ on every run.
np.random.random((2, 5))

In [None]:
# 100 evenly spaced values from 0 to 1, with both endpoints included.
np.linspace(0, 1, 100)

In [None]:
# Like range(): starts at 10 and stops before 15, giving 10, 11, 12, 13, 14.
np.arange(10, 15)

What if I want a range, but in a different shape?

In [None]:
# With a single argument, arange counts from 0: a 1-D array holding 0..19.
my_range = np.arange(20)
my_range

In [None]:
# reshape lays the same 20 values out as 4 rows of 5; the new shape must hold
# exactly as many elements as the old one. The name is rebound to the 2-D result.
my_range = my_range.reshape((4, 5))
my_range

Can I rotate this? (Strictly speaking, swapping the two axes _transposes_ the array: rows become columns.)

In [None]:
# Exchanging axis 0 and axis 1 turns the (4, 5) array into its (5, 4) transpose.
# The result is returned; the name my_range still refers to the (4, 5) array.
my_range.swapaxes(0, 1)

So far, this is _awesome_.

# Calculations

Let's get a random array of integers.

In [None]:
# Unlike the standard library's random.randint(a, b), which can return b,
# np.random.randint excludes the upper bound: these integers lie in 1..99.
nums = np.random.randint(low=1, high=100, size=(4, 5))
nums

What's the max, min, mean, and sum of these numbers?

In [None]:
# With no axis argument, each method reduces over every element of nums
# and yields a single number.
for label, stat in (
    ("Max:", nums.max),
    ("Min:", nums.min),
    ("Avg:", nums.mean),
    ("Sum:", nums.sum),
):
    print(label, stat())

That's great, but what if I want to know this for each row?

In [None]:
# axis=1 reduces across the columns of each row, so every statistic printed
# here has one entry per row (4 entries for this 4x5 array).
for label, stat in (
    ("Max:", nums.max),
    ("Min:", nums.min),
    ("Avg:", nums.mean),
    ("Sum:", nums.sum),
):
    print(label, stat(axis=1))

And column?

In [None]:
# axis=0 reduces down the rows of each column, so every statistic printed
# here has one entry per column (5 entries for this 4x5 array).
for label, stat in (
    ("Max:", nums.max),
    ("Min:", nums.min),
    ("Avg:", nums.mean),
    ("Sum:", nums.sum),
):
    print(label, stat(axis=0))

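Note that `axis=0` runs down the rows, so reducing along it collapses the rows and gives one result per column, while `axis=1` runs across the columns and gives one result per row. This is, to me, the most confusing thing in NumPy.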

I can also get the cumulative sum along an axis.

In [None]:
nums

In [None]:
# Running total down each column: row i holds the sum of rows 0..i.
nums.cumsum(axis=0)

In [None]:
# Running total along each row: column j holds the sum of columns 0..j.
nums.cumsum(axis=1)

# Indexing

You can get elements out of your NumPy arrays much like you would with a list.

In [None]:
nums

In [None]:
# One index per axis, separated by a comma: row 0, column 0.
nums[0,0]

In [None]:
# Indices are zero-based: this is the third row, fourth column.
nums[2,3]

In [None]:
# WHOA
# Each axis takes its own slice: rows 0-1 and columns 1-2, giving a 2x2 block.
nums[0:2, 1:3]

Note that this is **not** a copy -- it is a view of the original array. This is very different from lists, where slicing creates a new list.

In [None]:
# Basic slicing returns a view: nums_slice shares its data with nums.
nums_slice = nums[:2, 1:3]
print(f"Before change\n{nums_slice!r}")

# Write through the view ...
nums_slice[0, 0] = 0
print(f"Changed\n{nums_slice!r}")

# ... and nums[0, 1], the element the view's [0, 0] refers to, changes as well.
print(f"Original\n{nums!r}")

In [None]:
nums

In [None]:
# A step of -1 on each axis reverses the row order and the column order.
nums[::-1, ::-1]

In [None]:
# start:stop:step works per axis: every second row (rows 0 and 2), columns 1-3.
nums[0:4:2, 1:4]

In [None]:
nums

In [None]:
# A reversed slice is still a view, so assigning to rotated[0, 0] writes to
# the last element of nums (its bottom-right corner).
rotated = nums[::-1, ::-1]
rotated[0,0] = 99
nums

In [None]:
# .copy() gives rotated its own data, so this assignment does not reach nums.
rotated = nums[::-1, ::-1].copy()
rotated[0,0] = 9999
nums

# Histograms

Here's a cool function for summarizing data.

In [None]:
nums

In [None]:
# Returns two arrays: the count of values falling in each of 10 equal-width
# bins, and the 11 bin edges spanning the data's minimum to its maximum.
np.histogram(nums, bins=10)

In [None]:
# 30x60 array of random integers in 0..99 (the upper bound, 100, is excluded).
speeds = np.random.randint(low=0, high=100, size=(30, 60))

In [None]:
speeds

In [None]:
# mean(axis=0) gives one mean per column (60 values); [0] picks column 0's mean.
speeds.mean(axis=0)[0]

In [None]:
# mean(axis=1) gives one mean per row (30 values); [29] picks the last row's mean.
speeds.mean(axis=1)[29]

## List comprehensions (and the rest of Python) & NumPy

In [None]:
# Iterating a 1-D array yields its elements one by one (as NumPy integer scalars).
[x for x in np.random.randint(0, 10, (10,))]

In [None]:
# Iterating a 2-D array walks axis 0: each x is one row, a 1-D array of length 2.
[x for x in np.random.randint(0, 10, (10,2))]

In [None]:
import statistics as st

In [None]:
# Standard-library functions accept arrays as ordinary iterables.
# NOTE(review): statistics.mean appears to hand back the input's NumPy integer
# type here, which would truncate a fractional mean -- confirm, and prefer
# the array's own .mean() when a float result is wanted.
st.mean(np.random.randint(0, 10, (10, )))

In [None]:
# The built-in sorted() returns a new Python list, not a NumPy array.
sorted(np.random.randint(0, 10, (10, )))

In [None]:
# 4x5 array of random integers in 0..9 (the upper bound, 10, is excluded).
a = np.random.randint(low=0, high=10, size=(4, 5))
a

In [None]:
# The sort method works in place and returns None. With axis=0 each column is
# sorted independently, so the values of an original row do not stay together.
a.sort(axis=0)
a

# References

* [NumPy](http://www.numpy.org/)
* [Official NumPy tutorial](http://www.scipy.org/Tentative_NumPy_Tutorial)
* [NumPy Example List](http://wiki.scipy.org/Numpy_Example_List)
* [NumPy MedKit slides](http://mentat.za.net/numpy/numpy_advanced_slides/)