## The efficiency of numpy

In [2]:
import numpy as np

In [3]:
# 100 million random floats: large enough that the pure-Python vs. NumPy timing gap measured below is obvious.
x = np.random.random(100000000)

In [4]:
%%timeit
sum(x) / len(x)

8.22 s ± 19.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%%timeit
np.mean(x)

43.7 ms ± 372 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
import time

In [7]:
# Manual timing of the pure-Python mean.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
sum(x) / len(x)  # built-in sum walks the array one element at a time
print(time.perf_counter() - start)

8.126240015029907


In [8]:
# Manual timing of the vectorized NumPy mean.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
np.mean(x)
print(time.perf_counter() - start)

0.04525399208068848


## Practicing

In [9]:
x = np.array([1, 2, 3, 4, 5])

In [10]:
print(x)
print(type(x))

[1 2 3 4 5]
<class 'numpy.ndarray'>


In [11]:
x.dtype

dtype('int64')

In [12]:
x.shape

(5,)

In [13]:
y = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])

In [14]:
y.shape

(4, 3)

In [15]:
y.size

12

In [16]:
z = np.array(['Hello', 'World'])
print(z)

['Hello' 'World']


In [17]:
# Inspect the string array z: its shape, Python type and NumPy dtype.
print('Shape:', z.shape)
print('Type:', type(z))
print('Data type:', z.dtype)

Shape: (2,)
Type: <class 'numpy.ndarray'>
Data type: <U5


In [18]:
# Mixing numbers with a string: every element ends up stored as a string
# (note the quoted '4' and the '<U21' dtype in the output below).
a = np.array([4, 'World', 1, 2, 3])
print(a)
print('Shape:', a.shape)
print('Type:', type(a))
print('Data type:', a.dtype)

['4' 'World' '1' '2' '3']
Shape: (5,)
Type: <class 'numpy.ndarray'>
Data type: <U21


In [19]:
b = np.array([1, 2.5, 3])
print(b, b.dtype)

[1.  2.5 3. ] float64


In [20]:
# Forcing an integer dtype drops the fractional part (1.8 -> 1, 3.7 -> 3); values are truncated, not rounded.
c = np.array([1.8, 2.5, 3.7], dtype=np.int64)
print(c, c.dtype)

[1 2 3] int64


In [21]:
np.save('x_array', x)

In [22]:
retrieved_x = np.load('x_array.npy')
print(retrieved_x)

[1 2 3 4 5]


## Creating arrays differently

In [23]:
zeros = np.zeros((3,4))
print(zeros)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [24]:
print('Type:', zeros.dtype)

Type: float64


In [25]:
ones = np.ones((4, 5), dtype=int)
print(ones)
print('Data type:', ones.dtype)

[[1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]]
Data type: int64


In [26]:
fives = np.full((4, 3), 5)
print(fives)

[[5 5 5]
 [5 5 5]
 [5 5 5]
 [5 5 5]]


### `np.full` takes its dtype from the fill value that was passed

In [27]:
print('Data type:', fives.dtype)

Data type: int64


### Identity matrices
These matrices are always square

In [28]:
identity_matrix = np.eye(5)

print(identity_matrix)


[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [29]:
diag_matrix = np.diag([10, 20, 30, 40, 50])
print(diag_matrix)

[[10  0  0  0  0]
 [ 0 20  0  0  0]
 [ 0  0 30  0  0]
 [ 0  0  0 40  0]
 [ 0  0  0  0 50]]


In [30]:
one_arg = np.arange(10)
print(one_arg)

[0 1 2 3 4 5 6 7 8 9]


In [31]:
two_args = np.arange(4, 10)
print(two_args)

[4 5 6 7 8 9]


In [32]:
three_args = np.arange(4, 11, 2)
print(three_args)

[ 4  6  8 10]


As you can see, when the third argument (which indicates step) is **not** passed, it defaults to 1.

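If we want a non-integer step, it is better to use a different function (`np.arange` does accept float steps, but rounding can make the number of elements unpredictable):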

In [33]:
new_x = np.linspace(1, 14, 10)
print(new_x)

[ 1.          2.44444444  3.88888889  5.33333333  6.77777778  8.22222222
  9.66666667 11.11111111 12.55555556 14.        ]


Note that `linspace(a, b, c)` will create an array with _c_ elements evenly spaced between the start and the stop. Also notice that, unlike with `arange(a, b, c)`, the stop _b_ is inclusive by default, although you can change this by passing `endpoint=False`.

Note in the cell below that if _c_ is not specified, it defaults to 50.

In [34]:
# The number of points is omitted, so linspace produces 50 values;
# endpoint=False leaves the stop (10000) out, so the last value is 9800.
fifty_values = np.linspace(0, 10000, endpoint=False)
print(fifty_values)

[   0.  200.  400.  600.  800. 1000. 1200. 1400. 1600. 1800. 2000. 2200.
 2400. 2600. 2800. 3000. 3200. 3400. 3600. 3800. 4000. 4200. 4400. 4600.
 4800. 5000. 5200. 5400. 5600. 5800. 6000. 6200. 6400. 6600. 6800. 7000.
 7200. 7400. 7600. 7800. 8000. 8200. 8400. 8600. 8800. 9000. 9200. 9400.
 9600. 9800.]


## Reshaping
We can grab any array and change its shape, **making sure** that, of course, the new shape is compatible. For example, having a 50-element array, you cannot create a 6 by 7 matrix (42 elements), but you may create a 5 by 10 or a 10 by 5 matrix:

In [35]:
# Intentional failure: 50 elements cannot fill a 6x7 (42-element) shape.
# The error is caught and printed so that "Run All" keeps going past this cell.
try:
    wrong = np.reshape(fifty_values, (6,7))
except ValueError as err:
    print('ValueError:', err)

ValueError: cannot reshape array of size 50 into shape (6,7)

In [None]:
five_by_ten = np.reshape(fifty_values, (5,10))
print(five_by_ten)

In [None]:
ten_by_five = np.reshape(fifty_values, (10,5))
print(ten_by_five)

`reshape` can also be used as a method of a NumPy array, instead of as a function. In that scenario, passing the array as an argument is not necessary:

In [None]:
new_ten_by_five = np.linspace(0, 10000, endpoint=False).reshape(10,5)
print(new_ten_by_five)

In [None]:
random_floats = np.random.random((3, 3))
print(random_floats)

In [None]:
random_ints = np.random.randint(4, 15, (3, 2))
print(random_ints)

In [None]:
numbers_with_mean_of_zero = np.random.normal(0, 0.1, size=(1000, 1000))
print(numbers_with_mean_of_zero)

In [None]:
# Summary statistics of the normal sample; mean and std should land close to
# the 0 and 0.1 passed to np.random.normal when the array was created.
print('Mean:', numbers_with_mean_of_zero.mean())
print('Std:', numbers_with_mean_of_zero.std())
print('Max:', numbers_with_mean_of_zero.max())
print('Min:', numbers_with_mean_of_zero.min())
# Comparing against 0 gives a boolean mask; .sum() counts its True entries.
# Note: the test is >= 0, so any exact zeros are counted under '# positive:'.
print('# positive:', (numbers_with_mean_of_zero >= 0).sum())
print('# negative:', (numbers_with_mean_of_zero < 0).sum())

## Manipulating NumPy Arrays

In [None]:
x = np.array([1, 2, 3, 4, 5])
print(x)

In [36]:
print(x[0])
print('Is the same as')
print(x[-5])

1
Is the same as
1


In [37]:
x[3] = 20
print(x)

[ 1  2  3 20  5]


In [38]:
y = np.arange(1, 10).reshape(3, 3)
print(y)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [39]:
print(y[0, 0])

1


In [40]:
y[1, 1] = 100
print(y)

[[  1   2   3]
 [  4 100   6]
 [  7   8   9]]


In [41]:
print(x)
x = np.delete(x, [0, 2])
print(x)

[ 1  2  3 20  5]
[ 2 20  5]


In [43]:
print(y)

y_2 = np.delete(y, 0, axis=0) # delete first row
print('\n', y_2)

y_3 = np.delete(y, [0, 2], axis=1) # delete first and last column
print('\n', y_3)

[[  1   2   3]
 [  4 100   6]
 [  7   8   9]]

 [[  4 100   6]
 [  7   8   9]]

 [[  2]
 [100]
 [  8]]


In [47]:
# Grow a 1-D array step by step; the result of each np.append call is bound back to x.
x = np.array([1, 2, 3, 4, 5])
print(x)
# Both a single scalar and a list of values can be appended.
for extra in (6, [7, 8]):
    x = np.append(x, extra)
    print(x)

[1 2 3 4 5]
[1 2 3 4 5 6]
[1 2 3 4 5 6 7 8]


In [52]:
# Start from a 3x3 grid holding 1..9.
y = np.arange(1, 10).reshape(3, 3)
print(y)
# axis=0 adds a row at the bottom; the new row is given as a 2-D (1x3) list.
extra_row = [[10, 11, 12]]
y = np.append(y, extra_row, axis=0)
print('\n', y)
# axis=1 adds a column on the right; the float entries turn the whole result into floats.
extra_column = [[3.5], [6.5], [9.5], [12.5]]
y = np.append(y, extra_column, axis=1)
print('\n', y)

[[1 2 3]
 [4 5 6]
 [7 8 9]]

 [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]

 [[ 1.   2.   3.   3.5]
 [ 4.   5.   6.   6.5]
 [ 7.   8.   9.   9.5]
 [10.  11.  12.  12.5]]


In [54]:
x = np.array([1, 2, 5, 6, 7])
print(x)
x = np.insert(x, 2, [3, 4])
print(x)

[1 2 5 6 7]
[1 2 3 4 5 6 7]


In [64]:
# Two-row matrix whose middle row (4, 5, 6) is missing.
y = np.array([[1, 2, 3], [7, 8, 9]])
print(y)

# axis=0: place a new row before row index 1.
middle_row = [4, 5, 6]
z = np.insert(y, obj=1, values=middle_row, axis=0)
print('\n', z)

# axis=1: the scalar 4 fills an entire new column at index 3 (the right edge).
zz = np.insert(y, obj=3, values=4, axis=1)
print('\n', zz)

[[1 2 3]
 [7 8 9]]

 [[1 2 3]
 [4 5 6]
 [7 8 9]]

 [[1 2 3 4]
 [7 8 9 4]]


In [65]:
# A 1-D array of length 2 and a 2x2 matrix: both have two columns.
x = np.array([1, 2])
y = np.array([[3, 4], [5, 6]])
print(x)
print('\n', y)
# vstack places x as a single row on top of the rows of y.
z = np.vstack([x, y])
print('\n', z)

[1 2]

 [[3 4]
 [5 6]]

 [[1 2]
 [3 4]
 [5 6]]


In [67]:
z2 = np.hstack((y, x.reshape(2, 1)))
print(z2)

[[3 4 1]
 [5 6 2]]


## Slicing

1. ndarray[start:end]
2. ndarray[start:]
3. ndarray[:end]

In [68]:
x = np.arange(1, 21).reshape(4, 5)
print(x)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [69]:
filtered_x = x[1:4, 2:5]
print(filtered_x)

[[ 8  9 10]
 [13 14 15]
 [18 19 20]]


In [70]:
filtered_x = x[1:, 2:]
print(filtered_x)

[[ 8  9 10]
 [13 14 15]
 [18 19 20]]


In [71]:
filtered_x = x[:3, 2:]
print(filtered_x)

[[ 3  4  5]
 [ 8  9 10]
 [13 14 15]]


In [73]:
filtered_x = x[2, :] # all columns in third row
print(filtered_x)

[11 12 13 14 15]


In [74]:
filtered_x = x[:, 2] # all rows in third column
print(filtered_x)

[ 3  8 13 18]


In [75]:
filtered_x = x[:, 2:3] # 2-d array
print(filtered_x)

[[ 3]
 [ 8]
 [13]
 [18]]


Slicing only creates a **view** of the original array.

**`filtered_x` does not copy the values from `x`: it is a separate array object that shares the same underlying data, so changing an element through one of them is visible through the other as well:**

In [79]:
# filtered_x is the x[:, 2:3] slice taken in the previous cell, so [1, 0] addresses x[1, 2].
filtered_x[1, 0] = 8  # 8 is the value already stored there, so nothing visibly changes
print(filtered_x)
print('\n', x)
# Writing a different value through the slice shows up in x as well.
filtered_x[1, 0] = 100000
print('\n', filtered_x)
print('\n', x)

[[ 3]
 [ 8]
 [13]
 [18]]

 [[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]

 [[     3]
 [100000]
 [    13]
 [    18]]

 [[     1      2      3      4      5]
 [     6      7 100000      9     10]
 [    11     12     13     14     15]
 [    16     17     18     19     20]]


### If we actually want to create a copy, we need the copy function

In [80]:
new_x = np.arange(20).reshape(4, 5)
print(new_x)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


In [83]:
# Equivalent function form: new_filtered_x = np.copy(new_x[1:, 2:])
new_filtered_x = new_x[1:, 2:].copy() # method form; the next cell shows that edits here leave new_x untouched
print(new_filtered_x)

[[ 7  8  9]
 [12 13 14]
 [17 18 19]]


In [85]:
new_filtered_x[2, 2] = 1000
print(new_filtered_x)
print('\n', new_x)

[[   7    8    9]
 [  12   13   14]
 [  17   18 1000]]

 [[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


## Diagonal

In [86]:
some_array = np.arange(20).reshape(4, 5)
print(some_array)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


In [87]:
d = np.diag(some_array)
print(d)

[ 0  6 12 18]


In [88]:
# k=1 selects the diagonal one step above the main one (it starts at some_array[0, 1]).
d = np.diag(some_array, k=1)
print(d)

[ 1  7 13 19]


In [89]:
# k=-1 selects the diagonal one step below the main one (it starts at some_array[1, 0]).
d = np.diag(some_array, k=-1)
print(d)

[ 5 11 17]


## Unique values

In [90]:
repeted = np.array([[1, 2, 3], [4, 3, 2], [7, 9, 1]])
print(repeted)

[[1 2 3]
 [4 3 2]
 [7 9 1]]


In [91]:
unique = np.unique(repeted)
print(unique)

[1 2 3 4 7 9]
