# NumPy

NumPy is not included in a default Python installation and has to be installed separately (e.g. `pip install numpy`). If you are using Anaconda, it is already installed in the environment.

In [12]:
import numpy as np

In [13]:
np.__version__

'1.20.3'

## Introduction to NumPy arrays

Data types and their sizes - https://www.educba.com/numpy-data-types/


In [14]:
# Build a 1-D NumPy array from a plain Python list of integers.
# The bare `arr` on the last line makes the notebook display the array.
arr = np.array([10, 20, 30])
arr

array([10, 20, 30])

In [15]:
type(arr)

numpy.ndarray

In [16]:
arr.dtype

dtype('int64')

In [17]:
arr = np.array([1.5, 2.5, 3.5])
arr

array([1.5, 2.5, 3.5])

In [19]:
arr.dtype

dtype('float64')

In [26]:
# int8 can only hold values from -128 to 127, so 300 does not fit: it wraps around
# modulo 256 (300 - 256 = 44). This is integer overflow.
# Passing an out-of-range Python integer directly to np.array(..., dtype='int8') is
# deprecated since NumPy 1.24 and raises OverflowError in NumPy 2.x, so the array is
# created with the default integer dtype first and then cast explicitly; astype()
# keeps the wrap-around behaviour on every NumPy version.
arr = np.array([10, 20, 127, 300]).astype('int8')
arr

array([ 10,  20, 127,  44], dtype=int8)

In [27]:
arr.dtype

dtype('int8')

In [28]:
# Two 1-D integer arrays of the same length (5 elements each); they are the
# operands for the indexing and element-wise arithmetic cells that follow.
a = np.array([10, 20, 30, 40, 50])
b = np.array([100, 200, 300, 400, 500])

In [29]:
a

array([10, 20, 30, 40, 50])

In [30]:
b

array([100, 200, 300, 400, 500])

In [32]:
a[0], a[3]

(10, 40)

In [33]:
a[0] * 2

20

In [34]:
# Iterating over a 1-D array yields its elements one by one, just like a list.
for number in a:
    print(number)

10
20
30
40
50


In [35]:
a + b

array([110, 220, 330, 440, 550])

In [36]:
a - b

array([ -90, -180, -270, -360, -450])

In [37]:
a * b

array([ 1000,  4000,  9000, 16000, 25000])

In [38]:
# `/` is element-wise true division: the result is a float array even though
# both operands contain integers.
a / b

array([0.1, 0.1, 0.1, 0.1, 0.1])

In [39]:
# `//` is element-wise floor division: every quotient here is 0.1, which is
# rounded down to 0.
a // b

array([0, 0, 0, 0, 0])

In [40]:
# An array can be combined with a plain number: the scalar is applied to every element.
a / 2

array([ 5., 10., 15., 20., 25.])

In [41]:
# A list of equal-length lists produces a 2-D array: each inner list becomes
# one row, giving 3 rows and 5 columns here.
arr = np.array([
    [10, 20, 30, 40, 50],
    [100, 200, 300, 400, 500],
    [1000, 2000, 3000, 4000, 5000]
])
arr

array([[  10,   20,   30,   40,   50],
       [ 100,  200,  300,  400,  500],
       [1000, 2000, 3000, 4000, 5000]])

In [42]:
# Select row 1, column 3 with a single multi-dimensional index.
# arr[1][3] returns the same element, but it first creates a temporary 1-D array
# for row 1 and then indexes that; arr[1, 3] is the idiomatic NumPy form.
arr[1, 3]

400

In [43]:
arr.size

15

In [44]:
arr.ndim

2

In [45]:
arr.shape

(3, 5)

In [46]:
np.full(10, 123)

array([123, 123, 123, 123, 123, 123, 123, 123, 123, 123])

In [47]:
np.full((3, 5), 2.5)  # first argument: shape tuple (3 rows, 5 columns); second: the fill value

array([[2.5, 2.5, 2.5, 2.5, 2.5],
       [2.5, 2.5, 2.5, 2.5, 2.5],
       [2.5, 2.5, 2.5, 2.5, 2.5]])

In [49]:
np.ones((3, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [50]:
np.zeros((3, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [54]:
list(range(10, 21, 2))

[10, 12, 14, 16, 18, 20]

In [55]:
np.arange(10, 21, 2)

array([10, 12, 14, 16, 18, 20])

In [56]:
# Unlike range(), np.arange accepts a fractional step and then returns floats.
# The stop value (21) is still excluded, so the last element is 20.5.
np.arange(10, 21, 0.5)

array([10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15. ,
       15.5, 16. , 16.5, 17. , 17.5, 18. , 18.5, 19. , 19.5, 20. , 20.5])

In [57]:
np.arange(10, 21, 0.5) * 2

array([20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
       33., 34., 35., 36., 37., 38., 39., 40., 41.])

### Speed test

Task: Create a list that contains the 100 000 integers from 1 to 100 000, with each number squared.

`%%timeit` options:
- `-r` = how many times the measurement is repeated (number of runs)
- `-n` = how many times the cell is executed within each run (number of loops)

In [67]:
%%timeit -r 7 -n 10
# Baseline: build the list of squares with an explicit for loop and list.append.
# range(1, 100_001) yields the integers 1..100 000 (the stop value is exclusive).
result = []
for i in range(1, 100_001):
    result.append(i ** 2)

28.7 ms ± 1.21 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [69]:
%%timeit -r 7 -n 10
# Squares of 1..100 000 built with a list comprehension instead of an explicit loop.
[i ** 2 for i in range(1, 100_001)]

26.3 ms ± 1.11 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [77]:
%%timeit -r 7 -n 10
# Vectorised approach: np.arange creates the integers 1..100 000 as an array and
# `** 2` squares every element in one NumPy operation, without a Python-level loop.
# NOTE(review): 100_000 ** 2 does not fit into a 32-bit integer; this notebook shows
# int64 as the default integer dtype — confirm on platforms where the default is int32.
np.arange(1, 100_001) ** 2

The slowest run took 6.22 times longer than the fastest. This could mean that an intermediate result is being cached.
227 µs ± 144 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [79]:
# Speed-up of the NumPy version over the list comprehension, computed from the
# mean timings reported by %%timeit above (both expressed in seconds).
list_comprehension_seconds = 26.3e-3  # 26.3 ms - list comprehension
numpy_arange_seconds = 227e-6         # 227 µs  - NumPy arange
speedup = list_comprehension_seconds / numpy_arange_seconds
speedup

115.85903083700441