# Data Analytics with NumPy

## NumPy arrays

In [1]:
import numpy as np

In [2]:
lst = [4, 5, 6]

In [6]:
lst

[4, 5, 6]

In [3]:
type(lst)

list

In [4]:
np_vec = np.array(lst)

In [5]:
np_vec

array([4, 5, 6])

In [7]:
type(np_vec)

numpy.ndarray

In [8]:
np_vec2 = np.array([1, 2, 3, 4])

In [10]:
np_vec2

array([1, 2, 3, 4])

In [11]:
nested_lst = [[1, 2], [3, 4]]

In [12]:
nested_np = np.array(nested_lst)

In [13]:
nested_np

array([[1, 2],
       [3, 4]])

In [14]:
mixed_list = ["A", 1]

In [16]:
mixed_list

['A', 1]

In [17]:
# NumPy arrays hold a single dtype: the int 1 is converted to the string '1'
# (note the '<U1' dtype in the output below).
np.array(mixed_list)

array(['A', '1'], dtype='<U1')

## List comprehension

In [18]:
# Goal: build the list of squares [0, 1, 4, 9, 16, 25, ...]

In [19]:
[x**2 for x in range(10)]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [20]:
# Build the squares of 0..9 with an explicit loop, appending one at a time.
lst_loop = []
for base in range(10):
    lst_loop.append(base ** 2)

lst_loop

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [21]:
np.array([x**2 for x in range(10)])

array([ 0,  1,  4,  9, 16, 25, 36, 49, 64, 81])

## Built-in functions

In [25]:
%timeit np.arange(0,10)

1.3 µs ± 266 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [26]:
%timeit list(range(10))

687 ns ± 239 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [27]:
%timeit np.array(range(10))

The slowest run took 4.03 times longer than the fastest. This could mean that an intermediate result is being cached.
25.8 µs ± 14 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [28]:
def func1(n=1000000):
    """Benchmark helper: iterate over a built-in ``range`` of ``n`` integers.

    The loop body is intentionally empty, so timing this function measures
    only the cost of iterating over the ``range`` object.

    Parameters
    ----------
    n : int, optional
        Number of iterations. Defaults to 1000000, the size that was
        previously hard-coded.

    Returns
    -------
    None
    """
    for _ in range(n):
        pass

In [29]:
def func2(n=1000000):
    """Benchmark helper: iterate in pure Python over ``np.arange(n)``.

    The loop body is intentionally empty, so timing this function measures
    the cost of creating the array plus iterating over its elements one by
    one from Python.

    Parameters
    ----------
    n : int, optional
        Number of elements in the array. Defaults to 1000000, the size that
        was previously hard-coded.

    Returns
    -------
    None
    """
    for _ in np.arange(n):
        pass

In [30]:
def func3(n=1000000):
    """Benchmark helper: iterate over a materialised ``list(range(n))``.

    The loop body is intentionally empty, so timing this function measures
    the cost of building the full list plus iterating over it.

    Parameters
    ----------
    n : int, optional
        Number of elements in the list. Defaults to 1000000, the size that
        was previously hard-coded.

    Returns
    -------
    None
    """
    for _ in list(range(n)):
        pass

In [31]:
%timeit func1()

58.9 ms ± 9.54 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [32]:
%timeit func2()

272 ms ± 113 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [33]:
%timeit func3()

97 ms ± 2.8 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [34]:
np.arange(0, 12, 2)

array([ 0,  2,  4,  6,  8, 10])

In [35]:
np.arange(0, 12)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [36]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [39]:
# 3 rows x 4 columns, every entry 0.0.
two_d_array = np.zeros(shape=(3, 4))

In [43]:
two_d_array

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [42]:
# Index with a single (row, column) pair. The chained form [1][2] first
# builds an intermediate row array and then indexes into that.
two_d_array[1, 2]

0.0

In [44]:
 np.ones( (5, 6) )

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [45]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [47]:
# 3x3 integer matrix holding 1..9, filled row by row.
np_matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [48]:
np_matrix

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [49]:
# Matrix product with the 3x3 identity: the values of np_matrix come back
# unchanged, but as floats because np.eye returns a float array.
np.eye(3) @ np_matrix

array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]])

In [50]:
np_matrix + np.eye(3)

array([[ 2.,  2.,  3.],
       [ 4.,  6.,  6.],
       [ 7.,  8., 10.]])

## linspace

In [54]:
xx = np.linspace(0, 2*np.pi, 100)

In [55]:
yy = np.sin(xx)

In [56]:
yy

array([ 0.00000000e+00,  6.34239197e-02,  1.26592454e-01,  1.89251244e-01,
        2.51147987e-01,  3.12033446e-01,  3.71662456e-01,  4.29794912e-01,
        4.86196736e-01,  5.40640817e-01,  5.92907929e-01,  6.42787610e-01,
        6.90079011e-01,  7.34591709e-01,  7.76146464e-01,  8.14575952e-01,
        8.49725430e-01,  8.81453363e-01,  9.09631995e-01,  9.34147860e-01,
        9.54902241e-01,  9.71811568e-01,  9.84807753e-01,  9.93838464e-01,
        9.98867339e-01,  9.99874128e-01,  9.96854776e-01,  9.89821442e-01,
        9.78802446e-01,  9.63842159e-01,  9.45000819e-01,  9.22354294e-01,
        8.95993774e-01,  8.66025404e-01,  8.32569855e-01,  7.95761841e-01,
        7.55749574e-01,  7.12694171e-01,  6.66769001e-01,  6.18158986e-01,
        5.67059864e-01,  5.13677392e-01,  4.58226522e-01,  4.00930535e-01,
        3.42020143e-01,  2.81732557e-01,  2.20310533e-01,  1.58001396e-01,
        9.50560433e-02,  3.17279335e-02, -3.17279335e-02, -9.50560433e-02,
       -1.58001396e-01, -

In [57]:
np.sin(5)

-0.9589242746631385

In [58]:
np.sin( np.array([1, 5, 6]))

array([ 0.84147098, -0.95892427, -0.2794155 ])

## Random numbers

In [59]:
# Draw 100 random floats with np.random.rand (uniform over [0, 1)).
# NOTE(review): no seed is set in the visible cells, so these values will
# differ from the recorded output on every run — confirm whether a seed is wanted.
rand_vec = np.random.rand(100)
rand_vec

array([2.07233062e-01, 7.30769002e-01, 6.59791864e-01, 6.81025637e-01,
       2.48557484e-01, 2.71871045e-01, 7.98990123e-01, 7.49208249e-01,
       5.72233075e-01, 7.19896884e-01, 8.88027820e-03, 2.76608823e-04,
       5.03071453e-02, 8.65511112e-01, 3.58309014e-01, 8.40501672e-01,
       9.03427833e-01, 9.32651150e-01, 6.14676351e-01, 8.82393520e-01,
       8.51672909e-01, 9.52241513e-01, 5.50925613e-01, 7.45574908e-01,
       3.04163985e-01, 2.38570689e-01, 4.26198297e-01, 7.82322230e-01,
       4.75883856e-01, 9.38394012e-01, 7.61078526e-01, 9.22307495e-01,
       1.71856121e-01, 7.25176738e-03, 3.79195419e-01, 8.16633668e-01,
       6.82256725e-01, 3.50835792e-01, 3.68318579e-01, 8.90676062e-01,
       8.20226164e-01, 2.26176131e-01, 9.19472847e-01, 6.02745204e-01,
       2.11978342e-01, 8.87075297e-01, 7.59906158e-01, 4.01817384e-01,
       3.74450760e-01, 8.93435301e-01, 8.98109442e-01, 9.65878629e-01,
       1.20312523e-01, 1.82925082e-01, 2.72745498e-01, 7.39754564e-01,
      

In [60]:
rand_matrix = np.random.rand(5, 5)
rand_matrix

array([[0.8349315 , 0.99580634, 0.84686293, 0.10481451, 0.40017336],
       [0.81453703, 0.51849209, 0.33604286, 0.98237668, 0.17386774],
       [0.38914509, 0.74001759, 0.24992495, 0.31066586, 0.7993317 ],
       [0.90880565, 0.60182417, 0.21522295, 0.06798405, 0.85852678],
       [0.22272768, 0.70482772, 0.51398234, 0.34348099, 0.41894107]])

In [61]:
np.random.randn(5, 5)

array([[ 1.04328029,  1.46487476,  0.43344817, -1.41157425, -0.65185633],
       [-1.73220232, -1.08918884, -0.83809476, -0.08218435,  0.3420978 ],
       [ 0.2142321 , -0.689139  ,  2.96929339,  0.6567088 ,  0.45753991],
       [ 1.00418736, -1.8381635 ,  0.09898517,  0.47093167,  0.61791758],
       [ 0.63931784,  1.42934777, -0.47613175,  0.60120961, -0.61932642]])

In [62]:
5 + 5 * np.random.randn(10)

array([ 7.82899038, 11.67320934, -2.78428661,  3.479198  ,  7.94668005,
        8.08610305,  0.98817602, -0.30634372, -1.19914283, 11.47233162])

In [65]:
# Cubes of 0..9, computed element-wise on the whole array.
# Note: this rebinds np_vec, which earlier held np.array(lst).
np_vec = np.arange(10) ** 3

In [66]:
np_vec

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [67]:
# 100 random integers usable as indices into np_vec: the upper bound of
# randint is exclusive, so every value lies in [0, len(np_vec)).
rand_int = np.random.randint(low=0, high=len(np_vec), size=100)

In [68]:
rand_int

array([3, 1, 7, 3, 5, 0, 5, 7, 2, 4, 7, 0, 7, 1, 5, 0, 0, 1, 3, 9, 5, 9,
       9, 6, 4, 8, 0, 5, 9, 5, 4, 9, 1, 2, 5, 0, 9, 6, 0, 9, 5, 6, 3, 9,
       4, 7, 1, 6, 7, 6, 8, 1, 3, 8, 7, 0, 0, 5, 1, 6, 2, 0, 6, 1, 5, 7,
       0, 5, 2, 4, 9, 7, 5, 9, 5, 7, 7, 5, 7, 2, 2, 4, 3, 5, 0, 8, 3, 8,
       6, 0, 1, 7, 9, 2, 4, 2, 4, 5, 9, 3])

In [69]:
np_vec[rand_int]

array([ 27,   1, 343,  27, 125,   0, 125, 343,   8,  64, 343,   0, 343,
         1, 125,   0,   0,   1,  27, 729, 125, 729, 729, 216,  64, 512,
         0, 125, 729, 125,  64, 729,   1,   8, 125,   0, 729, 216,   0,
       729, 125, 216,  27, 729,  64, 343,   1, 216, 343, 216, 512,   1,
        27, 512, 343,   0,   0, 125,   1, 216,   8,   0, 216,   1, 125,
       343,   0, 125,   8,  64, 729, 343, 125, 729, 125, 343, 343, 125,
       343,   8,   8,  64,  27, 125,   0, 512,  27, 512, 216,   0,   1,
       343, 729,   8,  64,   8,  64, 125, 729,  27])

## NumPy methods and attributes

In [70]:
np_vec

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [71]:
np_matrix

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [72]:
len(np_vec)

10

In [73]:
np_vec.size

10

In [74]:
len(np_matrix)

3

In [75]:
np_matrix.size

9

In [76]:
np_matrix.reshape(9, 1)

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [78]:
np_matrix.reshape(1, 9)

array([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [79]:
np_matrix

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [80]:
np_matrix.reshape(-1, 1)

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [83]:
np_matrix.reshape(3, -1)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [84]:
np_matrix

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [85]:
np_matrix.max()

9

In [86]:
np_matrix.min()

1

In [87]:
np_matrix.argmax()

8

In [88]:
np_matrix.argmin()

0

In [90]:
np_matrix.shape

(3, 3)

In [91]:
np_vec

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [92]:
np_vec.shape

(10,)

In [94]:
np_vec.reshape(1, -1).shape

(1, 10)

In [95]:
np_vec.reshape(1, -1)  # 2-D array (matrix) of shape (1, 10): one row, ten columns

array([[  0,   1,   8,  27,  64, 125, 216, 343, 512, 729]])

In [96]:
np_vec  # 1-D array (vector) of shape (10,)

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [97]:
np_matrix.dtype

dtype('int64')