# Introduction to Data Science

## NumPy Library Applications

In [1]:
import numpy as np

In [16]:
# Build a 2x4 integer matrix from a nested list: one inner list per row.
m = np.array(
    [
        [1, 2, 3, 4],
        [6, 7, 8, 9],
    ]
)
m

array([[1, 2, 3, 4],
       [6, 7, 8, 9]])

In [13]:
# Even integers starting at 0 and stopping before 30 (the stop value is exclusive).
n = np.arange(start=0, stop=30, step=2)
n

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [14]:
# Rearrange the elements of n into 3 rows of 5, filling row by row.
n = n.reshape((3, 5))
n

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

In [15]:
# Seven evenly spaced samples covering [0, 3]; both endpoints are included.
o = np.linspace(start=0, stop=3, num=7)
o

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. ])

In [17]:
# 2x2 array filled with ones; the elements are floats when no dtype is given.
np.ones((2,2))

array([[1., 1.],
       [1., 1.]])

In [19]:
# 3x5 array filled with floating-point zeros.
np.zeros((3,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [20]:
# 3x3 identity matrix: ones on the main diagonal, zeros everywhere else.
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [27]:
# For a 2-D input np.diag extracts the main diagonal (n[0, 0], n[1, 1], ...) as a 1-D array.
np.diag(n)

array([ 0, 12, 24])

In [29]:
# The `* 3` is plain Python list repetition, applied before NumPy sees the data,
# so the whole sequence 1, 2, 3 is tiled three times.
np.array([1, 2, 3] * 3)

array([1, 2, 3, 1, 2, 3, 1, 2, 3])

In [30]:
# np.repeat duplicates each element three times in a row before moving to the next one.
np.repeat([1, 2, 3], 3)

array([1, 1, 1, 2, 2, 2, 3, 3, 3])

In [31]:
# 3x3 array of ones stored as integers instead of the default floats.
p = np.ones(shape=(3, 3), dtype=int)
p

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [32]:
# Stack vertically: the rows of p come first, followed by the rows of p*2.
np.vstack([p, p*2])

array([[1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

In [33]:
# Stack horizontally: the columns of p*2 are appended to the right of p.
np.hstack([p, p*2])

array([[1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2],
       [1, 1, 1, 2, 2, 2]])

## Operations using NumPy

In [48]:
# Two 2x2 integer operands for the arithmetic examples.
# Note: this rebinds the names m and n used by earlier cells.
n = np.array([[1, 2], [3, 4]])
m = np.array([[5, 6], [7, 8]])

# Show m first, then n.
for operand in (m, n):
    print(operand)

[[5 6]
 [7 8]]
[[1 2]
 [3 4]]


In [40]:
# Element-wise addition of two arrays with the same shape.
m+n

array([[ 6,  8],
       [10, 12]])

In [41]:
# Element-wise subtraction.
m-n

array([[4, 4],
       [4, 4]])

In [42]:
# `*` multiplies element by element; it is NOT the matrix product.
m*n

array([[ 5, 12],
       [21, 32]])

In [43]:
# Element-wise true division; the result is floating point even for integer inputs.
m/n

array([[5.        , 3.        ],
       [2.33333333, 2.        ]])

In [44]:
# Matrix product of m and n (rows of m combined with columns of n).
m.dot(n)

array([[23, 34],
       [31, 46]])

In [49]:
# Putting two 2x2 arrays in an outer list builds a 3-D array: n itself and its
# element-wise square. Note: this rebinds the name o.
o = np.array([n, n**2])
o

array([[[ 1,  2],
        [ 3,  4]],

       [[ 1,  4],
        [ 9, 16]]])

In [50]:
# On an array with more than two dimensions, .T reverses the order of ALL axes.
o.T

array([[[ 1,  1],
        [ 3,  9]],

       [[ 2,  4],
        [ 4, 16]]])

In [55]:
# Element type of the array. The default integer width depends on the platform
# and NumPy version, so this may print a different integer dtype elsewhere.
o.dtype

dtype('int32')

In [57]:
# Convert the elements to single-precision floats and confirm the new dtype.
# astype returns a new array, so the result is bound back to o.
o = o.astype(np.float32)
o.dtype

dtype('float32')

In [58]:
# Small 1-D sample array used to demonstrate the aggregate methods.
a = np.array([1, 2, 3, 4])

In [59]:
# Sum of all elements.
a.sum()

10

In [60]:
# Largest element.
a.max()

4

In [61]:
# Smallest element.
a.min()

1

In [62]:
# Arithmetic mean; the result is a float even though the elements are integers.
a.mean()

2.5

In [63]:
# Standard deviation. With no ddof argument this is the population form (divides by N, not N-1).
a.std()

1.118033988749895

In [64]:
# Index (not value) of the largest element.
a.argmax()

3

In [65]:
# Index (not value) of the smallest element.
a.argmin()

0

## Indexing / Slicing

In [68]:
# Squares of the integers 0..12, computed element-wise.
s = np.arange(13)**2
s

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121, 144],
      dtype=int32)

In [69]:
# Integer indices return single elements; a slice returns a sub-array.
# The three results are displayed together as a tuple.
s[0], s[4], s[0:3]

(0, 16, array([0, 1, 4], dtype=int32))

In [70]:
# A negative start counts from the end: this selects the last four elements.
s[-4:]

array([ 81, 100, 121, 144], dtype=int32)

In [72]:
# Lay out the integers 0..35 as a 6x6 grid, filled row by row.
# reshape() is used instead of the in-place ndarray.resize(): the element count
# does not change, and resize() can raise ValueError when the array's buffer is
# referenced from elsewhere.
r = np.arange(36).reshape(6, 6)
r

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [74]:
# Single element at row 2, column 2 (both indices are zero-based).
r[2,2]

14

In [75]:
# Row 3, columns 3 through 5 (the stop index 6 is exclusive).
r[3, 3:6]

array([21, 22, 23])

In [76]:
# First two rows, every column except the last one.
r[:2, :-1]

array([[ 0,  1,  2,  3,  4],
       [ 6,  7,  8,  9, 10]])

In [77]:
# Last row, every second column starting from column 0.
r[-1, ::2]

array([30, 32, 34])

In [78]:
# Boolean mask indexing: keeps only the elements greater than 30, returned as a 1-D array.
r[r>30]

array([31, 32, 33, 34, 35])

In [79]:
# Top-left 3x3 corner of r. Basic slicing returns a view that shares r's data, not a copy.
r2 = r[:3, :3]
r2

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [12, 13, 14]])

In [82]:
# Write zeros THROUGH the slice so the existing data is modified
# (a plain `r2 = 0` would only rebind the name r2).
r2[:] = 0
r2

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [83]:
# r reflects the zeros as well, because r2 is a view onto the same memory.
r

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [84]:
# .copy() produces an independent array with its own data; writes to r_copy do not reach r.
r_copy = r.copy()
r_copy

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  9, 10, 11],
       [ 0,  0,  0, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35]])

In [85]:
# Overwrite every element of the copy, then print the copy and the original
# separated by a blank line: only the copy is affected.
r_copy[:] = 10
print(r_copy, r, sep='\n\n')

[[10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]
 [10 10 10 10 10 10]]

[[ 0  0  0  3  4  5]
 [ 0  0  0  9 10 11]
 [ 0  0  0 15 16 17]
 [18 19 20 21 22 23]
 [24 25 26 27 28 29]
 [30 31 32 33 34 35]]


## Iterating Over Arrays

In [86]:
# 4x3 array of random integers drawn from [0, 10) (the upper bound is exclusive).
# NOTE(review): no random seed is set in the visible cells, so these values
# change on every run — consider seeding for reproducible output.
test = np.random.randint(0, 10, (4,3))
test

array([[2, 1, 9],
       [2, 6, 1],
       [3, 7, 1],
       [4, 1, 3]])

In [87]:
# Iterating over a 2-D array walks its first axis, yielding one row per step.
for current_row in test:
    print(current_row)

[2 1 9]
[2 6 1]
[3 7 1]
[4 1 3]


In [88]:
# Walk the rows by position: shape[0] is the number of rows (equal to len(test)).
for row_index in range(test.shape[0]):
    print(test[row_index])

[2 1 9]
[2 6 1]
[3 7 1]
[4 1 3]


In [89]:
# enumerate pairs each row with its zero-based position.
for row_number, row_values in enumerate(test):
    print('row', row_number, 'is', row_values)

row 0 is [2 1 9]
row 1 is [2 6 1]
row 2 is [3 7 1]
row 3 is [4 1 3]


In [90]:
# Element-wise square of test; the result has the same shape.
test2 = test**2
test2

array([[ 4,  1, 81],
       [ 4, 36,  1],
       [ 9, 49,  1],
       [16,  1,  9]], dtype=int32)

In [91]:
# zip walks both arrays in lockstep, pairing each row of test with the matching
# row of test2; the rows are then added element-wise.
for original_row, squared_row in zip(test, test2):
    print(original_row, '+', squared_row, '=', original_row + squared_row)

[2 1 9] + [ 4  1 81] = [ 6  2 90]
[2 6 1] + [ 4 36  1] = [ 6 42  2]
[3 7 1] + [ 9 49  1] = [12 56  2]
[4 1 3] + [16  1  9] = [20  2 12]
