# Data Analysis Practice

Chapter 1 - Numpy

In [1]:
import numpy as np

In [2]:
# Plain Python list mixing ints with a single float (2.3).
data1 = [1, 2.3, 4, 5, 6]

In [3]:
arr = np.array(data1)

In [4]:
arr

array([1. , 2.3, 4. , 5. , 6. ])

In [7]:
# Nested list: two rows of three ints each.
data2 = [[1, 2, 3], [4, 5, 6]]

In [8]:
arr2 = np.array(data2)

In [9]:
arr2

array([[1, 2, 3],
       [4, 5, 6]])

In [10]:
arr2.ndim

2

In [11]:
arr2.shape

(2, 3)

In [12]:
arr2.dtype

dtype('int32')

In [13]:
# 1-D array holding ten zeros.
zero_arr = np.zeros(shape=10)
zero_arr

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [16]:
# 2-D array of zeros: 3 rows by 2 columns (the shape is passed as a tuple).
zero_arr_2dim = np.zeros(shape=(3, 2))
zero_arr_2dim

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [17]:
zero_arr_2dim.ndim

2

In [21]:
# 3-D array of zeros: 4 blocks, each 3 rows by 2 columns.
zero_arr_3dim = np.zeros(shape=(4, 3, 2))
zero_arr_3dim

array([[[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.]]])

In [20]:
zero_arr_3dim.ndim

3

In [22]:
# Integers 0 through 9 (the stop value 10 is excluded).
arr_range_10 = np.arange(0, 10)
arr_range_10

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [24]:
# Conversion of a tuple to an array
tup1 = (1, 2, 3, 4)
arr_tup1 = np.array(tup1)
arr_tup1

array([1, 2, 3, 4])

In [26]:
np.asarray(tup1)

array([1, 2, 3, 4])

In [27]:
np.asarray(arr_tup1)

array([1, 2, 3, 4])

### Test diff between np.array and np.asarray
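np.array() makes a copy of its input by default, so changes made through its result do not affect the original array, whereas np.asarray() returns the input itself when it is already an ndarray, so changes made through its result do affect the original.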

In [54]:
# Small 1-D array used to compare np.array() with np.asarray().
arr1 = np.array([1, 2, 3, 4, 5])
arr1

array([1, 2, 3, 4, 5])

In [55]:
np.array(arr1)[0]

1

In [56]:
# np.array() copies arr1 by default, so this write lands on a temporary copy
# that is discarded immediately; arr1 itself is not touched.
np.array(arr1)[0] = 5

In [57]:
arr1

array([1, 2, 3, 4, 5])

In [58]:
np.asarray(arr1)[0] # Still 1: the earlier np.array(arr1)[0] = 5 modified a copy, so arr1 was not changed

1

In [59]:
# arr1 is already an ndarray, so np.asarray() hands back arr1 itself and this
# write changes arr1 in place.
np.asarray(arr1)[0] = 5

In [60]:
arr1 # 1st element changes to 5, since it is modifying the array itself

array([5, 2, 3, 4, 5])

## np.ones and np.ones_like or np.zeros and np.zeros_like

np.ones creates an ndarray of the given shape with every element set to 1 (np.zeros does the same with 0).

np.ones_like takes an existing array and creates a new ndarray of the same shape, with every element set to 1 (np.zeros_like does the same with 0).

In [61]:
# 2 x 3 array filled with ones.
A = np.ones(shape=(2, 3))

In [62]:
A

array([[1., 1., 1.],
       [1., 1., 1.]])

In [64]:
B = np.ones_like(A)
B

array([[1., 1., 1.],
       [1., 1., 1.]])

### Identity Matrix

In [66]:
# np.identity(n) creates an n x n matrix with ones on the main diagonal
# and zeros everywhere else.
M = np.identity(3)
M

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [68]:
# np.eye(3) produces the same 3 x 3 identity matrix as np.identity(3).
N = np.eye(N=3)
N

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### Type casting datatypes using astype()

In [69]:
# Integer array that will be cast to float with astype().
arr1 = np.array([2, 3, 4, 5])
arr1

array([2, 3, 4, 5])

In [70]:
arr1.dtype

dtype('int32')

In [71]:
# Create another array with the same values as arr1 but with float type

In [72]:
arr1_float = arr1.astype(np.float64)
arr1_float

array([2., 3., 4., 5.])

In [73]:
arr1_float.dtype

dtype('float64')

Suppose the numerical values in the data are stored as strings. Use astype() to convert them to a numeric data type.

In [75]:
# Numeric values stored as fixed-width byte strings (dtype 'S3').
# np.bytes_ is used instead of np.string_: the np.string_ alias was removed in
# NumPy 2.0, while np.bytes_ names the same type in old and new releases.
num_str = np.array(['1.1', '2.2', '3.3', '4.4'], dtype=np.bytes_)
num_str

array([b'1.1', b'2.2', b'3.3', b'4.4'], dtype='|S3')

In [76]:
num_str.dtype

dtype('S3')

In [78]:
# Parse the byte strings as 64-bit floats.
# np.float64 is used instead of np.float: the np.float alias (just the builtin
# float) was removed in NumPy 1.24 and now raises AttributeError.
num_float = num_str.astype(np.float64)
num_float

array([1.1, 2.2, 3.3, 4.4])

In [79]:
num_float.dtype

dtype('float64')

### Slicing - Basic slicing returns views (not copies)
Any change made through a slice changes the original array directly.

If you assign a scalar value to a slice of an array, the value will be "broadcast" to every element of the slice.

In [80]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [81]:
arr[5:8]

array([5, 6, 7])

In [82]:
# The scalar 2 is broadcast to positions 5, 6 and 7; arr is modified in place.
arr[5:8] = 2
arr

array([0, 1, 2, 3, 4, 2, 2, 2, 8, 9])

Checking the same for a 2D array

In [84]:
# 5 x 3 array of ones used to try slice assignment on rows.
arr2d = np.ones(shape=(5, 3))
arr2d

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [85]:
arr2d[2:4]

array([[1., 1., 1.],
       [1., 1., 1.]])

In [88]:
# The 1-D array [2, 3, 4] is broadcast across both selected rows (rows 2 and 3),
# so each of them becomes [2, 3, 4].
arr2d[2:4] = np.array([2,3,4])
arr2d

array([[1., 1., 1.],
       [1., 1., 1.],
       [2., 3., 4.],
       [2., 3., 4.],
       [1., 1., 1.]])

Any modification to a slice of the array, even after the slice has been stored in another variable, will change the original array.

In [89]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [93]:
arr_slice = arr[5:8]
arr_slice

array([5, 6, 7])

In [94]:
# arr_slice is a view of arr[5:8], so writing to its index 1 also sets arr[6].
arr_slice[1] = 100

In [95]:
arr

array([  0,   1,   2,   3,   4,   5, 100,   7,   8,   9])

If you want to copy the slice of the ndarray instead of the view, use arr[5:8].copy()

In [96]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [97]:
# .copy() gives arr_copy its own data, so later edits to it leave arr untouched.
arr_copy = arr[5:8].copy()

In [98]:
arr_copy

array([5, 6, 7])

In [99]:
arr_copy[1] = 2

In [100]:
arr_copy

array([5, 2, 7])

In [102]:
arr # The main array did not change

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### Basic Indexing

In [107]:
# 3-D array of shape (2, 2, 3): two blocks, each with two rows of three values.
arr3d = np.array(
    [
        [[1, 2, 3], [4, 5, 6]],
        [[7, 8, 9], [10, 11, 12]],
    ]
)

In [112]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [113]:
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [114]:
# Keep an independent copy of the first 2 x 3 block so it can be restored
# after arr3d[0] is overwritten.
old_vals = arr3d[0].copy()

In [115]:
arr3d[0] = 33
arr3d

array([[[33, 33, 33],
        [33, 33, 33]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [116]:
arr3d[0] = old_vals
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [118]:
# 3 x 3 integer array for basic row/element indexing.
arr2d = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [119]:
arr2d[0:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [125]:
arr2d[0][2]

3

In [127]:
arr2d[0,2]

3

In [137]:
# 3-D array of shape (3, 2, 3): three blocks, each with two rows of three values.
arr3d = np.array(
    [
        [[1, 2, 3], [2, 3, 4]],
        [[4, 5, 6], [4, 3, 5]],
        [[7, 8, 9], [12, 1, 4]],
    ]
)

In [138]:
arr3d

array([[[ 1,  2,  3],
        [ 2,  3,  4]],

       [[ 4,  5,  6],
        [ 4,  3,  5]],

       [[ 7,  8,  9],
        [12,  1,  4]]])

In [143]:
# Expected: 9, 4 -> last column of both rows in block 2
arr3d[2, :, 2]

array([9, 4])

In [144]:
# Expected: 5, 3 -> middle column of both rows in block 1
arr3d[1, :, 1]

array([5, 3])

In [145]:
# Expected: (2, 3), (5, 3), (8, 1) -> middle column of every row in every block
arr3d[:, :, 1]

array([[2, 3],
       [5, 3],
       [8, 1]])

In [146]:
# Expected: (2, 3), (3, 4), (5, 6), (3, 5) -> last two columns of blocks 0 and 1
arr3d[:2, :2, 1:]

array([[[2, 3],
        [3, 4]],

       [[5, 6],
        [3, 5]]])

In [148]:
# Expected: (4, 5), (4, 3) -> first two columns of both rows in block 1
arr3d[1, :2, :2]

array([[4, 5],
       [4, 3]])

In [149]:
# Expected: (8, 9), (1, 4) -> last two columns of block 2
# (slicing with 2: instead of indexing with 2 keeps the result 3-D)
arr3d[2:, :2, 1:]

array([[[8, 9],
        [1, 4]]])

In [151]:
# Expected: (4, 4), (7, 12) -> first column of both rows in blocks 1 and 2
arr3d[1:, :, 0]

array([[ 4,  4],
       [ 7, 12]])

In [158]:
# Expected: (4, 5), (4, 3), (7, 8), (12, 1) -> first two columns of blocks 1 and 2
arr3d[1:, :2, :2]

array([[[ 4,  5],
        [ 4,  3]],

       [[ 7,  8],
        [12,  1]]])

### Fancy Indexing

In [160]:
# 3 x 4 array whose rows are picked by index lists in the fancy-indexing example.
arr = np.array(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [1, 2, 3, 5],
    ]
)
arr

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 5]])

In [171]:
# A list of row indices selects whole rows in the listed order; rows may repeat.
# The negative index -2 counts from the end, which is row 1 of this 3-row array.
arr[[1, 2, 0, 1, 0, 2, 1, 0, -2]]

array([[5, 6, 7, 8],
       [1, 2, 3, 5],
       [1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [1, 2, 3, 5],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])