# DATA MANIPULATION WITH NUMPY

In [151]:
import numpy as np
arr = np.array([1, 2, 3])

In [152]:
print('array is: '.format(repr(arr)))
arr.dtype

array is: 


dtype('int32')

In [153]:
# TYPE CASTING
arr = arr.astype(np.float32)
arr.dtype

dtype('float32')

In [154]:
# FILLER VALUE -- USING nan, infinity(inf)
arr1 = np.array([np.nan, 2, 4])
arr2 = np.array([np.inf, 2, 4])
arr2 = np.array([-np.inf, 2, 4])

arr2

array([-inf,   2.,   4.])

# MATH

In [155]:
#Ranged Data
arr3 = np.arange(5.9, 6.9, 0.1)  # --> start, end, gap
arr3

array([5.9, 6. , 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8])

In [156]:
arr4 = np.linspace(3, 7, num= 8, endpoint= False)
arr4

array([3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5])

In [157]:
np.reshape(arr3, (-1, 10, 1))

array([[[5.9],
        [6. ],
        [6.1],
        [6.2],
        [6.3],
        [6.4],
        [6.5],
        [6.6],
        [6.7],
        [6.8]]])

In [158]:
arr5 = np.array([[1, 2, 4], [5, 7, 9], [3, 6, 3]])
arr5.flatten()
arr5

array([[1, 2, 4],
       [5, 7, 9],
       [3, 6, 3]])

In [186]:
arr6 = np.arange(24)
arr6 = np.reshape(arr6, (3, 4, 2))
transposed = np.transpose(arr6, axes=(1, 2, 0))
transposed 

array([[[ 0,  8, 16],
        [ 1,  9, 17]],

       [[ 2, 10, 18],
        [ 3, 11, 19]],

       [[ 4, 12, 20],
        [ 5, 13, 21]],

       [[ 6, 14, 22],
        [ 7, 15, 23]]])

In [160]:
np.zeros(4)
np.ones((4, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [161]:
np.zeros_like(arr5)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

# RANDOM

In [162]:
#ARITHEMATIC OPERATIONS ON ARRAYS  -- these operations are performed element-wise by default
arr7 = np.array([[1, 2], [3, 4]])
arr7 + 2
arr7 * 2
arr7 - 2
arr7 / 2
arr7 % 2
arr7 // 2
arr7 ** 2
arr7 ** 0.5

array([[1.        , 1.41421356],
       [1.73205081, 2.        ]])

In [163]:
#NON-LINEAR FUNCTIONS
np.exp(arr3)
np.exp2(arr3)
np.log(arr3)
np.log2(arr3)
np.log10(arr3)
np.power(4, arr3)

array([ 3565.77510726,  4096.        ,  4705.06846207,  5404.70440253,
        6208.37505643,  7131.55021452,  8192.        ,  9410.13692414,
       10809.40880505, 12416.75011285])

In [164]:
#MATRIX MULPLICATION
mat1 = np.array([[1, 2], [3, 4], [2, 3]])
mat2 = np.array([[1, 2, 4], [3, 8, 4]])
np.matmul(mat1, mat2)

array([[ 7, 18, 12],
       [15, 38, 28],
       [11, 28, 20]])

In [165]:
#Random Integers 
rand_num = np.random.randint(3, high = 7, size = (1,6))
rand_num

array([[6, 4, 5, 5, 4, 5]])

In [166]:
#Utility Functions
np.random.seed(4)
rand_num = np.random.randint(3, high = 7, size = (1,6))
rand_num

np.random.seed(10)
rand_num = np.random.randint(3, high = 7, size = (1,6))
rand_num

np.random.shuffle(arr)
np.random.shuffle(arr6)
arr6

array([[[16, 17],
        [18, 19],
        [20, 21],
        [22, 23]],

       [[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15]]])

In [167]:
#Distributions 
np.random.uniform(1, 3, size = (5, 2))
np.random.normal(loc = 3, scale = 5, size = 6)    # loc -> mean, scale -> standard deviation

array([ 4.55425368, 11.64687942,  1.79669028, -2.13676012,  5.12007535,
       10.04310437])

In [168]:
#Custom Sampling
arr8 = np.arange(2, 4, 0.9)
np.random.choice(arr8, size = 5, p = [0.2, 0.6, 0.2]) # sum of p(probability) values =1 -- defines the chance to be chosen

array([2. , 3.8, 2. , 2.9, 2. ])

# INDEXING

In [169]:
#Accessing Arrays
arr8[2]
arr8

array([2. , 2.9, 3.8])

In [170]:
#Slicing Arrays
arr8[:1]

array([2.])

In [171]:
#Argmin and Argmax --> returns the index of the smallest or largest value in the array
np.argmin(arr8)  
np.argmax(arr8)

2

In [172]:
np.argmin(arr6)

8

In [173]:
np.argmax(arr6)

7

In [174]:
np.argmin(arr6, axis = 0)

array([[1, 1],
       [1, 1],
       [1, 1],
       [1, 1]], dtype=int64)

In [175]:
arr6[0][2]

array([20, 21])

# FILTERING

In [176]:
arr6 > 3

array([[[ True,  True],
        [ True,  True],
        [ True,  True],
        [ True,  True]],

       [[False, False],
        [False, False],
        [ True,  True],
        [ True,  True]],

       [[ True,  True],
        [ True,  True],
        [ True,  True],
        [ True,  True]]])

In [177]:
arr9 = [[1, 2, np.nan], [np.nan, 4, 5], [6, np.nan, 8]]
np.isnan(arr9)

array([[False, False,  True],
       [ True, False, False],
       [False,  True, False]])

In [178]:
arr10 = np.array([1, 2, 3, 5, 9])
np.where(arr10 > 3)  # finds index according to provided condition

(array([3, 4], dtype=int64),)

In [179]:
np.any(arr6 > 6)  # True
np.all(arr > 3, axis = 0)  # False

False

In [180]:
np.broadcast

numpy.broadcast

# Statistics

In [181]:
np.mean(arr6)
np.sum(arr6)
np.cumsum(arr6)  # running sum of all numbers

array([ 16,  33,  51,  70,  90, 111, 133, 156, 156, 157, 159, 162, 166,
       171, 177, 184, 192, 201, 211, 222, 234, 247, 261, 276], dtype=int32)

In [182]:
arr6.flatten()

array([16, 17, 18, 19, 20, 21, 22, 23,  0,  1,  2,  3,  4,  5,  6,  7,  8,
        9, 10, 11, 12, 13, 14, 15])

In [183]:
arr11 = np.array([[1, 2, 4], [3, 5, 7], [6, 8, 9]])
np.concatenate([arr11, arr9], axis = 1)  #performs horizonal concatenation

array([[ 1.,  2.,  4.,  1.,  2., nan],
       [ 3.,  5.,  7., nan,  4.,  5.],
       [ 6.,  8.,  9.,  6., nan,  8.]])

# Saving And Loading

In [184]:
np.save('arr.npy', arr9)

In [185]:
np.load('arr.npy')

array([[ 1.,  2., nan],
       [nan,  4.,  5.],
       [ 6., nan,  8.]])