## Generating Data with NumPy

In [1]:
import numpy as np

In [2]:
# np.empty only allocates the memory; the values are whatever was already there
array_empty = np.empty(shape=(2, 3))
array_empty

array([[3.33772792e-307, 4.22786102e-307, 2.78145267e-307],
       [4.00537061e-307, 9.45656391e-308, 0.00000000e+000]])

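As you can see, "empty" does not initialise the array: it simply returns whatever values happen to be in the allocated memory. The result may look like zeros (or near-zero values such as the ones above), but it can just as well contain arbitrary numbers, so the output is not consistent.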

In [3]:
# np.zeros has a consistent output:
# it always creates an array filled with zeros (floats by default)
array_0s = np.zeros(shape=(2, 3))
array_0s

array([[0., 0., 0.],
       [0., 0., 0.]])

In [4]:
# same call as before, but dtype = np.int8 stores the zeros
# as 8-bit integers instead of the default floats
array_0s = np.zeros(shape=(2, 3), dtype=np.int8)
array_0s

array([[0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [5]:
# np.ones has a consistent output:
# it always creates an array filled with ones
array_1s = np.ones(shape=(2, 3))
array_1s

array([[1., 1., 1.],
       [1., 1., 1.]])

In [6]:
# np.full builds an array in which every element equals fill_value
# fill_value is a scalar (here the integer 5)
array_full = np.full(shape=(2, 3), fill_value=5)
array_full

array([[5, 5, 5],
       [5, 5, 5]])

In [7]:
# fill_value is not limited to numbers: any scalar works, e.g. a string,
# and the dtype of the array is inferred from it
array_full = np.full(shape=(2, 3), fill_value="Aditya")
array_full

array([['Aditya', 'Aditya', 'Aditya'],
       ['Aditya', 'Aditya', 'Aditya']], dtype='<U6')

### "_like" functions
They are equivalent to np.empty, np.zeros, np.ones and np.full, but we don't need to define their shape and dtype ourselves; we just provide another array whose shape and dtype are reused.

In [8]:
# 3x5 integer matrix used as the template for the "_like" functions
matrix_A = np.array([
    [1, 0, 9, 2, 2],
    [3, 23, 4, 5, 1],
    [0, 2, 3, 4, 1],
])

In [9]:
# display the template matrix
matrix_A

array([[ 1,  0,  9,  2,  2],
       [ 3, 23,  4,  5,  1],
       [ 0,  2,  3,  4,  1]])

In [10]:
# takes the shape and dtype of matrix_A, but leaves the values uninitialised,
# so the contents are arbitrary
array_empty_like = np.empty_like(matrix_A)
array_empty_like

array([[   0,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0],
       [1488,    0,    0,    0,    0]])

In [11]:
# zeros with the shape and dtype of matrix_A
array_0s_like = np.zeros_like(matrix_A)
array_0s_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

In [12]:
# ones with the shape and dtype of matrix_A
array_1s_like = np.ones_like(matrix_A)
array_1s_like

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

The "_like" functions mentioned above are very useful when we need to handle big databases.

### np.arange()
It's the NumPy equivalent of Python's range function and creates a sequence of evenly spaced values within a given range (consecutive integers by default).

In [13]:
# with a single argument, np.arange treats it as the stop value and starts from 0
npr = np.arange(30)
npr
# the stop value (30) is not included in the array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [14]:
# stop is the only mandatory argument: start defaults to 0 and step to 1
# giving just a start value (e.g. np.arange(start = 30)) raises an error
array_npr = np.arange(stop=30)
array_npr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [15]:
# start, stop and step can all be given; a float step produces a float array
array_npr = np.arange(start=1, stop=25, step=2.5)
array_npr

array([ 1. ,  3.5,  6. ,  8.5, 11. , 13.5, 16. , 18.5, 21. , 23.5])

In [16]:
# Forcing an integer dtype together with a non-integer step is a known pitfall:
# the number of elements stays the same, ceil((stop - start) / step) = 10,
# but NumPy fills the array with dtype(start + step) - dtype(start) = 3 - 1 = 2
# as the actual step instead of 2.5, so the values end at 19
array_npr = np.arange(start=1, stop=25, step=2.5, dtype=np.int32)
array_npr

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19])

## Random Generators

In [17]:
#for this we need
from numpy.random import Generator as gen
from numpy.random import PCG64 as pcg
#PCG = Permutation Congruential Generator
#it can produce a value of 64 bits in size

In [18]:
# no seed is passed to PCG64, so the generator is seeded with fresh entropy
# and the values drawn here differ on every run
array_rg = gen(pcg())
array_rg.normal(size=(5, 5))

array([[-0.41345603,  1.44907713, -0.09378219,  0.29278017,  0.17510966],
       [ 0.04488735,  1.36401358, -0.2370268 ,  1.35737758,  1.01230583],
       [-1.25688748, -1.01476343, -0.0371388 , -0.47627584, -0.09797856],
       [-1.4449338 , -0.74827697, -0.72090105,  0.00969347, -0.20118927],
       [-0.6983132 ,  0.06116296,  1.78765486, -1.46802963, -0.94898923]])

In [19]:
# A "seed" is the set of starting parameters for the algorithm.
# If we don't pass one, an unpredictable seed is picked once, when the
# bit generator (PCG64) is created - not on every method call.
# Specifying a seed is useful when we need to work with a model
# and we don't want the data to change between runs.
array_rg = gen(pcg(seed=365))
array_rg.normal(size=(5, 5))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641, -0.6866818 ],
       [-0.50922173, -0.7999526 ,  0.73041825,  0.08825439, -2.1177576 ],
       [ 0.65526774, -0.48095012, -0.5519114 , -0.58578662, -0.98257896],
       [ 1.12378166, -1.30984316, -0.04703774,  0.955272  ,  0.26071745],
       [-0.20023668, -1.50172484, -1.4929163 ,  0.96535084,  1.18694633]])

In [20]:
# second draw from the same generator: the values differ from the previous call
# because the generator's internal state has advanced (the seed is not reset);
# to get the same numbers again, re-create the generator with the same seed
array_rg.normal(size=(5, 5))

array([[-0.76065577,  1.48158358,  0.01200258, -0.06846959,  0.25301664],
       [-0.52640788,  0.79613109,  0.28203421,  1.80238008,  0.93932117],
       [-0.53693283, -0.26317689, -1.77723035,  1.14900013, -2.20733915],
       [ 1.54116775, -0.5124932 , -2.14564563,  1.98878673,  0.32208907],
       [-1.2651495 ,  3.2714633 ,  1.78650493, -0.20233675,  0.20427467]])

## Generating Integers, Probabilities and Random Choices

In [23]:
# re-create the generator with the same seed so the result is reproducible
array_rg = gen(pcg(seed=365))
# a single bound is treated as the exclusive upper limit: integers from 0 to 9
# size gives the shape of the output array
array_rg.integers(10, size=(5, 5))

array([[0, 7, 6, 7, 8],
       [6, 6, 2, 0, 6],
       [3, 0, 3, 7, 9],
       [1, 1, 8, 7, 4],
       [4, 8, 6, 4, 9]], dtype=int64)

In [24]:
array_rg = gen(pcg(seed=365))
# low is inclusive and high is exclusive: integers from 10 to 99
array_rg.integers(low=10, high=100, size=(5, 5))

array([[18, 78, 64, 78, 84],
       [66, 67, 28, 10, 69],
       [45, 15, 37, 74, 96],
       [19, 21, 89, 73, 54],
       [53, 84, 66, 51, 92]], dtype=int64)

In [25]:
array_rg = gen(pcg(seed=365))
# random() draws floats uniformly from the half-open interval [0, 1)
array_rg.random(size=(5, 5))

array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599, 0.66501486],
       [0.06559111, 0.71326309, 0.10812106, 0.87969046, 0.49405844],
       [0.82472673, 0.45652944, 0.07367232, 0.69628564, 0.36690736],
       [0.29787156, 0.4996155 , 0.4865245 , 0.62740703, 0.54952637],
       [0.64894629, 0.04411757, 0.7206516 , 0.84594003, 0.17159792]])

In [26]:
array_rg = gen(pcg(seed=365))
# without p, all outcomes are equally likely
# (a discrete uniform probability distribution over the listed values)
array_rg.choice([1, 2, 3, 4, 5], size=(5, 5))

array([[1, 4, 4, 4, 5],
       [4, 4, 2, 1, 4],
       [2, 1, 2, 4, 5],
       [1, 1, 5, 4, 3],
       [3, 5, 4, 3, 5]])

In [28]:
array_rg = gen(pcg(seed=365))
# p gives the probability of each value, in the same order as the values;
# the probabilities have to sum to 1
array_rg.choice(
    [1, 2, 3, 4, 5],
    p=[0.25, 0.25, 0.25, 0.20, 0.05],
    size=(5, 5),
)

array([[4, 4, 3, 1, 3],
       [1, 3, 1, 4, 2],
       [4, 2, 1, 3, 2],
       [2, 2, 2, 3, 3],
       [3, 1, 3, 4, 1]])

# Video 177

## Application of Random Generators

In [29]:
# one seeded generator feeds every column, so the whole data set is reproducible
array_RG = gen(pcg(seed=365))
n_samples = 1000  # number of values drawn for each column

# loc is the centre of the distribution, scale controls its spread
# (for the normal distribution scale is the standard deviation)
array_col_1 = array_RG.normal(loc=2, scale=3, size=n_samples)
array_col_2 = array_RG.normal(loc=2, scale=3, size=n_samples)
array_col_3 = array_RG.logistic(loc=11, scale=3, size=n_samples)
# exponential has no loc; geometric is parameterised by the success probability p
array_col_4 = array_RG.exponential(scale=4, size=n_samples)
array_col_5 = array_RG.geometric(p=0.7, size=n_samples)

In [31]:
# stack the five 1-D columns as rows, then transpose so that
# each row is one sample and each column is one distribution
random_text_data = np.array(
    [array_col_1, array_col_2, array_col_3, array_col_4, array_col_5]
).T
random_text_data

array([[ 1.59077303,  1.13261443, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  4.36853483, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674, -0.74094797, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 , -1.98012191,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  3.82646031, 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  3.35506542, 11.79840961,  4.86816939,  1.        ]])

In [32]:
# 1000 rows (samples) by 5 columns (one per generated distribution)
random_text_data.shape

(1000, 5)

In [33]:
# write the array to a comma-separated text file in the working directory;
# fmt = "%s" stores each value using its string form
np.savetxt(
    "Random-Test-from-NumPy.csv",
    random_text_data,
    fmt="%s",
    delimiter=',',
)

In [34]:
# read the file back to check that the saved values can be loaded again
np.genfromtxt("Random-Test-from-NumPy.csv", delimiter=',')

array([[ 1.59077303,  1.13261443, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  4.36853483, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674, -0.74094797, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 , -1.98012191,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  3.82646031, 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  3.35506542, 11.79840961,  4.86816939,  1.        ]])