Generating Data With Numpy


In [1]:
import numpy as np 

np.empty(), np.zeros(), np.ones(), np.full()

In [3]:
# np.empty allocates the array without initialising its contents, so the
# values displayed are whatever was already in that memory (often zeros, but
# never guaranteed). Only the shape and dtype are meaningful.
array_empty = np.empty((2, 3))
array_empty

array([[0., 0., 0.],
       [0., 0., 0.]])

In [4]:
# np.zeros always returns an array filled with zeros (float64 by default).
array_0s = np.zeros((2, 3))
array_0s

array([[0., 0., 0.],
       [0., 0., 0.]])

In [5]:
# Same shape as before, but stored as 8-bit integers instead of float64.
array_0s = np.zeros((2, 3), dtype=np.int8)
array_0s

array([[0, 0, 0],
       [0, 0, 0]], dtype=int8)

In [6]:
# np.ones returns an array filled with ones (float64 by default).
array_1s = np.ones((2, 3))
array_1s

array([[1., 1., 1.],
       [1., 1., 1.]])

In [12]:
# np.full generates an array filled entirely with one specific value.
# fill_value is a mandatory argument and takes a scalar: a number as below,
# or any other scalar such as the string 'dd'.
# (The earlier throw-away assignment with fill_value='dd' was removed: it was
# overwritten on the very next line and never displayed.)
array_full = np.full(shape=(2, 3), fill_value=7)
array_full

array([[7, 7, 7],
       [7, 7, 7]])

"_like" functions

In [None]:
#Why are _like functions useful ? 
#A second array where we can store a value of each element of the original one 
#Convenient when working with huge databases (faster loading times)

In [14]:
# 3x5 integer matrix used as the template for the *_like examples.
matrix_A = np.array([
    [1, 0, 9, 9, 2],
    [3, 23, 4, 5, 1],
    [0, 2, 3, 4, 1],
])
matrix_A

array([[ 1,  0,  9,  9,  2],
       [ 3, 23,  4,  5,  1],
       [ 0,  2,  3,  4,  1]])

In [15]:
# np.empty_like copies only the shape and dtype of matrix_A. The contents are
# left uninitialised, so the displayed values are arbitrary: they may differ
# between runs and can even happen to coincide with matrix_A itself.
array_empty_like = np.empty_like(matrix_A)
array_empty_like

array([[ 1,  0,  9,  9,  2],
       [ 3, 23,  4,  5,  1],
       [ 0,  2,  3,  4,  1]])

In [16]:
# Typical uses of zeros_like in an analysis:
#  - a blank starting point (e.g. a planner) with the same layout as the data
#  - a mask of switches whose entries can be flipped from 0 to 1 (and back)
array_0s_like = np.zeros_like(matrix_A)
array_0s_like

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])

np.arange()

In [1]:
# arange = "array range"
# - NumPy's equivalent of Python's built-in range function
# - Creates a sequence of evenly spaced values (consecutive integers by default) within a given range
# - range -> range object
# - array range -> array

In [19]:
# range(0,30)
# list(range(0,30))

In [20]:
# A single argument is the exclusive stop value: integers 0 .. 29.
array_rng = np.arange(30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [26]:
# Explicit start and (exclusive) stop - same result as np.arange(30).
array_rng = np.arange(0, 30)
array_rng

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

In [27]:
# A float step produces a float array: 0, 2.5, 5, ... up to (not including) 30.
array_rng = np.arange(0, 30, 2.5)
array_rng

array([ 0. ,  2.5,  5. ,  7.5, 10. , 12.5, 15. , 17.5, 20. , 22.5, 25. ,
       27.5])

In [29]:
# Gotcha: combining a float step with an integer dtype.
# The length is still 30 / 2.5 = 12 elements, but the values are produced in
# integer arithmetic, so the effective stride becomes 2 (0, 2, 4, ..., 22)
# rather than the truncated float sequence (0, 2, 5, 7, ...).
# If the truncated floats are wanted, build the float array first and cast it.
array_rng = np.arange(0, 30, 2.5, dtype=np.int32)
array_rng

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22], dtype=int32)

Random Generators 


Defining Random Generators 


In [31]:
#Generator takes a bit generator (here PCG64) as an input and creates generator objects
#If no seed is given, the bit generator is seeded with fresh, unpredictable entropy when it is created
#Seed - a set of starting parameters for the algorithm
from numpy.random import Generator as gen 
from numpy.random import PCG64 as pcg 

In [34]:
# Unseeded generator: the draws below are different on every fresh run.
array_RG = gen(pcg())
array_RG.normal()               # a single sample
array_RG.normal(size=5)         # 1-D array of five samples
array_RG.normal(size=(5, 5))    # 5x5 matrix - only this last value is displayed


array([[ 0.4334196 ,  0.68903471, -1.34638795, -0.42717551, -0.18551899],
       [ 0.47163598,  0.44222543,  0.43060996,  2.86398955,  0.34729064],
       [-0.62764929,  1.83187364,  0.39638993,  0.97778693, -0.04339143],
       [ 0.69554777, -1.44047371, -0.3714382 ,  1.16042265,  0.4229192 ],
       [ 1.79546439, -0.21795932, -0.93060487,  0.53587003,  0.97891372]])

In [37]:
# Passing a seed to pcg fixes the generator's starting state, so re-running
# this cell reproduces the same numbers every time.
# The state advances with each draw; to replay the same sequence the
# generator has to be re-created with the same seed.
array_RG = gen(pcg(seed=365))
array_RG.normal(size=(5, 5))

array([[-0.13640899,  0.09414431, -0.06300442,  1.05391641, -0.6866818 ],
       [-0.50922173, -0.7999526 ,  0.73041825,  0.08825439, -2.1177576 ],
       [ 0.65526774, -0.48095012, -0.5519114 , -0.58578662, -0.98257896],
       [ 1.12378166, -1.30984316, -0.04703774,  0.955272  ,  0.26071745],
       [-0.20023668, -1.50172484, -1.4929163 ,  0.96535084,  1.18694633]])

In [39]:
# No re-seeding here: the generator continues from its current state, so this
# draw is the next part of the sequence rather than a repeat of the previous one.
array_RG.normal(size=(5, 5))

array([[ 0.58830139,  0.43063835,  0.42871861, -0.49041682,  0.58326844],
       [-0.10015833, -1.27535497,  0.94226986,  0.01434193, -1.20783944],
       [ 0.16767707, -1.35242456,  0.51855231,  0.66726422,  0.5453976 ],
       [-0.46037542,  2.10611129,  0.79579772, -1.44055324, -0.62070809],
       [ 2.16041849, -0.50664074,  0.08321514,  0.87416817,  1.98530199]])

Generating Integers, Probabilities and Random Choices

In [46]:
# Generator.integers draws whole numbers from a fixed range of values.
# A single positional argument is the exclusive upper bound (low defaults to 0).
array_RG = gen(pcg(seed=365))
array_RG.integers(10, size=(5, 5))                  # values in [0, 10); result not displayed
array_RG.integers(low=10, high=100, size=(5, 5))    # values in [10, 100)

array([[16, 80, 72, 19, 43],
       [23, 36, 46, 54, 75],
       [53, 30, 66, 43, 59],
       [14, 68, 28, 13, 58],
       [74, 74, 86, 37, 25]])

In [47]:
# Probabilities: uniform floats in the half-open interval [0, 1).
array_RG = gen(pcg(seed=365))
array_RG.random(size=(5, 5))

array([[0.75915734, 0.7662218 , 0.6291028 , 0.20336599, 0.66501486],
       [0.06559111, 0.71326309, 0.10812106, 0.87969046, 0.49405844],
       [0.82472673, 0.45652944, 0.07367232, 0.69628564, 0.36690736],
       [0.29787156, 0.4996155 , 0.4865245 , 0.62740703, 0.54952637],
       [0.64894629, 0.04411757, 0.7206516 , 0.84594003, 0.17159792]])

In [49]:
# Random choices: each element is picked (with replacement) from the given list.
array_RG = gen(pcg(seed=365))
array_RG.choice([1, 2, 3, 4, 5], size=(5, 5))

array([[1, 4, 4, 4, 5],
       [4, 4, 2, 1, 4],
       [2, 1, 2, 4, 5],
       [1, 1, 5, 4, 3],
       [3, 5, 4, 3, 5]])

Generating Arrays from Distributions 

In [52]:
# Poisson distribution
# Counts events over a fixed interval of time, distance or space. With the
# default rate (lam = 1) one event is expected per interval on average.
array_RG = gen(pcg(seed=365))
array_RG.poisson(size=(5, 5))

array([[2, 0, 1, 1, 2],
       [1, 1, 0, 1, 1],
       [1, 2, 1, 1, 0],
       [0, 1, 0, 2, 1],
       [0, 1, 0, 0, 2]])

In [53]:
# Same distribution with an expected rate of 10 events per interval.
array_RG = gen(pcg(seed=365))
array_RG.poisson(lam=10, size=(5, 5))

array([[11, 12, 12, 14, 13],
       [ 9, 10, 11, 11,  8],
       [11,  8, 10,  9, 14],
       [ 7,  8,  9, 15, 15],
       [13,  8,  8,  7,  9]])

In [56]:
# Binomial distribution
# Counts how many times a given outcome occurs over a series of trials that
# each have only two possible outcomes.
array_RG = gen(pcg(seed=365))
# n - number of trials
# p - probability of the desired outcome in a single trial
array_RG.binomial(n=100, p=0.4, size=(5, 5))

array([[42, 44, 30, 36, 45],
       [36, 41, 38, 42, 41],
       [35, 31, 35, 46, 29],
       [41, 41, 46, 34, 48],
       [45, 45, 45, 40, 43]])

In [57]:
# Logistic distribution centred on loc = 9 with scale = 1.2.
array_RG = gen(pcg(seed=365))
array_RG.logistic(loc=9, scale=1.2, size=(5, 5))

array([[10.37767822, 10.42451863,  9.63404367,  7.36153427,  9.82286787],
       [ 5.81223125, 10.09354231,  6.46790532, 11.38740256,  8.97147918],
       [10.85844698,  8.79081317,  5.962079  ,  9.99560681,  8.34539118],
       [ 7.97105522,  8.9981544 ,  8.93530194,  9.6253307 ,  9.23850869],
       [ 9.73729284,  5.3090678 , 10.13723528, 11.04372782,  7.11078651]])

Applications of Random Generators 


In [58]:
#when real data is not available,we sometimes need pseudo-random data to test how well a program performs 
#this is called test creation

In [61]:
# One seeded generator feeds all five columns, so the whole data set is
# reproducible as long as the columns are drawn in this order.
array_RG = gen(pcg(seed=365))

# Five synthetic columns of 1000 samples each, each from a different distribution.
array_column1 = array_RG.normal(loc=2, scale=3, size=1000)
array_column2 = array_RG.normal(loc=7, scale=2, size=1000)
array_column3 = array_RG.logistic(loc=11, scale=3, size=1000)
array_column4 = array_RG.exponential(scale=4, size=1000)
array_column5 = array_RG.geometric(p=0.7, size=1000)

In [62]:
# Stack the five columns as rows: the result has shape (5, 1000), i.e. one
# row per generated column.
random_test_data = np.array([
    array_column1,
    array_column2,
    array_column3,
    array_column4,
    array_column5,
])
random_test_data

array([[ 1.59077303,  2.28243293,  1.81098674, ...,  0.1973629 ,
        -2.21015722,  2.91161235],
       [ 6.42174295,  8.57902322,  5.17270135, ...,  4.3465854 ,
         8.2176402 ,  7.90337695],
       [10.14698427, 15.93309953, -0.46878789, ...,  2.66485989,
        12.69328115, 11.79840961],
       [ 6.91500737,  6.243605  ,  2.44997251, ...,  0.80935387,
         0.50644607,  4.86816939],
       [ 1.        ,  1.        ,  1.        , ...,  1.        ,
         2.        ,  1.        ]])

In [63]:
# Check the layout: one row per generated column at this point.
random_test_data.shape

(5, 1000)

In [64]:
# Rebuild the table and transpose it so each generated array becomes a
# column: 1000 rows (observations) by 5 columns (variables).
random_test_data = np.array([
    array_column1,
    array_column2,
    array_column3,
    array_column4,
    array_column5,
]).T
random_test_data

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])

In [66]:
# Check the layout again after the transpose.
random_test_data.shape

(1000, 5)

In [67]:
# Write the table to a comma-separated file in the current working directory.
# fmt='%s' writes each value using its string representation.
np.savetxt(
    "Random-Test-from-Numpy.csv",
    random_test_data,
    fmt='%s',
    delimiter=',',
)

In [68]:
# Read the file back to confirm that the saved data round-trips.
np.genfromtxt('Random-Test-from-Numpy.csv', delimiter=',')

array([[ 1.59077303,  6.42174295, 10.14698427,  6.91500737,  1.        ],
       [ 2.28243293,  8.57902322, 15.93309953,  6.243605  ,  1.        ],
       [ 1.81098674,  5.17270135, -0.46878789,  2.44997251,  1.        ],
       ...,
       [ 0.1973629 ,  4.3465854 ,  2.66485989,  0.80935387,  1.        ],
       [-2.21015722,  8.2176402 , 12.69328115,  0.50644607,  2.        ],
       [ 2.91161235,  7.90337695, 11.79840961,  4.86816939,  1.        ]])