<a href="https://colab.research.google.com/github/JesusjrGalvez/Tutorial_numpy/blob/main/3_Sampling_Methods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

In [None]:
perm = np.random.permutation(8)
print(perm)

[2 3 7 4 0 5 6 1]


In [None]:
perm = np.random.permutation([1, 4, 9, 12, 15])
print(perm)


[15  4 12  9  1]


In [None]:
arr = np.arange(12).reshape((4, 3))
print(np.random.permutation(arr))

[[ 6  7  8]
 [ 3  4  5]
 [ 0  1  2]
 [ 9 10 11]]


### set seed

In [None]:
np.random.seed(0)

### set random state which is independent from the global seed

In [None]:
rs = np.random.RandomState(3211)
rs.rand(10)

array([0.41142872, 0.17957013, 0.44040579, 0.95888953, 0.02245548,
       0.51608109, 0.86035062, 0.92110506, 0.76694036, 0.73250356])

### generate a random sample from interval [0, 1) in a given shape

In [None]:
# generate a random scalar
print(np.random.rand())    
print(np.random.rand(4, 4) ) 

0.5488135039273248
[[0.71518937 0.60276338 0.54488318 0.4236548 ]
 [0.64589411 0.43758721 0.891773   0.96366276]
 [0.38344152 0.79172504 0.52889492 0.56804456]
 [0.92559664 0.07103606 0.0871293  0.0202184 ]]


In [None]:
# generate a 1-D array
print(np.random.rand(3))           

[0.83261985 0.77815675 0.87001215]


In [None]:
# generate a 2-D array
print(np.random.rand(3,3))          

[[0.97861834 0.79915856 0.46147936]
 [0.78052918 0.11827443 0.63992102]
 [0.14335329 0.94466892 0.52184832]]


### generate a sample from the standard normal distribution (mean = 0, var = 1)

In [None]:
arr = np.random.randn(3,3)
print(arr)
dev = np.std(arr)
print(dev)
m = np.mean(arr)
print(m)

[[-0.73922809  0.9879891  -0.02647272]
 [-1.26266935 -1.56116514 -0.93935385]
 [ 0.33545292 -1.48975161 -1.06691075]]
0.8312189187614462
-0.6402343879925384


### generate an array of random integers in a given interval [low, high)

In [None]:
print(np.dtype.__i)

dtype


In [None]:
# np.ranodm.randint(low, high, size, dtype)
print(np.random.randint(-100, 10, 3, 'i8'))

[-24 -27 -82]


In [None]:
print(np.random.randint(-100, 100, (4)))

[-100    9   -8   65]


In [None]:
arr = np.random.randint(0, 100, (3, 4))
print(arr)


def compute_impurity(feature, impurity_criterion):
    """
    This function calculates impurity of a feature.
    Supported impurity criteria: 'entropy', 'gini'
    input: feature (this needs to be a Pandas series)
    output: feature impurity
    """
    probs = feature.value_counts(normalize=True)

    if impurity_criterion == 'entropy':
        impurity = -1 * np.sum(np.log2(probs) * probs)
    elif impurity_criterion == 'gini':
        impurity = 1 - np.sum(np.square(probs))
    else:
        raise ValueError('Unknown impurity criterion')

    return (round(impurity, 3))

print(compute_impurity(arr, "gini"))

[[47 61 50 68]
 [76 57  4 36]
 [80 82 37 94]]


AttributeError: ignored

### generate an array of random floating-point numbers in the interval [0.0, 1.0)

In [None]:
# the following methods are the same as np.random.rand()
print(f"np.random.random_sample(10) = {np.random.random_sample(10)}")
print(f"\nnp.random.random(10) = {np.random.random(10)}")
print(f"\nnp.random.ranf(10) = {np.random.ranf(10)}")
print(f"\nnp.random.sample(10) = {np.random.sample(10)}")

np.random.random_sample(10) = [0.68636205 0.9768018  0.60859331 0.32837895 0.80269214 0.83636403
 0.41527995 0.41560826 0.93661711 0.72218402]

np.random.random(10) = [0.03290133 0.42594595 0.09389447 0.97854755 0.57521247 0.64367809
 0.36446268 0.66771489 0.57184364 0.77468348]

np.random.ranf(10) = [0.90010213 0.40430653 0.89534511 0.06508248 0.68547738 0.23107005
 0.5388685  0.77303498 0.74658429 0.72143011]

np.random.sample(10) = [0.48289162 0.19402282 0.20556632 0.53546826 0.43781425 0.97498981
 0.30082393 0.07896991 0.72687797 0.97542413]


### generate a random sample from a given 1-D array

In [None]:
# np.random.choice(iterable_or_int, size, replace=True, p=weights)
print(np.random.choice(range(3), 10, replace=True, p=[0.1, 0.8, 0.1]))

[1 1 0 1 1 1 1 2 1 1]


In [None]:
print(np.random.choice(3, 10))

[0 0 2 0 2 2 2 0 0 0]


In [None]:
print(np.random.choice([1,2,3], 10))

[2 3 1 2 3 3 3 2 1 1]


### shuffle an array in place

In [144]:
arr = np.array(range(10))
print(arr)

[0 1 2 3 4 5 6 7 8 9]


In [146]:
np.random.shuffle(arr)
print(arr)

[9 0 6 5 2 4 3 8 7 1]


Apparently it does not work if you put all code in one line. 

In [147]:
arr = np.random.shuffle(np.array(range(10)))
print(arr)

None


### generate a permutation of an array

In [None]:
# similar to np.random.shuffle(), but it returns a copy rather than making changes in place
arr = np.array(range(10))
print('The initial array: ', arr)
print('A permutation of the array: ', np.random.permutation(arr))

The initial array:  [0 1 2 3 4 5 6 7 8 9]
A permutation of the array:  [9 6 1 2 8 7 5 3 4 0]


In [154]:
arr = np.random.randint(0, 10, (6, 3))
print(arr)
arr2 = np.random.permutation(arr)
print(arr2)

[[2 7 8]
 [0 7 4]
 [8 4 4]
 [0 4 8]
 [0 0 4]
 [7 3 7]]
[[2 7 8]
 [0 0 4]
 [7 3 7]
 [0 7 4]
 [0 4 8]
 [8 4 4]]
