# random.choice() | random.poisson() | random.randint()

In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global generator so the synthetic data is reproducible.
np.random.seed(5)

N_ROWS = 20

# The columns are drawn in the order listed here, so the seeded random
# stream is consumed the same way on every run:
#   gender  -> 'm' with probability 0.6, 'f' with probability 0.4
#   age     -> Poisson distributed with lambda=25
#   score_* -> uniform random integers in [0, 99] (randint's upper bound is exclusive)
synthetic_columns = {
    'gender': np.random.choice(a=['m', 'f'], size=N_ROWS, p=[0.6, 0.4]),
    'age': np.random.poisson(lam=25, size=N_ROWS),
}
synthetic_columns.update({
    score_name: np.random.randint(100, size=N_ROWS)
    for score_name in ('score_a', 'score_b', 'score_c')
})
df = pd.DataFrame(synthetic_columns)

df

Unnamed: 0,gender,age,score_a,score_b,score_c
0,m,26,83,57,2
1,f,22,92,19,2
2,m,22,83,64,91
3,f,38,49,8,65
4,m,28,4,8,45
5,f,21,82,71,87
6,f,30,36,12,1
7,m,29,41,20,23
8,m,10,20,59,50
9,m,18,32,72,86


# random.shuffle()

In [2]:
# Reseed so the permutation below is reproducible.
np.random.seed(5)
# np.random.shuffle reorders the rows of its argument IN PLACE (first axis only).
# Work on an explicit copy so the shuffle can never write through to df:
# a plain `.values` may be a view of the frame's data when all columns share
# one dtype, whereas to_numpy(copy=True) is guaranteed not to alias it.
arr = df.to_numpy(copy=True)
np.random.shuffle(arr)
arr

array([['m', 22, 83, 64, 91],
       ['f', 21, 82, 71, 87],
       ['m', 30, 94, 62, 9],
       ['m', 26, 66, 0, 23],
       ['m', 30, 15, 72, 24],
       ['f', 22, 92, 19, 2],
       ['f', 27, 31, 86, 54],
       ['m', 30, 10, 74, 19],
       ['m', 23, 22, 32, 64],
       ['f', 27, 14, 43, 64],
       ['m', 29, 41, 20, 23],
       ['m', 28, 4, 8, 45],
       ['m', 10, 20, 59, 50],
       ['m', 18, 32, 72, 86],
       ['m', 26, 83, 57, 2],
       ['m', 23, 63, 35, 1],
       ['f', 30, 36, 12, 1],
       ['m', 14, 9, 69, 73],
       ['f', 26, 70, 15, 77],
       ['f', 38, 49, 8, 65]], dtype=object)

# logical_and() | logical_or()

In [3]:
# Element-wise AND of two boolean masks: True only where 1 < x < 4.
x = np.arange(5)
above_one = x > 1
below_four = x < 4
np.logical_and(above_one, below_four)

array([False, False,  True,  True, False])

In [4]:
# Element-wise OR of two boolean masks: True where x < 1 or x > 3.
below_one = x < 1
above_three = x > 3
np.logical_or(below_one, above_three)

array([ True, False, False, False,  True])

# where()

In [5]:
# Vectorised if/else: rows with score_a of at least 50 are labelled "Pass",
# every other row "Fail".
reached_pass_mark = df['score_a'] >= 50
df['score_a_pass'] = np.where(reached_pass_mark, "Pass", "Fail")

df.head()

Unnamed: 0,gender,age,score_a,score_b,score_c,score_a_pass
0,m,26,83,57,2,Pass
1,f,22,92,19,2,Pass
2,m,22,83,64,91,Pass
3,f,38,49,8,65,Fail
4,m,28,4,8,45,Fail


# select()

In [6]:
# Labels, listed in the same order as the conditions below.
choices = ['Mister', 'Lady', 'Boy', 'Girl']

# One boolean mask per label: gender crossed with an age threshold of 20.
conditions = [
    (df['gender'] == 'm') & (df['age'] > 20),
    (df['gender'] == 'f') & (df['age'] > 20),
    (df['gender'] == 'm') & (df['age'] <= 20),
    (df['gender'] == 'f') & (df['age'] <= 20),
]

# np.select picks, per row, the choice of the first condition that is True and
# falls back to `default` when none match.
# The default is None rather than np.nan: the choices are strings, and a float
# NaN has no common dtype with them -- older NumPy silently stores the text
# 'nan', while newer NumPy raises "Choicelist and default value do not have a
# common dtype". None produces an object array, and pandas treats None as a
# missing value.
df['demo'] = np.select(conditions, choices, default=None)
df.head(10)

Unnamed: 0,gender,age,score_a,score_b,score_c,score_a_pass,demo
0,m,26,83,57,2,Pass,Mister
1,f,22,92,19,2,Pass,Lady
2,m,22,83,64,91,Pass,Mister
3,f,38,49,8,65,Fail,Lady
4,m,28,4,8,45,Fail,Mister
5,f,21,82,71,87,Pass,Lady
6,f,30,36,12,1,Fail,Lady
7,m,29,41,20,23,Fail,Mister
8,m,10,20,59,50,Fail,Boy
9,m,18,32,72,86,Fail,Boy


# digitize()

In [7]:
# Bin edges every 20 points. np.digitize returns, for each score, the index i
# with bins[i-1] <= score < bins[i], so 0-19 -> 1, 20-39 -> 2, ..., 80-99 -> 5.
bins = np.array([0, 20, 40, 60, 80, 100])
score_a_bin = np.digitize(df['score_a'], bins)
df['Bins'] = score_a_bin
df.head(10)

Unnamed: 0,gender,age,score_a,score_b,score_c,score_a_pass,demo,Bins
0,m,26,83,57,2,Pass,Mister,5
1,f,22,92,19,2,Pass,Lady,5
2,m,22,83,64,91,Pass,Mister,5
3,f,38,49,8,65,Fail,Lady,3
4,m,28,4,8,45,Fail,Mister,1
5,f,21,82,71,87,Pass,Lady,5
6,f,30,36,12,1,Fail,Lady,2
7,m,29,41,20,23,Fail,Mister,3
8,m,10,20,59,50,Fail,Boy,2
9,m,18,32,72,86,Fail,Boy,2


# split()

In [8]:
# Cut the frame's rows into three consecutive chunks: the first 60%, the next
# 20%, and the remaining 20%. np.split takes the row positions at which to cut.
rows = df.values
n_rows = len(rows)
cut_points = [int(0.6 * n_rows), int(0.8 * n_rows)]
data_a, data_b, data_c = np.split(rows, cut_points)
data_a

array([['m', 26, 83, 57, 2, 'Pass', 'Mister', 5],
       ['f', 22, 92, 19, 2, 'Pass', 'Lady', 5],
       ['m', 22, 83, 64, 91, 'Pass', 'Mister', 5],
       ['f', 38, 49, 8, 65, 'Fail', 'Lady', 3],
       ['m', 28, 4, 8, 45, 'Fail', 'Mister', 1],
       ['f', 21, 82, 71, 87, 'Pass', 'Lady', 5],
       ['f', 30, 36, 12, 1, 'Fail', 'Lady', 2],
       ['m', 29, 41, 20, 23, 'Fail', 'Mister', 3],
       ['m', 10, 20, 59, 50, 'Fail', 'Boy', 2],
       ['m', 18, 32, 72, 86, 'Fail', 'Boy', 2],
       ['m', 30, 10, 74, 19, 'Fail', 'Mister', 1],
       ['f', 27, 31, 86, 54, 'Fail', 'Lady', 2]], dtype=object)

In [9]:
# Second chunk from np.split: the rows between the 60% and 80% cut points.
data_b

array([['m', 30, 15, 72, 24, 'Fail', 'Mister', 1],
       ['m', 23, 22, 32, 64, 'Fail', 'Mister', 2],
       ['f', 26, 70, 15, 77, 'Pass', 'Lady', 4],
       ['m', 14, 9, 69, 73, 'Fail', 'Boy', 1]], dtype=object)

In [10]:
# Third chunk from np.split: the rows from the 80% cut point to the end.
data_c

array([['m', 23, 63, 35, 1, 'Pass', 'Mister', 4],
       ['m', 30, 94, 62, 9, 'Pass', 'Mister', 5],
       ['f', 27, 14, 43, 64, 'Fail', 'Lady', 1],
       ['m', 26, 66, 0, 23, 'Pass', 'Mister', 4]], dtype=object)

# clip()

In [13]:
# Sample values, some of which fall outside the range [0, 100].
x = np.array([30, 20, 50, 70, 50, 100, 10, 130, -20, -10, 200])

# Clamp every element into [LOWER, UPPER]: values below LOWER become LOWER,
# values above UPPER become UPPER, everything else is unchanged.
LOWER, UPPER = 0, 100
np.clip(x, LOWER, UPPER)

array([ 30,  20,  50,  70,  50, 100,  10, 100,   0,   0, 100])

# extract()

In [14]:
# Pull out the elements of x that lie outside the range [0, 100].
out_of_range = (x > 100) | (x < 0)
np.extract(out_of_range, x)

array([130, -20, -10, 200])

# unique()

In [15]:
# Count how often each distinct value occurs in an array.
x = np.array([0, 0, 0, 1, 1, 1, 0, 0, 2, 2])
# With return_counts=True, np.unique returns the sorted distinct values and a
# parallel array holding the number of occurrences of each.
unique, counts = np.unique(x, return_counts=True)
{value: count for value, count in zip(unique, counts)}

{0: 5, 1: 3, 2: 2}

# argmax() | argmin() | argsort() | argpartition()

In [16]:
# Positional index (not the index label) of the largest score_a value.
score_a_values = df['score_a'].to_numpy()
np.argmax(score_a_values)

17

In [17]:
# Fetch the full row holding the largest score_a: argmax gives its position,
# and .iloc selects by position.
top_position = np.argmax(df['score_a'].to_numpy())
df.iloc[top_position]

gender               m
age                 30
score_a             94
score_b             62
score_c              9
score_a_pass      Pass
demo            Mister
Bins                 5
Name: 17, dtype: object

In [18]:
# Reorder the rows by ascending score_a: argsort returns the positions that
# would sort the values, and .iloc applies that ordering to the frame.
ascending_positions = np.argsort(df['score_a'].to_numpy())
df.iloc[ascending_positions]

Unnamed: 0,gender,age,score_a,score_b,score_c,score_a_pass,demo,Bins
4,m,28,4,8,45,Fail,Mister,1
15,m,14,9,69,73,Fail,Boy,1
10,m,30,10,74,19,Fail,Mister,1
18,f,27,14,43,64,Fail,Lady,1
12,m,30,15,72,24,Fail,Mister,1
8,m,10,20,59,50,Fail,Boy,2
13,m,23,22,32,64,Fail,Mister,2
11,f,27,31,86,54,Fail,Lady,2
9,m,18,32,72,86,Fail,Boy,2
6,f,30,36,12,1,Fail,Lady,2


In [19]:
x = np.array([30, 20, 50, 70, 50, 100, 10, 130, -20, -10, 200])

# Positions of the TOP_K largest values. argpartition only guarantees that the
# last TOP_K slots hold the largest elements; it does not sort them.
TOP_K = 5
partitioned_positions = np.argpartition(x, -TOP_K)
indexes = partitioned_positions[-TOP_K:]
indexes

array([ 2,  3,  5,  7, 10])

In [20]:
# Values at the positions chosen by argpartition: the five largest entries of x.
x[indexes]

array([ 50,  70, 100, 130, 200])

In [21]:
# For every row, rank the three score columns from highest to lowest value and
# record the column names as Top1, Top2, Top3.
score_frame = df[['score_a', 'score_b', 'score_c']]


def rank_score_columns(row):
    """Return the score column names for one row, ordered by descending value."""
    # argsort gives ascending positions; reversing yields descending order.
    descending_positions = np.array(row).argsort()[::-1][:3]
    return list(score_frame.columns[descending_positions])


ranked_names = score_frame.apply(rank_score_columns, axis=1).to_list()
Tops = pd.DataFrame(ranked_names, columns=['Top1', 'Top2', 'Top3'])
Tops

Unnamed: 0,Top1,Top2,Top3
0,score_a,score_b,score_c
1,score_a,score_b,score_c
2,score_c,score_a,score_b
3,score_c,score_a,score_b
4,score_c,score_b,score_a
5,score_c,score_a,score_b
6,score_a,score_b,score_c
7,score_a,score_c,score_b
8,score_b,score_c,score_a
9,score_c,score_b,score_a
