### Exemple d'utilisation de random.sample() pour sélectionner plusieurs éléments d'une liste sans répétition

In [3]:
import random

# Population to draw from.
aList = [20, 40, 80, 100, 120]

# Pick 3 distinct positions from the list (sampling without replacement).
sampled_list = random.sample(population=aList, k=3)
print(sampled_list)

[80, 40, 120]


### Échantillonnage aléatoire avec remise pour inclure les répétitions

In [4]:
import random
names = [
    'John',
    'Corey',
    'Adam',
    'Steve',
    'Rick',
    'Thomas',
]
# Draw three names with replacement, so the same name may appear more than once.
sampled_list2 = random.choices(population=names, k=3)
print(sampled_list2)

['John', 'John', 'Adam']


In [5]:
import random 
# Build a list of 8 distinct random integers taken from 0..99.
num_list = random.sample(range(100), k=8)
print(num_list)

[46, 49, 85, 2, 25, 11, 44, 88]


In [6]:
# Shuffle num_list in place (the call's result is not assigned), then show the new order.
random.shuffle(num_list)
print(num_list)

[2, 25, 11, 85, 49, 46, 44, 88]


### Échantillonnage aléatoire d’un ensemble Python

In [7]:
# A real set this time (the previous version was a list despite its name).
aSet = {"Ahmed", "Ali", "Souad", "Houda", "Nihal"}
# random 3 samples from set
# random.sample() requires a sequence: since Python 3.11 passing a set raises
# TypeError. sorted() converts the set to a list with a stable order, so a
# seeded run stays reproducible.
sampled_set = random.sample(sorted(aSet), 3)
print(sampled_set)

['Souad', 'Ali', 'Houda']


### Échantillonnage aléatoire d’un dictionnaire Python


In [8]:
marks_dict = {
    'Ahmed': 15,
    'Ali': 10,
    'Souad': 12,
    'Houda': 18,
    'Nihal': 20,
}
# Turn the dict into a sorted list of (name, mark) pairs, then draw 3 of them.
ordered_items = sorted(marks_dict.items())
sampled_dict = random.sample(ordered_items, 3)
print(sampled_dict)

[('Souad', 12), ('Houda', 18), ('Ahmed', 15)]


In [9]:
# Each sampled entry is a (key, value) pair; unpack it straight into print().

# First key:value
print(*sampled_dict[0])

# Second key:value
print(*sampled_dict[1])

Souad 12
Houda 18


### Graine aléatoire pour obtenir la même liste d'échantillons à chaque fois

In [10]:
# Re-seeding the generator before every draw makes each draw identical.
alist = [40, 80, 100, 120, 20.5, 40.5, 30.5, 50.5, 70.5]

for attempt in range(5):
    random.seed(4)  # reset the generator to the same state (seed 4)
    sample_list = random.sample(alist, k=3)  # three items, no repetition
    print(sample_list)
    

[120, 20.5, 40]
[120, 20.5, 40]
[120, 20.5, 40]
[120, 20.5, 40]
[120, 20.5, 40]


## Application de l'échantillonnage aléatoire à un dataframe

In [11]:
import pandas as pd

In [12]:
# Load the CSV (path is relative to the notebook's working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='dataset/train.csv')
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [13]:
# Display the DataFrame's dimensions as a (rows, columns) tuple.
df.shape

(891, 12)

In [17]:
# Draw 10 distinct rows at random; DataFrame.sample() samples without
# replacement unless replace=True is passed.
df_sample = df.sample(n=10)
df_sample

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
527,528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S
392,393,0,3,"Gustafsson, Mr. Johan Birger",male,28.0,2,0,3101277,7.925,,S
391,392,1,3,"Jansson, Mr. Carl Olof",male,21.0,0,0,350034,7.7958,,S
810,811,0,3,"Alexander, Mr. William",male,26.0,0,0,3474,7.8875,,S
627,628,1,1,"Longley, Miss. Gretchen Fiske",female,21.0,0,0,13502,77.9583,D9,S
818,819,0,3,"Holm, Mr. John Fredrik Alexander",male,43.0,0,0,C 7075,6.45,,S
552,553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q
85,86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gu...",female,33.0,3,0,3101278,15.85,,S
642,643,0,3,"Skoog, Miss. Margit Elizabeth",female,2.0,3,2,347088,27.9,,S
341,342,1,1,"Fortune, Miss. Alice Elizabeth",female,24.0,3,2,19950,263.0,C23 C25 C27,S


In [20]:
# Draw 10 passenger names with replacement (duplicates are possible).
# random.choices() picks items by integer position (population[i]); on a
# pandas Series, series[i] is a label lookup, which only coincides with the
# position while the index is the default 0..n-1. Converting to a plain list
# first keeps the draw positional and correct for any index.
# NOTE(review): this rebinds df_sample from a DataFrame to a list of strings.
df_sample = random.choices(df['Name'].tolist(), k=10)
df_sample

['Weir, Col. John',
 'Ford, Mrs. Edward (Margaret Ann Watson)',
 'Asplund, Miss. Lillian Gertrud',
 'Alhomaki, Mr. Ilmari Rudolf',
 'McGowan, Miss. Anna "Annie"',
 'Harris, Mr. George',
 'Larsson, Mr. Bengt Edvin',
 'Coleridge, Mr. Reginald Charles',
 'Andersson, Mr. August Edvard ("Wennerstrom")',
 'Ward, Miss. Anna']