# SAMPLING TECHNIQUES

## Libraries

In [68]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

## Dataset

In [47]:
# Seed NumPy's global RNG before the first random draw so the synthetic
# population is identical on every Restart & Run All. Without a seed, each
# run produced a different population and therefore different samples.
# Later cells that call np.random.* continue the same seeded stream when the
# notebook is executed top to bottom.
SEED = 42
np.random.seed(SEED)

# 30 synthetic salaries: integers drawn uniformly from [0, 100).
# (The notebook's section notes refer to salary in lakhs.)
population_salary = np.random.randint(100, size = 30)
population_salary

array([ 8, 89, 48, 36, 56, 61,  3, 70, 75, 27, 47, 55, 25,  0, 35, 59, 41,
       13, 22, 49, 78, 41, 19, 14, 75, 88, 62, 37, 67, 94])

In [49]:
# Gender label ("M" / "F") for each of the 30 population members, in row order.
population_gender = [
    "M", "F", "M", "M", "F", "M", "M", "M", "M", "F",
    "M", "F", "M", "F", "F", "F", "M", "F", "M", "M",
    "F", "M", "M", "M", "M", "F", "M", "F", "F", "F",
]
# Sanity check: the list length must match the 30 values of the other columns.
len(population_gender)

30

In [55]:
# 30 synthetic annual-spend figures: integers drawn uniformly from [0, 10).
# (The notebook's section notes refer to annual spend in lakhs.)
population_annual_spend = np.random.randint(low = 0, high = 10, size = 30)
population_annual_spend

array([6, 8, 8, 9, 1, 0, 9, 1, 0, 3, 9, 0, 9, 9, 9, 0, 1, 9, 8, 0, 4, 9,
       0, 7, 9, 4, 4, 3, 8, 6])

In [61]:
# City of each of the 30 population members, in row order.
population_location = [
    "Mumbai", "Hyderabad", "Mumbai", "Delhi", "Chennai", "Chennai", "Delhi", "Noida",
    "Mumbai", "Hyderabad", "Mumbai", "Delhi", "Chennai", "Chennai", "Delhi", "Noida",
    "Mumbai", "Hyderabad", "Mumbai", "Delhi", "Chennai", "Chennai", "Delhi", "Noida",
    "Mumbai", "Hyderabad", "Chennai", "Chennai", "Delhi", "Noida",
]
# Sanity check: the list length must match the 30 values of the other columns.
len(population_location)

30

In [63]:
# Assemble the four equal-length columns into a single population table.
df_population = pd.DataFrame(
    data = {
        "Salary": population_salary,
        "Gender": population_gender,
        "Annual Spend": population_annual_spend,
        "Location": population_location,
    }
)
# Preview the first five rows.
df_population.head(n = 5)

Unnamed: 0,Salary,Gender,Annual Spend,Location
0,8,M,6,Mumbai
1,89,F,8,Hyderabad
2,48,M,8,Mumbai
3,36,M,9,Delhi
4,56,F,1,Chennai


## 1. Simple Random Sampling

In [65]:
# Simple random sample: 10 rows drawn from the population.
# DataFrame.sample draws without replacement unless replace=True is passed.
sampled_data = df_population.sample(n = 10)
sampled_data

Unnamed: 0,Salary,Gender,Annual Spend,Location
0,8,M,6,Mumbai
10,47,M,9,Mumbai
3,36,M,9,Delhi
17,13,F,9,Hyderabad
27,37,F,3,Chennai
23,14,M,7,Noida
29,94,F,6,Noida
16,41,M,1,Mumbai
11,55,F,0,Delhi
4,56,F,1,Chennai


## 2. Stratified Sampling
- **Based on gender**

In [76]:
# train_test_split is used here purely as a stratified sampler: the "train"
# portion (10% of the rows) is kept as the sample and the remainder is
# discarded. stratify=Gender asks for the split to follow the population's
# gender proportions.
stratified_sample, _ = train_test_split(
    df_population,
    train_size = 0.1,
    stratify = df_population["Gender"],
)
stratified_sample

Unnamed: 0,Salary,Gender,Annual Spend,Location
26,62,M,4,Chennai
5,61,M,0,Chennai
11,55,F,0,Delhi


In [81]:
# Count how many sampled rows fall in each gender stratum.
stratified_sample.loc[:, "Gender"].value_counts()

Gender
M    2
F    1
Name: count, dtype: int64

## 3. Systematic Sampling
- **Every 5th record, starting from a random offset**

In [90]:
# Sampling interval: one row is kept out of every k.
k = 5
# Random offset in [0, k), so any of the first k rows can be the first pick.
start = np.random.randint(low = 0, high = k)
print(start) # For start of systematic sampling

0


In [92]:
# Take every k-th row by position, beginning at the random offset `start`.
systematic_sample = df_population.iloc[slice(start, None, k)]
systematic_sample

Unnamed: 0,Salary,Gender,Annual Spend,Location
0,8,M,6,Mumbai
5,61,M,0,Chennai
10,47,M,9,Mumbai
15,59,F,0,Noida
20,78,F,4,Chennai
25,88,F,4,Hyderabad


## 4. Cluster Sampling
- **Based on location**

In [106]:
# Each distinct location is treated as one cluster.
available_locations = df_population["Location"].unique()
# Pick a single cluster at random ...
chosen_cluster = np.random.choice(available_locations)
# ... and keep every row that belongs to it.
clustered_sample = df_population.loc[df_population["Location"] == chosen_cluster]
clustered_sample

Unnamed: 0,Salary,Gender,Annual Spend,Location
0,8,M,6,Mumbai
2,48,M,8,Mumbai
8,75,M,0,Mumbai
10,47,M,9,Mumbai
16,41,M,1,Mumbai
18,22,M,8,Mumbai
24,75,M,9,Mumbai


## 5. Convenience Sampling

In [109]:
# Convenience sample: simply the first 10 rows, i.e. whatever is easiest to reach.
df_population.iloc[:10]

Unnamed: 0,Salary,Gender,Annual Spend,Location
0,8,M,6,Mumbai
1,89,F,8,Hyderabad
2,48,M,8,Mumbai
3,36,M,9,Delhi
4,56,F,1,Chennai
5,61,M,0,Chennai
6,3,M,9,Delhi
7,70,M,1,Noida
8,75,M,0,Mumbai
9,27,F,3,Hyderabad


## 5. Judgemental Sampling
- **Select records with an annual spend of more than 8 lakhs**

In [138]:
# Judgement criterion: keep only rows whose annual spend is strictly above 8.
judged_sample = df_population.loc[df_population["Annual Spend"].gt(8)]
judged_sample

Unnamed: 0,Salary,Gender,Annual Spend,Location
3,36,M,9,Delhi
6,3,M,9,Delhi
10,47,M,9,Mumbai
12,25,M,9,Chennai
13,0,F,9,Chennai
14,35,F,9,Delhi
17,13,F,9,Hyderabad
21,41,M,9,Chennai
24,75,M,9,Mumbai


## 6. Snowball Sampling
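- **Should be done manually: snowball sampling grows the sample by asking existing participants to refer new ones, so it cannot be simulated from this static dataset**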

## 7. Quota Sampling
- **Pick the first 5 males and the first 5 females with a salary of less than 50 lakhs**

In [128]:
# Salary criterion shared by both quota groups (strictly below 50).
salary_under_50 = df_population["Salary"] < 50

# Fill each gender's quota with the first 5 matching rows in table order.
# head(5) returns fewer rows if fewer than 5 match.
quota_sample_male = df_population.loc[(df_population["Gender"] == "M") & salary_under_50].head(5)
quota_sample_female = df_population.loc[(df_population["Gender"] == "F") & salary_under_50].head(5)

# Stack the male rows first, then the female rows, keeping original indices.
quota_sample = pd.concat([quota_sample_male, quota_sample_female])
quota_sample

Unnamed: 0,Salary,Gender,Annual Spend,Location
0,8,M,6,Mumbai
2,48,M,8,Mumbai
3,36,M,9,Delhi
6,3,M,9,Delhi
10,47,M,9,Mumbai
9,27,F,3,Hyderabad
13,0,F,9,Chennai
14,35,F,9,Delhi
17,13,F,9,Hyderabad
27,37,F,3,Chennai
