In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

In [2]:
# Load the dataset
df = pd.read_csv(r"C:\Users\KIIT\Downloads\petrol_consumption.csv")

In [3]:
# 1. Simple Random Sampling
simple_random_sample = df.sample(n=10, random_state=42)

In [4]:
# 2. Systematic Sampling
step = len(df) // 10
systematic_sample = df.iloc[::step][:10]

In [8]:
# 3. Stratified Sampling (assuming 'Petrol_Consumption' is a categorical or can be binned)
df['consumption_bin'] = pd.qcut(df['Petrol_Consumption'], q=4, labels=False)
df_strat = df.dropna(subset=['consumption_bin'])
stratified_sample = train_test_split(df_strat, test_size=0.9, stratify=df_strat['consumption_bin'], random_state=42)

In [9]:
# 4. Cluster Sampling (assuming 'Petrol_Consumption' can be used to form clusters)
df['cluster'] = pd.cut(df['Petrol_Consumption'], bins=3, labels=False)
clusters = df['cluster'].unique()
chosen_cluster = np.random.choice(clusters)
cluster_sample = df[df['cluster'] == chosen_cluster]

In [10]:
# 5. Multi-stage Sampling (randomly select clusters, then random sample within them)
selected_clusters = np.random.choice(clusters, size=2, replace=False)
multi_stage_sample = df[df['cluster'].isin(selected_clusters)].sample(n=10, random_state=42)

In [11]:
# 6. Convenience Sampling (first 10 rows)
convenience_sample = df.head(10)

In [12]:
# 7. Snowball Sampling (simulate by expanding from a random row)
seed = df.sample(n=1, random_state=42)
neighbors = df[(df['Petrol_Consumption'] >= seed['Petrol_Consumption'].values[0] - 10) &
               (df['Petrol_Consumption'] <= seed['Petrol_Consumption'].values[0] + 10)]
snowball_sample = pd.concat([seed, neighbors]).drop_duplicates().head(10)

In [13]:
# 8. Quota Sampling (select a fixed number from each bin)
quota_sample = df.groupby('consumption_bin').apply(lambda x: x.sample(n=2, random_state=42)).reset_index(drop=True)

  quota_sample = df.groupby('consumption_bin').apply(lambda x: x.sample(n=2, random_state=42)).reset_index(drop=True)


In [14]:
# 9. Judgmental/Purposive Sampling (select rows based on a condition)
judgmental_sample = df[df['Petrol_Consumption'] > df['Petrol_Consumption'].mean()].sample(n=10, random_state=42)

In [15]:

# 10. Self-selection Sampling (simulate by filtering a condition, e.g., high consumption)
self_selection_sample = df[df['Petrol_Consumption'] > df['Petrol_Consumption'].quantile(0.75)].sample(n=10, random_state=42)

In [16]:
# 11. Balanced Sampling (sample to match the distribution of a variable)
min_count = df['consumption_bin'].value_counts().min()
balanced_sample = df.groupby('consumption_bin').apply(lambda x: x.sample(n=min_count, random_state=42)).reset_index(drop=True)

  balanced_sample = df.groupby('consumption_bin').apply(lambda x: x.sample(n=min_count, random_state=42)).reset_index(drop=True)


In [17]:
# 12. Time-based Sampling (if data has a time column, here simulated by index)
time_based_sample = df.iloc[::len(df)//10][:10]

In [18]:
# Display samples
print("Simple Random Sample:\n", simple_random_sample)
print("\nSystematic Sample:\n", systematic_sample)
print("\nStratified Sample:\n", stratified_sample)
print("\nCluster Sample:\n", cluster_sample)
print("\nMulti-stage Sample:\n", multi_stage_sample)
print("\nConvenience Sample:\n", convenience_sample)
print("\nSnowball Sample:\n", snowball_sample)
print("\nQuota Sample:\n", quota_sample)
print("\nJudgmental Sample:\n", judgmental_sample)
print("\nSelf-selection Sample:\n", self_selection_sample)
print("\nBalanced Sample:\n", balanced_sample)
print("\nTime-based Sample:\n", time_based_sample)

Simple Random Sample:
     Petrol_tax  Average_income  Paved_Highways  Population_Driver_licence(%)  \
27         7.5          3846.0          9061.0                         0.579   
40         7.0          4449.0          4639.0                         0.626   
26         8.0          3448.0          5399.0                         0.548   
43         7.0          3745.0          2611.0                         0.508   
24         8.5          4574.0          2619.0                         0.551   
37         7.0          3897.0          6385.0                         0.586   
12         7.0          4817.0          6930.0                         0.574   
19         8.5          4341.0          6010.0                         0.677   
4          8.0          4399.0           431.0                         0.544   
25         9.0          3721.0          4746.0                         0.544   

    Petrol_Consumption  
27               631.0  
40               587.0  
26               577.