In [None]:
import softsampling.numerical.sampling as num_sampling
import softsampling.utils.preprocessing as preprocessing
import softsampling.numerical.stratification as num_stratification
import softsampling.numerical.combination as num_combination

## Numerical sampling of 1 variable

### Preprocessing

In [None]:
# Load the CSV and discard the 'type' column before any further processing.
file_path = r"type_likes.csv"
df = preprocessing.read_dataframe(file_path).drop(columns=['type'])

# NOTE(review): the return value is discarded, so this call only has an effect
# if remove_nan_df mutates df in place — confirm in softsampling.utils.preprocessing.
preprocessing.remove_nan_df(df)
print(df)

# Convert the frame to lists (presumably one per remaining column — TODO confirm),
# then print their statistics and keep them for later use.
variables = preprocessing.create_lists_from_df(df)
statistics = preprocessing.print_and_collect_statistics(variables)

### Stratification

In [None]:
# Cluster count(s) used for stratification, passed as a list and hard-coded for now.
# NOTE(review): presumably one entry per variable — confirm against create_stratum_kmeans.
num_clusters_list = [3] # Elbow method to be implemented in future version
# Build the strata from the variables (k-means, judging by the helper's name — TODO confirm).
strata = num_stratification.create_stratum_kmeans(variables, num_clusters_list) 
# Called for its printed report only; any return value is ignored.
num_stratification.print_stratum_counts(strata) 
# Dict form of the strata, used as input by the sampling helpers.
stratum_dict = num_stratification.get_stratum_dict(strata)

### Sampling

First, we compute the quantities required by the sample-size calculation in the next cell (`N`, `mu`, `nis`, `phi`, `s` and `epsilon`).

In [None]:
# Gather the inputs for the sample-size calculation and print them for inspection.
print("PRE-SAMPLING")
# Population size and means are extracted from the previously collected statistics.
N, mu = num_sampling.extract_population_size_and_means(statistics)
print("Population size (N):", N)
print("Means (mu):", mu)
# nis and phi are derived from the strata and N.
# NOTE(review): presumably per-stratum sizes and proportions (size / N) — confirm in nis_phi.
nis, phi = num_sampling.nis_phi(stratum_dict, N) 
# NOTE(review): by the helper's name, s holds standard deviations computed from the strata — confirm.
s = num_sampling.calculate_std_devs_single(stratum_dict) 
print("nis:", len(nis), nis)
print("phi:", len(phi), phi)
print("s: ", s)

# epsilon is 10% of the first mean.
# NOTE(review): presumably the tolerated absolute error of the estimate — confirm.
epsilon = mu[0] * 0.1

In [None]:
# Sample sizes: n for the global sample, ni for the strata (per the labels printed below).
# NOTE(review): 0.95 is presumably the confidence level; the meaning of the literal 2
# is not visible from this notebook — confirm against nStratifiedSampling.
n, ni = num_sampling.nStratifiedSampling(epsilon, 0.95, phi, s, 2, N, nis)
print("Size of the global sample:", n)
print("Sample sizes for each stratum:", ni)

# Run sampling_single and unpack its four results.
# sampling_error is unpacked but not used in this cell.
(
    mean_estimate,
    sampling_error,
    lower_confidence_interval,
    upper_confidence_interval,
) = num_sampling.sampling_single(stratum_dict, phi, nis, s, ni)

# Show population values next to their sample counterparts.
print("\nSampling process:")
print("N: ", N, " --> n: ", n)
print("Mu: ", mu[0], " --> x-barra: ", mean_estimate)
print("Confidence interval: (", lower_confidence_interval, upper_confidence_interval, ")")