In [None]:
import softsampling.categorical.stratification as cat_stratification
import softsampling.utils.preprocessing as preprocessing
import softsampling.categorical.sampling as cat_sampling 
import softsampling.categorical.combination as cat_combination

## Categorical sampling of 1 variable

### Preprocessing

In [None]:
# Load the CSV and discard the "likes" and "name" columns in one step, so that
# only the remaining column(s) take part in the categorical sampling below.
print("PREPROCESSING:")
file_path = r"type_likes.csv"
df = preprocessing.read_dataframe(file_path).drop(["likes", "name"], axis=1)

# Quick look at the first ten rows of the reduced frame.
print(df[:10])

# NOTE(review): the return value is discarded, so this call only has an effect
# if remove_nan_df mutates `df` in place — confirm against the helper.
preprocessing.remove_nan_df(df)

# Derive the per-variable value lists and their element counts.
variables = preprocessing.create_lists_from_df(df)
counters = preprocessing.count_elements_in_variables_single(variables)

# Flatten the counters into two parallel collections (keys and values).
all_keys, all_values = preprocessing.dictionary_to_all_lists(counters)

# Collected here; the population size is extracted from it in the sampling step.
statistics = preprocessing.print_and_collect_statistics_single(variables)

### Stratification

In [None]:
# Build the strata from the element counters, then report how many entries
# each stratum holds.
strata_dict = cat_stratification.create_strata_single(counters)

print("STRATA DICT:")
for key in strata_dict:
    values = strata_dict[key]
    print(f"Length of '{key}': {len(values)}")

### Sampling

In [None]:
# Population size comes from the collected statistics; the second element of
# the returned pair is not needed here.
N, _ = cat_sampling.extract_population_size_and_means(statistics)
print(f"Population Size: {N}")

# Per-stratum observation counts (nis) and proportions (phi).
nis, phi = cat_sampling.nis_phi(strata_dict, N)
print(f"Number of observations in each stratum (nis): {nis}")
print(f"Proportion of each stratum (phi): {phi}")

# Tuning knobs for the required sample size.
# NOTE(review): presumably the tolerated error margin and the confidence
# level — confirm against cat_sampling.sample_size.
epsilon = 0.05
confidence = 0.95
n = cat_sampling.sample_size(epsilon, confidence)
print("Required sample size:", n)

# Split the overall sample size n across the strata.
ni_size = cat_sampling.determine_ni_size_single(phi, all_keys, n)
print("Sample size of each strata:")
for stratum_key in ni_size:
    size = ni_size[stratum_key]
    print(f"Stratum {stratum_key}: {size} observations")

In [None]:
# Build the stratified sample from the per-stratum sizes (ni_size) and the
# strata themselves (strata_dict).
# NOTE(review): if create_sample draws at random, no seed is set anywhere in
# this notebook, so re-runs may produce a different sample — confirm.
sample = cat_sampling.create_sample(ni_size, strata_dict)

In [None]:
# Report how many observations the stratified sample contains in total.
total_sampled = len(sample)
print(f"Total stratified sample size: {total_sampled}")

In [None]:
# Tally the combinations present in the drawn sample and list each with its
# count.
final_combination_counts = cat_sampling.count_combinations_final(sample)

print("Counts of each combination:")
for combination in final_combination_counts:
    count = final_combination_counts[combination]
    print(f"Combination: {combination}, Count: {count}")