In [None]:
#!/usr/bin/env python3
# Usage: cat data/first_hundred_numbers.tsv | python group_in_quantiles.py 4

from sys import stdin, stdout, stderr, argv
import pandas as pd

def group_in_quantiles(values, n_quantiles):
    """Assign each value to a quantile bin.

    Args:
        values: Numeric values to bin (any sequence or pandas Series).
        n_quantiles: Requested number of quantiles; must be >= 1.

    Returns:
        A DataFrame with the columns ``value``, ``quantile``,
        ``quantile_dup`` and ``interval`` (in that order).  ``quantile``
        holds labels ``q1``, ``q2``, ... and ``interval`` the matching
        value range.  ``quantile_dup`` repeats ``quantile`` because the
        output format carries the label twice.

    Raises:
        ValueError: If ``n_quantiles`` is smaller than 1.
    """
    if n_quantiles < 1:
        raise ValueError("n_quantiles must be a positive integer")

    frame = pd.DataFrame({"value": values})

    # Bin once and derive the labels from the result.  With
    # duplicates="drop" pandas may merge coinciding bin edges and return
    # fewer bins than requested; handing qcut a fixed list of n_quantiles
    # labels would then raise "Bin labels must be one fewer than the
    # number of bin edges".
    intervals = pd.qcut(frame["value"], q=n_quantiles, duplicates="drop")
    labels = [f"q{i + 1}" for i in range(len(intervals.cat.categories))]

    frame["quantile"] = intervals.cat.rename_categories(labels)
    frame["quantile_dup"] = frame["quantile"]
    frame["interval"] = intervals
    return frame


def main():
    """Read numbers from stdin and write value/quantile rows to stdout.

    Expects exactly one command-line argument, the number of quantiles.
    Output is tab-separated without a header: value, quantile label,
    quantile label again, interval.  A one-line summary goes to stderr.
    Exits with status 1 on a usage error or empty input.
    """
    if len(argv) != 2:
        print("Usage: cat numbers.tsv | python group_in_quantiles.py <n_quantiles>", file=stderr)
        raise SystemExit(1)

    # Validate the argument before consuming stdin so mistakes fail fast.
    try:
        n_quantiles = int(argv[1])
    except ValueError:
        print(f"Error: <n_quantiles> must be an integer, got {argv[1]!r}", file=stderr)
        raise SystemExit(1) from None
    if n_quantiles < 1:
        print("Error: <n_quantiles> must be at least 1", file=stderr)
        raise SystemExit(1)

    # Read numbers from stdin (one value per line, no header).
    try:
        numbers_df = pd.read_csv(stdin, header=None, names=["value"])
    except pd.errors.EmptyDataError:
        print("Error: no input values on stdin", file=stderr)
        raise SystemExit(1) from None

    grouped = group_in_quantiles(numbers_df["value"], n_quantiles)

    # Output: value, quantile, quantile, interval
    grouped[["value", "quantile", "quantile_dup", "interval"]].to_csv(
        stdout, sep="\t", index=False, header=False
    )

    # Report the number of bins actually produced, which can be lower
    # than requested when duplicate bin edges were dropped.
    n_bins = len(grouped["interval"].cat.categories)
    print(f"Grouped {len(grouped)} values into {n_bins} quantiles", file=stderr)


if __name__ == "__main__":
    main()