In [34]:
import pandas as pd
import json
from pathlib import Path

# Load the raw dataset; the chart tables in this script are derived from it.
csv_path = "./data/Variant V.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

# Vertical bar: share of each employment_status value, in percent.
employment_share = (
    df["employment_status"]
    .value_counts(normalize=True)
    .mul(100)
    .round(2)
)

# Fold every category below 3 % into a single "Others" bar.
others_mask = employment_share < 3
if others_mask.any():
    # Re-round after summing: adding already-rounded floats can leave binary
    # noise (e.g. 3.3299999999999996) that would otherwise leak into the JSON.
    others_sum = round(float(employment_share[others_mask].sum()), 2)
    employment_share = employment_share[~others_mask]
    employment_share["Others"] = others_sum

# One row per bar: category label, percentage, and a single fixed color.
verticalBar = employment_share.reset_index()
verticalBar["color"] = "#33C2EA"
verticalBar.columns = ["key", "value", "color"]


# Horizontal bar: share of each housing_status value, in percent.
colors = ["#B89DFB", "#758bcf", "#33C2EA", "#FFC182", "#87db72"]

housing_share = (
    df["housing_status"]
    .value_counts(normalize=True)
    .mul(100)
    .round(2)
)

# Fold every category below 5 % into a single "Others" bar.
others_mask = housing_share < 5
if others_mask.any():
    # Re-round after summing: adding already-rounded floats can leave binary
    # noise (e.g. 4.6899999999999995) that would otherwise leak into the JSON.
    others_sum = round(float(housing_share[others_mask].sum()), 2)
    housing_share = housing_share[~others_mask]
    housing_share["Others"] = others_sum

# One row per bar: category label, percentage, and a color.
horizontalBar = housing_share.reset_index()
bar_count = len(horizontalBar)
# Wrap around the palette so any number of bars still gets a color.
horizontalBar["color"] = [colors[i % len(colors)] for i in range(bar_count)]
horizontalBar.columns = ["key", "value", "color"]

# Line chart: absolute number of rows (transactions) per month.
def _month_label(month_index, year=2022, preferred_day=30):
    """Return a ``YYYY-MM-DD`` label for a zero-based month index.

    The day is ``preferred_day`` clamped to the real length of the month, so
    month index 1 yields "2022-02-28" rather than the non-existent
    "2022-02-30". Months that have a 30th keep the "-30" label.

    Raises:
        ValueError: if ``month_index + 1`` is not a valid calendar month.
    """
    month_number = int(month_index) + 1
    days_in_month = pd.Timestamp(year=year, month=month_number, day=1).days_in_month
    label_day = min(preferred_day, days_in_month)
    return f"{year}-{month_number:02d}-{label_day:02d}"


lineChart = (
    df.groupby("month")
    .size()
    .reset_index(name="value")
    .rename(columns={"month": "date"})
)

# Replace the numeric month index with a date string.
lineChart["date"] = lineChart["date"].apply(_month_label)

# Area chart: mean credit_risk_score on three simulated dates per month.
# Days are simulated: each month's rows are split, in row order, into three
# equally sized buckets that are pinned to day 1, 10 and 20 of that month.
rows_per_month = df.groupby("month")["month"].transform("size")
df["simulated_day"] = df.groupby("month").cumcount()

# position * 3 // month_size is always 0, 1 or 2. Bucketing by thirds (rather
# than by fixed blocks of 10 rows) keeps the index inside day_map's keys no
# matter how many rows a month has; an index outside day_map would map to a
# NaN day and the row could not be assigned to any date.
df["group_in_month"] = (df["simulated_day"] * 3 // rows_per_month).astype(int)

day_map = {0: 1, 1: 10, 2: 20}
df["day"] = df["group_in_month"].map(day_map)

# "month" is used as a zero-based index here, hence the + 1.
df["date"] = pd.to_datetime({
    "year": 2022,
    "month": df["month"] + 1,
    "day": df["day"],
})

areaChart = (
    df.groupby("date")["credit_risk_score"]
    .mean()
    .round(2)
    .reset_index(name="value")
)

# Emit dates as plain strings so they serialize to JSON.
areaChart["date"] = areaChart["date"].dt.strftime("%Y-%m-%d")

# Pie chart: share of each payment_type value, in percent (2 decimals).
payment_share = df["payment_type"].value_counts(normalize=True) * 100
pieChart = payment_share.round(2).reset_index()
pieChart.columns = ["name", "value"]

# Collect each chart's rows as a list of records, keyed by chart name.
chart_frames = {
    "horizontalBar": horizontalBar,
    "verticalBar": verticalBar,
    "lineChart": lineChart,
    "areaChart": areaChart,
    "pieChart": pieChart,
}
eda_data = {
    chart_name: frame.to_dict(orient="records")
    for chart_name, frame in chart_frames.items()
}

# Write the bundle to ../web/public/eda.json, creating the folder if needed.
output_dir = Path("../web/public")
output_dir.mkdir(parents=True, exist_ok=True)

eda_path = output_dir / "eda.json"
with eda_path.open("w", encoding="utf-8") as f:
    json.dump(eda_data, f, indent=2)

print("Archivo eda.json guardado correctamente.")


Archivo eda.json guardado correctamente.
