# In [3]:
import os
import pandas as pd
from zipfile import ZipFile

# Input and output locations, relative to the current working directory.
input_zip_path = "../files/input/"
output_path = "../files/output/"
os.makedirs(output_path, exist_ok=True)

# Columns taken from each raw file for the three output tables. They are
# defined once here because they do not change from one file to the next.
client_cols = ["client_id", "age", "job", "marital", "education", "credit_default", "mortgage"]
campaign_cols = [
    "client_id", "number_contacts", "contact_duration",
    "previous_campaign_contacts", "previous_outcome", "campaign_outcome", "day", "month"
]
economics_cols = ["client_id", "cons_price_idx", "euribor_three_months"]


def _flag(series, positive):
    """Return an integer Series: 1 where *series* equals *positive*, else 0."""
    # A vectorized comparison gives the same 0/1 result as mapping a lambda
    # over every element; non-matching values (NaN included) become 0.
    return (series == positive).astype(int)


def _build_client(df):
    """Return the cleaned client table extracted from the raw frame *df*."""
    client_df = df[client_cols].copy()
    # Normalize job labels: drop dots and turn hyphens into underscores.
    client_df["job"] = (
        client_df["job"].str.replace(".", "", regex=False).str.replace("-", "_", regex=False)
    )
    # Dots become underscores; the literal "unknown" is stored as missing.
    client_df["education"] = (
        client_df["education"].str.replace(".", "_", regex=False).replace("unknown", pd.NA)
    )
    client_df["credit_default"] = _flag(client_df["credit_default"], "yes")
    client_df["mortgage"] = _flag(client_df["mortgage"], "yes")
    return client_df


def _build_campaign(df):
    """Return the cleaned campaign table extracted from the raw frame *df*."""
    campaign_df = df[campaign_cols].copy()
    campaign_df["previous_outcome"] = _flag(campaign_df["previous_outcome"], "success")
    campaign_df["campaign_outcome"] = _flag(campaign_df["campaign_outcome"], "yes")
    # Build the date from the day and month columns with the year fixed to 2022.
    campaign_df["last_contact_date"] = pd.to_datetime(
        campaign_df["day"].astype(str) + "-" + campaign_df["month"] + "-2022", format="%d-%b-%Y"
    )
    return campaign_df.drop(columns=["day", "month"])


# Per-file pieces of each table; they are concatenated once all archives
# have been read.
client_data = []
campaign_data = []
economics_data = []

# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the generated files reproducible between runs and machines.
for zip_filename in sorted(os.listdir(input_zip_path)):
    if not zip_filename.endswith(".zip"):
        continue

    with ZipFile(os.path.join(input_zip_path, zip_filename), "r") as zip_ref:
        for member in zip_ref.infolist():
            # Directory entries carry no data and cannot be parsed as CSV.
            if member.is_dir():
                continue

            with zip_ref.open(member) as file:
                df = pd.read_csv(file)

            client_data.append(_build_client(df))
            campaign_data.append(_build_campaign(df))
            economics_data.append(df[economics_cols].copy())

# Merge the per-file pieces of every table and write each one to its own CSV.
def _merge_and_save(frames, csv_name):
    """Concatenate *frames*, write the result to *csv_name* in the output folder, and return it."""
    merged = pd.concat(frames, ignore_index=True)
    destination = os.path.join(output_path, csv_name)
    merged.to_csv(destination, index=False)
    return merged


final_client_df = _merge_and_save(client_data, "client.csv")
final_campaign_df = _merge_and_save(campaign_data, "campaign.csv")
final_economics_df = _merge_and_save(economics_data, "economics.csv")

print("Archivos generados en la carpeta files/output/")

# Out: Archivos generados en la carpeta files/output/
