# In [38]:
import pandas as pd
import zipfile
import os

# Indicator columns and the raw value that is encoded as 1 (anything else -> 0).
_POSITIVE_VALUES = {
    'credit_default': 'yes',
    'mortgage': 'yes',
    'campaign_outcome': 'yes',
    'previous_outcome': 'success',
}


def _load_zipped_csvs(input_folder):
    """Read the single CSV inside every ``.zip`` in *input_folder* into one frame."""
    frames = []
    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order of the generated files reproducible across runs and platforms.
    for file_name in sorted(os.listdir(input_folder)):
        if not file_name.endswith(".zip"):
            continue
        zip_path = os.path.join(input_folder, file_name)
        with zipfile.ZipFile(zip_path, 'r') as archive:
            csv_members = [name for name in archive.namelist() if name.endswith(".csv")]
            # Archives that do not hold exactly one CSV member are skipped.
            if len(csv_members) != 1:
                continue
            with archive.open(csv_members[0]) as handle:
                frames.append(pd.read_csv(handle))

    if not frames:
        # Fail with a clear message instead of a KeyError on a missing column.
        raise ValueError(
            f"No se encontraron archivos .zip con un único CSV en: {input_folder}"
        )
    # Concatenate once: concatenating inside the loop re-copies all rows per file.
    return pd.concat(frames, ignore_index=True)


def _clean(frame):
    """Return a cleaned copy of *frame* (index column dropped, values normalized)."""
    # drop() returns a new frame, so the caller's data is never mutated.
    frame = frame.drop(columns=['Unnamed: 0'], errors='ignore')

    # job: '-' becomes '_' and a trailing '.' is removed.
    frame['job'] = (
        frame['job']
        .str.replace('-', '_', regex=False)
        .str.replace(r'\.$', '', regex=True)
    )

    # education: '.' becomes '_'; both missing values and the literal
    # 'unknown' end up as pd.NA.
    education = frame['education'].fillna('unknown').str.replace('.', '_', regex=False)
    frame['education'] = education.replace('unknown', pd.NA)

    # Encode the indicator columns as 1/0; missing values compare unequal -> 0.
    for column, positive in _POSITIVE_VALUES.items():
        frame[column] = frame[column].eq(positive).astype(int)
    return frame


def process_csv_zip_files(input_folder: str, output_folder: str) -> None:
    """Combine zipped CSV files and export client, campaign and economics tables.

    Every ``.zip`` file in ``input_folder`` that contains exactly one ``.csv``
    member is read (in sorted file-name order) and the rows are stacked. The
    combined data is cleaned and written to ``output_folder`` as
    ``client.csv``, ``campaign.csv`` and ``economics.csv``.

    Args:
        input_folder: Directory holding the zipped CSV files.
        output_folder: Directory for the generated CSV files; created if it
            does not exist.

    Raises:
        ValueError: If no usable zip archive is found in ``input_folder``.
        KeyError: If an expected column is missing from the combined data.
    """
    combined_df = _clean(_load_zipped_csvs(input_folder))
    os.makedirs(output_folder, exist_ok=True)

    # `client` table
    client_columns = [
        'client_id', 'age', 'job', 'marital', 'education', 'credit_default', 'mortgage'
    ]
    client_df = combined_df[client_columns]
    client_df.to_csv(os.path.join(output_folder, 'client.csv'), index=False)
    # Sanity check kept from the original: shows the encoded values present.
    print(client_df['credit_default'].unique())

    # `campaign` table
    campaign_columns = [
        'client_id', 'number_contacts', 'contact_duration',
        'previous_campaign_contacts', 'previous_outcome', 'campaign_outcome'
    ]
    campaign_df = combined_df[campaign_columns].copy()

    # 'last_contact_date' is built from 'day' and 'month' with the year fixed to 2022.
    campaign_df['last_contact_date'] = pd.to_datetime(
        combined_df['day'].astype(str) + '-' + combined_df['month'].astype(str) + '-2022',
        format='%d-%b-%Y'
    )
    campaign_df.to_csv(os.path.join(output_folder, 'campaign.csv'), index=False)

    # `economics` table
    economics_columns = ['client_id', 'cons_price_idx', 'euribor_three_months']
    economics_df = combined_df[economics_columns]
    economics_df.to_csv(os.path.join(output_folder, 'economics.csv'), index=False)

    print("Archivos generados exitosamente en la carpeta:", output_folder)
    


# Input and output locations (adjust these to wherever your files live).
input_folder = "../files/input"
output_folder = "../files/output"

# Make sure the destination directory exists before anything is written.
os.makedirs(name=output_folder, exist_ok=True)

# Run the pipeline over the configured folders.
process_csv_zip_files(input_folder=input_folder, output_folder=output_folder)




# Output:
# [0 1]
# Archivos generados exitosamente en la carpeta: ../files/output
