In [37]:
import pandas as pd

In [38]:
# Load the baseline field inventory (semicolon-separated CSV).
# 'Unnamed: 10' is dropped when present; it is presumably an empty column
# produced by a trailing ';' on each row (TODO confirm). errors='ignore'
# keeps the cell runnable if the file is re-exported without that column.
df = pd.read_csv('data/LineaBase-Edge(Edge).csv', sep=';').drop(
    columns=['Unnamed: 10'], errors='ignore'
)

# Distinct parquet_name values, in order of first appearance.
unique_parquet_names = df['parquet_name'].unique()

# Count the rows of every parquet_name in a single pass instead of
# re-filtering the whole frame once per name.
campos_por_parquet = df.groupby('parquet_name', sort=False).size()

print(f"Total de parquet_name únicos: {len(unique_parquet_names)}")
print("\nLista de parquet_name:")
for pn in unique_parquet_names:
    # .get(..., 0) mirrors the original result for a missing (NaN) name,
    # which groupby leaves out and the old equality filter counted as 0.
    print(f"  - {pn}: {campos_por_parquet.get(pn, 0)} campos")

Total de parquet_name únicos: 12

Lista de parquet_name:
  - customer_address: 12 campos
  - customer_kyc: 11 campos
  - customer_legal_customer: 58 campos
  - customer_incompatible_risk_data: 10 campos
  - customer_guarantees: 11 campos
  - customer_contactability: 13 campos
  - customer_checkbooks_pending_delivery: 9 campos
  - customer_card_pending_delivery: 12 campos
  - customer_cards_pending_delivery: 12 campos
  - customer_business_relationships: 5 campos
  - customer_bank_reference_data: 8 campos
  - customer_personal_relationships: 5 campos


In [39]:
import os
import json

# Make sure the output folder for the generated contracts exists.
os.makedirs('contratos', exist_ok=True)

# The original comment lists '---' next to NaN as "no default value";
# presumably it is the placeholder used in the spreadsheet (TODO confirm).
NO_DEFAULT_PLACEHOLDER = '---'


def _json_safe(value):
    """Return None for a missing (NaN/NA) cell, otherwise the value unchanged.

    json.dump serialises float NaN as the bare token ``NaN``, which is not
    valid JSON; mapping it to None makes it come out as ``null`` instead.
    """
    return None if pd.isna(value) else value


def _build_field(row):
    """Build one contract field entry from a CSV row given as a dict.

    Reads the 'column_name', 'column_type', 'description' and
    'Valor por defecto' keys. The field is required unless its column name
    contains 'freefield' (case-insensitive). A default that is missing or
    equal to the '---' placeholder is written as "NA".
    """
    default_value = row['Valor por defecto']
    sin_default = pd.isna(default_value) or (
        isinstance(default_value, str)
        and default_value.strip() == NO_DEFAULT_PLACEHOLDER
    )
    # Key order matches the original output: target_field, required, type,
    # description, default_value.
    return {
        "target_field": row['column_name'],
        "required": 'freefield' not in row['column_name'].lower(),
        "type": _json_safe(row['column_type']),
        "description": _json_safe(row['description']),
        "default_value": "NA" if sin_default else default_value,
    }


# Summary of every contract written, used for the report printed below.
contratos_creados = []

# One contract per parquet_name. sort=False keeps the order of first
# appearance in the CSV; rows with a missing parquet_name are skipped by
# groupby (they would have failed on .replace() in the name formatting).
for parquet_name, df_filtered in df.groupby('parquet_name', sort=False):
    contrato = {
        "contract_name": f"{parquet_name.replace('_', ' ').title()} Contract",
        "version": "1.0",
        "fields": [_build_field(row) for row in df_filtered.to_dict('records')],
    }

    # Write the contract as UTF-8 JSON, keeping accented characters readable.
    filename = f'contratos/contrato_{parquet_name}.json'
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(contrato, f, indent=2, ensure_ascii=False)

    contratos_creados.append({
        'parquet_name': parquet_name,
        'filename': filename,
        'num_campos': len(contrato['fields'])
    })

print(f"✓ Se crearon {len(contratos_creados)} contratos:\n")
for contrato_info in contratos_creados:
    print(f"  - {contrato_info['filename']}: {contrato_info['num_campos']} campos")

✓ Se crearon 12 contratos:

  - contratos/contrato_customer_address.json: 12 campos
  - contratos/contrato_customer_kyc.json: 11 campos
  - contratos/contrato_customer_legal_customer.json: 58 campos
  - contratos/contrato_customer_incompatible_risk_data.json: 10 campos
  - contratos/contrato_customer_guarantees.json: 11 campos
  - contratos/contrato_customer_contactability.json: 13 campos
  - contratos/contrato_customer_checkbooks_pending_delivery.json: 9 campos
  - contratos/contrato_customer_card_pending_delivery.json: 12 campos
  - contratos/contrato_customer_cards_pending_delivery.json: 12 campos
  - contratos/contrato_customer_business_relationships.json: 5 campos
  - contratos/contrato_customer_bank_reference_data.json: 8 campos
  - contratos/contrato_customer_personal_relationships.json: 5 campos
