In [2]:
import pandas as pd
import numpy as np
import re

# La lista completa de columnas raw proporcionada por el usuario
raw_columns_str = """Index(['Bankrupt?', ' ROA(C) before interest and depreciation before interest',
        ' ROA(A) before interest and % after tax',
        ' ROA(B) before interest and depreciation after tax',
        ' Operating Gross Margin', ' Realized Sales Gross Margin',
        ' Operating Profit Rate', ' Pre-tax net Interest Rate',
        ' After-tax net Interest Rate',
        ' Non-industry income and expenditure/revenue',
        ' Continuous interest rate (after tax)', ' Operating Expense Rate',
        ' Research and development expense rate', ' Cash flow rate',
        ' Interest-bearing debt interest rate', ' Tax rate (A)',
        ' Net Value Per Share (B)', ' Net Value Per Share (A)',
        ' Net Value Per Share (C)', ' Persistent EPS in the Last Four Seasons',
        ' Cash Flow Per Share', ' Revenue Per Share (Yuan ¥)',
        ' Operating Profit Per Share (Yuan ¥)',
        ' Per Share Net profit before tax (Yuan ¥)',
        ' Realized Sales Gross Profit Growth Rate',
        ' Operating Profit Growth Rate', ' After-tax Net Profit Growth Rate',
        ' Regular Net Profit Growth Rate', ' Continuous Net Profit Growth Rate',
        ' Total Asset Growth Rate', ' Net Value Growth Rate',
        ' Total Asset Return Growth Rate Ratio', ' Cash Reinvestment %',
        ' Current Ratio', ' Quick Ratio', ' Interest Expense Ratio',
        ' Total debt/Total net worth', ' Debt ratio %', ' Net worth/Assets',
        ' Long-term fund suitability ratio (A)', ' Borrowing dependency',
        ' Contingent liabilities/Net worth',
        ' Operating profit/Paid-in capital',
        ' Net profit before tax/Paid-in capital',
        ' Inventory and accounts receivable/Net value', ' Total Asset Turnover',
        ' Accounts Receivable Turnover', ' Average Collection Days',
        ' Inventory Turnover Rate (times)', ' Fixed Assets Turnover Frequency',
        ' Net Worth Turnover Rate (times)', ' Revenue per person',
        ' Operating profit per person', ' Allocation rate per person',
        ' Working Capital to Total Assets', ' Quick Assets/Total Assets',
        ' Current Assets/Total Assets', ' Cash/Total Assets',
        ' Quick Assets/Current Liability', ' Cash/Current Liability',
        ' Current Liability to Assets', ' Operating Funds to Liability',
        ' Inventory/Working Capital', ' Inventory/Current Liability',
        ' Current Liabilities/Liability', ' Working Capital/Equity',
        ' Current Liabilities/Equity', ' Long-term Liability to Current Assets',
        ' Retained Earnings to Total Assets', ' Total income/Total expense',
        ' Total expense/Assets', ' Current Asset Turnover Rate',
        ' Quick Asset Turnover Rate', ' Working capitcal Turnover Rate',
        ' Cash Turnover Rate', ' Cash Flow to Sales', ' Fixed Assets to Assets',
        ' Current Liability to Liability', ' Current Liability to Equity',
        ' Equity to Long-term Liability', ' Cash Flow to Total Assets',
        ' Cash Flow to Liability', ' CFO to Assets', ' Cash Flow to Equity',
        ' Current Liability to Current Assets', ' Liability-Assets Flag',
        ' Net Income to Total Assets', ' Total assets to GNP price',
        ' No-credit Interval', ' Gross Profit to Sales',
        ' Net Income to Stockholder\'s Equity', ' Liability to Equity',
        ' Degree of Financial Leverage (DFL)',
        ' Interest Coverage Ratio (Interest expense to EBIT)',
        ' Net Income Flag', ' Equity to Liability']"""

# Parse the string to get a clean, ordered list of unique column names
all_quoted_strings = re.findall(r"'([^']+)'", raw_columns_str)

unique_columns_ordered = []
seen = set()
for col in all_quoted_strings:
    stripped_col = col.strip() # Remove leading/trailing spaces
    if stripped_col not in seen:
        unique_columns_ordered.append(stripped_col)
        seen.add(stripped_col)

# Eliminar 'Bankrupt?' de la lista final para el CSV de ejemplo
input_columns_for_example = [col for col in unique_columns_ordered if col != 'Bankrupt?']

# Generar una fila de datos inventados para todas las 95 columnas
invented_data = {}
for col in input_columns_for_example:
    # Heurísticas para generar datos plausibles según el nombre de la columna
    if 'Flag' in col:
        invented_data[col] = [np.random.randint(0, 2)] # Flags (0 o 1)
    elif 'Rate' in col or 'Ratio' in col or 'Margin' in col or 'Percent' in col:
        # Tasas, ratios, márgenes, porcentajes suelen estar entre 0 y 1
        invented_data[col] = [np.random.uniform(0.01, 0.99)]
    elif 'Growth Rate' in col:
        # Tasas de crecimiento pueden ser positivas o negativas
        invented_data[col] = [np.random.uniform(-0.5, 0.5)]
    elif 'Per Share' in col or 'Per person' in col or 'Net Value' in col or 'Income' in col or 'Profit' in col or 'Expense' in col:
        # Valores por acción/persona, valor neto, ingresos, ganancias, gastos
        # Pueden ser positivos, y algunos pueden ser grandes o pequeños
        if 'Yuan' in col: # Moneda
             invented_data[col] = [np.random.uniform(0.1, 1000.0)]
        else:
            invented_data[col] = [np.random.uniform(0.01, 50.0)] # Rango más amplio
    elif 'Total assets to GNP price' in col:
        invented_data[col] = [np.random.uniform(0.0001, 0.1)] # Típicamente pequeño
    elif 'Interval' in col or 'Days' in col:
        invented_data[col] = [np.random.uniform(1.0, 365.0)] # Días, intervalos
    elif 'Turnover' in col or 'Frequency' in col or '(times)' in col:
        invented_data[col] = [np.random.uniform(0.5, 10.0)] # Frecuencias de rotación
    else:
        # Valores por defecto para cualquier otra columna
        invented_data[col] = [np.random.uniform(0.0, 1.0)]

# Crear DataFrame con una sola fila
example_df = pd.DataFrame(invented_data)

# Guardar a CSV
output_csv_path = 'full_sample_company_data.csv'
example_df.to_csv(output_csv_path, index=False)

print(f"CSV de ejemplo '{output_csv_path}' generado exitosamente con 95 columnas.")
print("Ahora puedes usar este archivo directamente para subirlo a tu aplicación Streamlit.")
print("\nPrimeras filas del CSV generado:")
print(example_df.head())

CSV de ejemplo 'full_sample_company_data.csv' generado exitosamente con 95 columnas.
Ahora puedes usar este archivo directamente para subirlo a tu aplicación Streamlit.

Primeras filas del CSV generado:
   ROA(C) before interest and depreciation before interest  \
0                                           0.984293         

   ROA(A) before interest and % after tax  \
0                                0.945173   

   ROA(B) before interest and depreciation after tax  Operating Gross Margin  \
0                                           0.891032                0.342794   

   Realized Sales Gross Margin  Operating Profit Rate  \
0                     0.759986               0.560999   

   Pre-tax net Interest Rate  After-tax net Interest Rate  \
0                   0.326494                     0.455002   

   Non-industry income and expenditure/revenue  \
0                                     0.650661   

   Continuous interest rate (after tax)  ...  CFO to Assets  \
0                 