In [1]:
import pandas as pd
import os
import glob

def quick_parquet_analysis(pattern: str = "*.parquet") -> None:
    """Print a quick summary of every parquet file matching ``pattern``.

    For each matching file the shape, the column names, the dtype of each
    column and the first two rows are printed to stdout. A file that cannot
    be read is reported with an error line and the remaining files are still
    processed.

    Args:
        pattern: Glob pattern used to locate the files. Defaults to
            ``"*.parquet"``, i.e. parquet files in the current working
            directory.

    Returns:
        None. Everything is written to stdout.
    """
    # glob.glob returns matches in arbitrary, filesystem-dependent order;
    # sorting makes the report reproducible from run to run.
    parquet_files = sorted(glob.glob(pattern))

    if not parquet_files:
        print("❌ No se encontraron archivos .parquet")
        return

    print(f"📁 Archivos encontrados: {len(parquet_files)}")
    print("=" * 60)

    for file in parquet_files:
        print(f"\n🔍 {file}")
        print("-" * 40)

        try:
            # NOTE: pd.read_parquet has no row-limit option, so the whole
            # file is loaded here; only the first two rows are printed.
            df = pd.read_parquet(file)

            print(f"📊 Forma: {df.shape}")
            print(f"📋 Columnas: {list(df.columns)}")
            print("🔢 Tipos de datos:")
            for col, dtype in df.dtypes.items():
                print(f"   - {col}: {dtype}")

            print("\n📋 Primeras 2 filas:")
            print(df.head(2))

        except Exception as e:
            # Deliberate best-effort boundary: one unreadable file must not
            # abort the report for the others.
            print(f"❌ Error: {e}")

# Script entry point: run the analysis only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    quick_parquet_analysis()


📁 Archivos encontrados: 3

🔍 test.parquet
----------------------------------------
📊 Forma: (1000, 17)
📋 Columnas: ['dialog', 'act', 'emotion', 'num_utterances', 'dialog_text', 'first_utterance', 'last_utterance', 'act_counts', 'emotion_counts', 'lengths_match', 'most_common_act', 'most_common_emotion', 'most_common_act_encoded', 'most_common_emotion_encoded', 'dialog_length', 'word_count', 'avg_word_length']
🔢 Tipos de datos:
   - dialog: object
   - act: object
   - emotion: object
   - num_utterances: int64
   - dialog_text: object
   - first_utterance: object
   - last_utterance: object
   - act_counts: int64
   - emotion_counts: int64
   - lengths_match: bool
   - most_common_act: object
   - most_common_emotion: object
   - most_common_act_encoded: int32
   - most_common_emotion_encoded: int32
   - dialog_length: int64
   - word_count: int64
   - avg_word_length: float64

📋 Primeras 2 filas:
                                              dialog  \
0  [Hey man , you wanna buy some 