In [17]:
import pandas as pd
from autoviz.AutoViz_Class import AutoViz_Class  # Importación correcta

def cargar_datos(archivo_csv):
    """Carga el archivo CSV y devuelve un DataFrame."""
    try:
        df = pd.read_csv(archivo_csv, dtype=str)  # Cargar todo como string
        return df
    except FileNotFoundError:
        print(f"⚠️ Error: No se encontró el archivo '{archivo_csv}'.")
        return None

def limpiar_precio(valor):
    """Convierte precios de '1,529' a 1.529 (float)."""
    if isinstance(valor, str):
        return float(valor.replace(",", ".")) if valor.replace(",", "").replace(".", "").isdigit() else None
    return None

def preparar_datos(df, provincia_interes):
    """Filtra la provincia y limpia los datos."""
    if df is None or "Provincia" not in df.columns:
        return None
    
    df_provincia = df[df["Provincia"].str.upper() == provincia_interes].copy()
    
    if df_provincia.empty:
        print(f"⚠️ No hay datos para la provincia {provincia_interes}.")
        return None

    # Convertir precios a formato numérico
    columnas_precios = ["Precio gasolina 95 E5", "Precio gasóleo A"]
    for col in columnas_precios:
        if col in df_provincia.columns:
            df_provincia[col] = df_provincia[col].apply(limpiar_precio)
    
    df_provincia.dropna(subset=columnas_precios, inplace=True)
    
    return df_provincia

def visualizar_con_autoviz(archivo_csv):
    """Genera un informe automático de AutoViz."""
    AV = AutoViz_Class()  # Aquí está la corrección
    report = AV.AutoViz(archivo_csv)
    return report

def main():
    """Función principal."""
    archivo_csv = "gii32_act1_precios_carburantes_24.csv"
    provincia_interes = "MADRID"

    df = cargar_datos(archivo_csv)
    df_provincia = preparar_datos(df, provincia_interes)
    
    if df_provincia is not None and not df_provincia.empty:
        print("\nGenerando informe automático con AutoViz...\n")
        visualizar_con_autoviz(archivo_csv)

# Ejecutar el script
if __name__ == "__main__":
    main()


import matplotlib.pyplot as plt
import os

output_dir = "autoviz_output"
os.makedirs(output_dir, exist_ok=True)

# Guardar todas las figuras activas
figures = [plt.figure(n) for n in plt.get_fignums()]
for i, fig in enumerate(figures):
    fig.savefig(f"{output_dir}/autoviz_plot_{i+1}.png")



Generando informe automático con AutoViz...

Shape of your Data Set loaded: (11865, 10)
#######################################################################################
######################## C L A S S I F Y I N G  V A R I A B L E S  ####################
#######################################################################################
Classifying variables in data set...
    Number of Numeric Columns =  0
    Number of Integer-Categorical Columns =  1
    Number of String-Categorical Columns =  3
    Number of Factor-Categorical Columns =  0
    Number of String-Boolean Columns =  0
    Number of Numeric-Boolean Columns =  0
    Number of Discrete String Columns =  2
    Number of NLP String Columns =  4
    Number of Date Time Columns =  0
    Number of ID Columns =  0
    Number of Columns to Delete =  0
    10 Predictors classified...
        No variables removed since no ID or low-information variables found in data set
To fix these data quality issues in the datase

Unnamed: 0,Data Type,Missing Values%,Unique Values%,Minimum Value,Maximum Value,DQ Issue
Provincia,object,0.0,0,,,13 rare categories: Too many to list. Group them into a single category or drop the categories.
Municipio,object,0.0,29,,,No issue
Localidad,object,0.0,36,,,No issue
Código postal,int64,0.0,38,1002.0,52006.0,No issue
Dirección,object,0.0,98,,,No issue
Precio gasolina 95 E5,object,9.903772,3,,,"1163 missing values. Impute them with mean, median, mode, or a constant value such as 123., Mixed dtypes: has 2 different data types: object, float,, Possible high cardinality column with 414 unique values: Use hash encoding or text embedding to reduce dimension."
Precio gasóleo A,object,3.125266,3,,,"367 missing values. Impute them with mean, median, mode, or a constant value such as 123., Mixed dtypes: has 2 different data types: object, float,, Possible high cardinality column with 438 unique values: Use hash encoding or text embedding to reduce dimension."
Tipo venta,object,0.0,0,,,1 rare categories: ['p']. Group them into a single category or drop the categories.
Tipo servicio,object,8.97556,0,,,"1054 missing values. Impute them with mean, median, mode, or a constant value such as 123., Mixed dtypes: has 2 different data types: object, float,"
Rótulo,object,0.0,34,,,No issue


[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\SGalv\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\SGalv\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\SGalv\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\SGalv\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\SGalv\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]   

All Plots done
Time to run AutoViz = 34 seconds 

 ###################### AUTO VISUALIZATION Completed ########################
