In [1]:
# Definir variables de entorno necesarias para GCS y el bucket
import os

os.environ["DATA_BUCKET"] = "energia-tfm-bucket"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "C:/Users/Administrador/Documents/TFM/tfm-energy-ingest/tfm-energia-streamlit-cloud-814352512664.json"
)
print("Variables de entorno definidas para DATA_BUCKET y GOOGLE_APPLICATION_CREDENTIALS.")

Variables de entorno definidas para DATA_BUCKET y GOOGLE_APPLICATION_CREDENTIALS.


In [2]:
import io
import os
import warnings
from typing import List, Optional

import numpy as np
import pandas as pd
import pyarrow.parquet as pq
from google.cloud import storage

warnings.filterwarnings("ignore")


class GCSFileViewer:
    """
    Clase para visualizar y analizar archivos CSV y Parquet desde Google Cloud Storage
    """

    def __init__(self, project_id: Optional[str] = None, credentials_path: Optional[str] = None):
        """
        Inicializar el cliente GCS

        Args:
            project_id: ID del proyecto GCP (opcional)
            credentials_path: Ruta al archivo de credenciales (opcional)
        """
        if credentials_path:
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

        self.client = storage.Client(project=project_id)
        print("‚úÖ Cliente GCS inicializado correctamente")

    def list_files(self, bucket_name: str, prefix: str = "", limit: int = 100) -> List[str]:
        """
        Listar archivos en un bucket

        Args:
            bucket_name: Nombre del bucket
            prefix: Prefijo para filtrar archivos
            limit: N√∫mero m√°ximo de archivos a listar

        Returns:
            Lista de nombres de archivos
        """
        bucket = self.client.bucket(bucket_name)
        blobs = bucket.list_blobs(prefix=prefix, max_results=limit)

        files = []
        print(f"üìÅ Archivos en gs://{bucket_name}/{prefix}:")
        print("-" * 80)

        for blob in blobs:
            size_mb = blob.size / (1024 * 1024) if blob.size else 0
            print(f"üìÑ {blob.name} ({size_mb:.2f} MB)")
            files.append(blob.name)

        print(f"\nüîç Total: {len(files)} archivos encontrados")
        return files

    def read_file(self, bucket_name: str, file_path: str, **kwargs) -> pd.DataFrame:
        """
        Leer archivo CSV o Parquet desde GCS

        Args:
            bucket_name: Nombre del bucket
            file_path: Ruta del archivo
            **kwargs: Argumentos adicionales para pd.read_csv o pd.read_parquet

        Returns:
            DataFrame de pandas
        """
        try:
            # Determinar tipo de archivo por extensi√≥n
            file_extension = file_path.lower().split(".")[-1]

            if file_extension in ["csv", "txt"]:
                return self._read_csv(bucket_name, file_path, **kwargs)
            elif file_extension in ["parquet", "parque"]:
                return self._read_parquet(bucket_name, file_path, **kwargs)
            else:
                # Intentar como CSV por defecto
                print(f"‚ö†Ô∏è  Extensi√≥n '{file_extension}' no reconocida. Intentando como CSV...")
                return self._read_csv(bucket_name, file_path, **kwargs)

        except Exception as e:
            print(f"‚ùå Error leyendo archivo: {str(e)}")
            return pd.DataFrame()

    def _read_csv(self, bucket_name: str, file_path: str, **kwargs) -> pd.DataFrame:
        """Leer archivo CSV"""
        bucket = self.client.bucket(bucket_name)
        blob = bucket.blob(file_path)

        # Descargar como string
        content = blob.download_as_text()

        # Leer con pandas
        df = pd.read_csv(io.StringIO(content), **kwargs)
        print(f"‚úÖ CSV le√≠do: {df.shape[0]} filas, {df.shape[1]} columnas")
        return df

    def _read_parquet(self, bucket_name: str, file_path: str, **kwargs) -> pd.DataFrame:
        """Leer archivo Parquet con manejo de errores de esquema"""
        gcs_path = f"gs://{bucket_name}/{file_path}"

        try:
            # M√©todo 1: Pandas directo
            df = pd.read_parquet(gcs_path, **kwargs)
            print(f"‚úÖ Parquet le√≠do: {df.shape[0]} filas, {df.shape[1]} columnas")
            return df

        except Exception as e1:
            print(f"‚ö†Ô∏è  Error con pandas directo: {str(e1)}")
            print("üîÑ Intentando con PyArrow...")

            try:
                # M√©todo 2: PyArrow con opciones compatibles
                import pyarrow.parquet as pq

                # Descargar archivo temporalmente
                bucket = self.client.bucket(bucket_name)
                blob = bucket.blob(file_path)

                # Leer bytes y usar PyArrow
                parquet_bytes = blob.download_as_bytes()
                table = pq.read_table(
                    io.BytesIO(parquet_bytes),
                    use_pandas_metadata=False,  # Ignorar metadatos pandas
                    **kwargs,
                )
                df = table.to_pandas()
                print(f"‚úÖ Parquet le√≠do con PyArrow: {df.shape[0]} filas, {df.shape[1]} columnas")
                return df

            except Exception as e2:
                print(f"‚ö†Ô∏è  Error con PyArrow: {str(e2)}")
                print("üîÑ Intentando con esquema flexible...")

                try:
                    # M√©todo 3: Leer con esquema m√°s flexible
                    import pyarrow.parquet as pq

                    parquet_bytes = blob.download_as_bytes()
                    parquet_file = pq.ParquetFile(io.BytesIO(parquet_bytes))

                    # Leer por chunks para evitar conflictos de esquema
                    dfs = []
                    for batch in parquet_file.iter_batches(batch_size=1000):
                        chunk_df = batch.to_pandas()
                        # Convertir columnas categ√≥ricas a string
                        for col in chunk_df.columns:
                            if chunk_df[col].dtype.name == "category":
                                chunk_df[col] = chunk_df[col].astype(str)
                            elif pd.api.types.is_categorical_dtype(chunk_df[col]):
                                chunk_df[col] = chunk_df[col].astype(str)
                        dfs.append(chunk_df)

                    if dfs:
                        df = pd.concat(dfs, ignore_index=True)
                        print(f"‚úÖ Parquet le√≠do con esquema flexible: {df.shape[0]} filas, {df.shape[1]} columnas")
                        return df
                    else:
                        print("‚ùå No se pudieron leer datos del archivo")
                        return pd.DataFrame()

                except Exception as e3:
                    print(f"‚ùå Error final: {str(e3)}")
                    print("üí° Sugerencia: El archivo podr√≠a estar corrupto o tener un esquema muy complejo")
                    return pd.DataFrame()

    def preview_file(self, bucket_name: str, file_path: str, n_rows: int = 10) -> None:
        """
        Vista previa r√°pida del archivo

        Args:
            bucket_name: Nombre del bucket
            file_path: Ruta del archivo
            n_rows: N√∫mero de filas a mostrar
        """
        print(f"üëÄ Vista previa de: gs://{bucket_name}/{file_path}")
        print("=" * 80)

        df = self.read_file(bucket_name, file_path)
        if df.empty:
            return

        # Informaci√≥n b√°sica
        print(f"üìä Dimensiones: {df.shape[0]} filas √ó {df.shape[1]} columnas")
        print(f"üíæ Memoria: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
        print()

        # Mostrar primeras filas
        print(f"üîù Primeras {min(n_rows, len(df))} filas:")
        print("-" * 80)
        display(df.head(n_rows))

        # Informaci√≥n de columnas
        print("\nüìã Informaci√≥n de columnas:")
        print("-" * 80)
        display(df.info())

        # Estad√≠sticas b√°sicas para columnas num√©ricas
        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 0:
            print(f"\nüìà Estad√≠sticas b√°sicas ({len(numeric_cols)} columnas num√©ricas):")
            print("-" * 80)
            display(df[numeric_cols].describe())

    def diagnose_parquet(self, bucket_name: str, file_path: str) -> dict:
        """
        Diagnosticar problemas en archivo Parquet

        Args:
            bucket_name: Nombre del bucket
            file_path: Ruta del archivo

        Returns:
            Diccionario con informaci√≥n de diagn√≥stico
        """
        try:
            bucket = self.client.bucket(bucket_name)
            blob = bucket.blob(file_path)
            parquet_bytes = blob.download_as_bytes()
            parquet_file = pq.ParquetFile(io.BytesIO(parquet_bytes))

            schema = parquet_file.schema_arrow
            metadata = parquet_file.metadata

            diagnosis = {
                "schema": str(schema),
                "num_columns": schema.num_columns,
                "num_rows": metadata.num_rows,
                "num_row_groups": metadata.num_row_groups,
                "file_size_bytes": len(parquet_bytes),
                "column_info": {},
            }

            # Informaci√≥n detallada de columnas
            for i, field in enumerate(schema):
                diagnosis["column_info"][field.name] = {"type": str(field.type), "nullable": field.nullable}

            print(f"üîç Diagn√≥stico de: gs://{bucket_name}/{file_path}")
            print("=" * 80)
            print(f"üìä Filas: {diagnosis['num_rows']:,}")
            print(f"üìã Columnas: {diagnosis['num_columns']}")
            print(f"üì¶ Row Groups: {diagnosis['num_row_groups']}")
            print(f"üíæ Tama√±o: {diagnosis['file_size_bytes']:,} bytes")

            print("\nüìù Esquema de columnas:")
            print("-" * 80)
            for col, info in diagnosis["column_info"].items():
                print(f"  ‚Ä¢ {col}: {info['type']}")

            return diagnosis

        except Exception as e:
            print(f"‚ùå Error en diagn√≥stico: {str(e)}")
            return {}
        """
        An√°lisis completo del archivo
        
        Args:
            bucket_name: Nombre del bucket
            file_path: Ruta del archivo
        
        Returns:
            Diccionario con m√©tricas del an√°lisis
        """
        df = self.read_file(bucket_name, file_path)
        if df.empty:
            return {}

        analysis = {
            "shape": df.shape,
            "memory_usage_mb": df.memory_usage(deep=True).sum() / 1024**2,
            "dtypes": df.dtypes.to_dict(),
            "null_counts": df.isnull().sum().to_dict(),
            "null_percentages": (df.isnull().sum() / len(df) * 100).to_dict(),
            "numeric_columns": list(df.select_dtypes(include=[np.number]).columns),
            "categorical_columns": list(df.select_dtypes(include=["object"]).columns),
            "datetime_columns": list(df.select_dtypes(include=["datetime64"]).columns),
        }

        # Mostrar an√°lisis
        print(f"üîç An√°lisis completo de: gs://{bucket_name}/{file_path}")
        print("=" * 80)
        print(f"üìê Dimensiones: {analysis['shape'][0]:,} filas √ó {analysis['shape'][1]} columnas")
        print(f"üíæ Uso de memoria: {analysis['memory_usage_mb']:.2f} MB")
        print(f"üî¢ Columnas num√©ricas: {len(analysis['numeric_columns'])}")
        print(f"üìù Columnas categ√≥ricas: {len(analysis['categorical_columns'])}")
        print(f"üìÖ Columnas datetime: {len(analysis['datetime_columns'])}")

        # Valores nulos
        null_cols = [col for col, pct in analysis["null_percentages"].items() if pct > 0]
        if null_cols:
            print(f"\n‚ö†Ô∏è  Columnas con valores nulos ({len(null_cols)}):")
            for col in null_cols[:10]:  # Mostrar solo las primeras 10
                pct = analysis["null_percentages"][col]
                print(f"   ‚Ä¢ {col}: {pct:.1f}%")
            if len(null_cols) > 10:
                print(f"   ... y {len(null_cols) - 10} m√°s")

        return analysis


# =============================================================================
# FUNCIONES DE UTILIDAD
# =============================================================================


def quick_preview(bucket_name: str, file_path: str, n_rows: int = 5):
    """Vista previa r√°pida - funci√≥n standalone"""
    viewer = GCSFileViewer()
    viewer.preview_file(bucket_name, file_path, n_rows)


def list_bucket_files(bucket_name: str, prefix: str = "", limit: int = 50):
    """Listar archivos - funci√≥n standalone"""
    viewer = GCSFileViewer()
    return viewer.list_files(bucket_name, prefix, limit)


def diagnose_parquet_file(bucket_name: str, file_path: str):
    """Diagnosticar archivo Parquet - funci√≥n standalone"""
    viewer = GCSFileViewer()
    return viewer.diagnose_parquet(bucket_name, file_path)


def load_gcs_file(bucket_name: str, file_path: str, **kwargs) -> pd.DataFrame:
    """Cargar archivo - funci√≥n standalone"""
    viewer = GCSFileViewer()
    return viewer.read_file(bucket_name, file_path, **kwargs)


# =============================================================================
# EJEMPLOS DE USO
# =============================================================================

if __name__ == "__main__":
    # Configuraci√≥n
    BUCKET_NAME = "energia-tfm-bucket"

    # Inicializar viewer
    viewer = GCSFileViewer()

    # Ejemplo 1: Listar archivos
    print("üöÄ EJEMPLO 1: Listar archivos")
    files = viewer.list_files(BUCKET_NAME, prefix="curated/prices/", limit=10)

    # Ejemplo 2: Vista previa r√°pida
    if files:
        print("\nüöÄ EJEMPLO 2: Vista previa del primer archivo")
        viewer.preview_file(BUCKET_NAME, files[0], n_rows=5)

    # Ejemplo 3: Cargar archivo completo
    if files:
        print("\nüöÄ EJEMPLO 3: Cargar archivo completo")
        df = viewer.read_file(BUCKET_NAME, files[0])
        print(f"DataFrame cargado: {df.shape}")

# =============================================================================
# INSTRUCCIONES DE USO
# =============================================================================
"""
üìñ GU√çA DE USO:

1. CONFIGURACI√ìN INICIAL:
   viewer = GCSFileViewer()
   # O con credenciales espec√≠ficas:
   viewer = GCSFileViewer(project_id="tu-proyecto", credentials_path="path/to/creds.json")

2. LISTAR ARCHIVOS:
   files = viewer.list_files("tu-bucket", prefix="carpeta/", limit=20)

3. VISTA PREVIA R√ÅPIDA:
   viewer.preview_file("tu-bucket", "archivo.csv", n_rows=10)

4. CARGAR ARCHIVO COMPLETO:
   df = viewer.read_file("tu-bucket", "archivo.parquet")

5. AN√ÅLISIS COMPLETO:
   analysis = viewer.analyze_file("tu-bucket", "archivo.csv")

6. FUNCIONES R√ÅPIDAS:
   quick_preview("bucket", "file.csv")
   df = load_gcs_file("bucket", "file.parquet")
   files = list_bucket_files("bucket", "prefix/")

üîß PAR√ÅMETROS ADICIONALES:
   # Para CSV
   df = viewer.read_file("bucket", "file.csv", sep=";", encoding="utf-8")
   
   # Para Parquet
   df = viewer.read_file("bucket", "file.parquet", columns=["col1", "col2"])
"""

‚úÖ Cliente GCS inicializado correctamente
üöÄ EJEMPLO 1: Listar archivos
üìÅ Archivos en gs://energia-tfm-bucket/curated/prices/:
--------------------------------------------------------------------------------
üìÑ curated/prices/year=2025/month=08/compact.parquet (0.03 MB)
üìÑ curated/prices/year=2025/month=09/compact.parquet (0.03 MB)
üìÑ curated/prices/year=2025/month=10/day=04/part-80670b9c-4773-4859-b8ad-4be20ed67d72.parquet (0.00 MB)

üîç Total: 3 archivos encontrados

üöÄ EJEMPLO 2: Vista previa del primer archivo
üëÄ Vista previa de: gs://energia-tfm-bucket/curated/prices/year=2025/month=08/compact.parquet
‚úÖ Parquet le√≠do: 5232 filas, 7 columnas
üìä Dimensiones: 5232 filas √ó 7 columnas
üíæ Memoria: 0.84 MB

üîù Primeras 5 filas:
--------------------------------------------------------------------------------


Unnamed: 0,hour_ts,price_eur_mwh,zone,source,indicator_id,year,month
0,2025-08-01 00:00:00+02:00,144.8,Espa√±a,PVPC,1001,2025,8
1,2025-08-01 00:00:00+02:00,105.31,Alemania,SPOT_ES,600,2025,8
2,2025-08-01 00:00:00+02:00,104.85,B√©lgica,SPOT_ES,600,2025,8
3,2025-08-01 00:00:00+02:00,102.22,Espa√±a,SPOT_ES,600,2025,8
4,2025-08-01 00:00:00+02:00,102.22,Francia,SPOT_ES,600,2025,8



üìã Informaci√≥n de columnas:
--------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5232 entries, 0 to 5231
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype                        
---  ------         --------------  -----                        
 0   hour_ts        5232 non-null   datetime64[ns, Europe/Madrid]
 1   price_eur_mwh  5232 non-null   float64                      
 2   zone           5232 non-null   object                       
 3   source         5232 non-null   object                       
 4   indicator_id   5232 non-null   int64                        
 5   year           5232 non-null   category                     
 6   month          5232 non-null   category                     
dtypes: category(2), datetime64[ns, Europe/Madrid](1), float64(1), int64(1), object(2)
memory usage: 214.9+ KB


None


üìà Estad√≠sticas b√°sicas (2 columnas num√©ricas):
--------------------------------------------------------------------------------


Unnamed: 0,price_eur_mwh,indicator_id
count,5232.0,5232.0
mean,77.673976,657.022936
std,49.905784,140.065455
min,-69.63,600.0
25%,43.23,600.0
50%,84.79,600.0
75%,105.2325,600.0
max,315.71,1001.0



üöÄ EJEMPLO 3: Cargar archivo completo
‚úÖ Parquet le√≠do: 5232 filas, 7 columnas
DataFrame cargado: (5232, 7)


'\nüìñ GU√çA DE USO:\n\n1. CONFIGURACI√ìN INICIAL:\n   viewer = GCSFileViewer()\n   # O con credenciales espec√≠ficas:\n   viewer = GCSFileViewer(project_id="tu-proyecto", credentials_path="path/to/creds.json")\n\n2. LISTAR ARCHIVOS:\n   files = viewer.list_files("tu-bucket", prefix="carpeta/", limit=20)\n\n3. VISTA PREVIA R√ÅPIDA:\n   viewer.preview_file("tu-bucket", "archivo.csv", n_rows=10)\n\n4. CARGAR ARCHIVO COMPLETO:\n   df = viewer.read_file("tu-bucket", "archivo.parquet")\n\n5. AN√ÅLISIS COMPLETO:\n   analysis = viewer.analyze_file("tu-bucket", "archivo.csv")\n\n6. FUNCIONES R√ÅPIDAS:\n   quick_preview("bucket", "file.csv")\n   df = load_gcs_file("bucket", "file.parquet")\n   files = list_bucket_files("bucket", "prefix/")\n\nüîß PAR√ÅMETROS ADICIONALES:\n   # Para CSV\n   df = viewer.read_file("bucket", "file.csv", sep=";", encoding="utf-8")\n\n   # Para Parquet\n   df = viewer.read_file("bucket", "file.parquet", columns=["col1", "col2"])\n'

In [7]:
# 1. Usar uno de los archivos listados anteriormente
files = viewer.list_files("energia-tfm-bucket", "curated/prices/", limit=10)
print("Archivos disponibles:", files)

# 2. Vista previa del primer archivo
if files:
    viewer.preview_file("energia-tfm-bucket", files[0])

# 3. O directamente (SIN gs://bucket-name/)
viewer.preview_file(
    "energia-tfm-bucket",
    "curated/prices/indicator_id=1001/zone=Baleares/year=2025/month=09/day=21/part-0360d6ce-c082-4fe5-8f52-08f0ecac9e54.parquet",
)

üìÅ Archivos en gs://energia-tfm-bucket/curated/prices/:
--------------------------------------------------------------------------------
üìÑ curated/prices/year=2025/month=09/compact.parquet (0.00 MB)

üîç Total: 1 archivos encontrados
Archivos disponibles: ['curated/prices/year=2025/month=09/compact.parquet']
üëÄ Vista previa de: gs://energia-tfm-bucket/curated/prices/year=2025/month=09/compact.parquet
‚úÖ Parquet le√≠do: 24 filas, 7 columnas
üìä Dimensiones: 24 filas √ó 7 columnas
üíæ Memoria: 0.00 MB

üîù Primeras 10 filas:
--------------------------------------------------------------------------------


Unnamed: 0,hour_ts,price_eur_mwh,zone,source,indicator_id,year,month
0,2025-09-09 00:00:00+02:00,106.76,Espa√±a,PVPC,1001,2025,9
1,2025-09-09 01:00:00+02:00,107.28,Espa√±a,PVPC,1001,2025,9
2,2025-09-09 02:00:00+02:00,106.52,Espa√±a,PVPC,1001,2025,9
3,2025-09-09 03:00:00+02:00,103.72,Espa√±a,PVPC,1001,2025,9
4,2025-09-09 04:00:00+02:00,104.19,Espa√±a,PVPC,1001,2025,9
5,2025-09-09 05:00:00+02:00,111.25,Espa√±a,PVPC,1001,2025,9
6,2025-09-09 06:00:00+02:00,127.23,Espa√±a,PVPC,1001,2025,9
7,2025-09-09 07:00:00+02:00,140.44,Espa√±a,PVPC,1001,2025,9
8,2025-09-09 08:00:00+02:00,163.95,Espa√±a,PVPC,1001,2025,9
9,2025-09-09 09:00:00+02:00,146.31,Espa√±a,PVPC,1001,2025,9



üìã Informaci√≥n de columnas:
--------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype                        
---  ------         --------------  -----                        
 0   hour_ts        24 non-null     datetime64[ns, Europe/Madrid]
 1   price_eur_mwh  24 non-null     float64                      
 2   zone           24 non-null     object                       
 3   source         24 non-null     object                       
 4   indicator_id   24 non-null     int64                        
 5   year           24 non-null     category                     
 6   month          24 non-null     category                     
dtypes: category(2), datetime64[ns, Europe/Madrid](1), float64(1), int64(1), object(2)
memory usage: 1.3+ KB


None


üìà Estad√≠sticas b√°sicas (2 columnas num√©ricas):
--------------------------------------------------------------------------------


Unnamed: 0,price_eur_mwh,indicator_id
count,24.0,24.0
mean,143.295833,1001.0
std,50.903536,0.0
min,70.5,1001.0
25%,105.9375,1001.0
50%,142.395,1001.0
75%,168.28,1001.0
max,253.58,1001.0


üëÄ Vista previa de: gs://energia-tfm-bucket/curated/prices/indicator_id=1001/zone=Baleares/year=2025/month=09/day=21/part-0360d6ce-c082-4fe5-8f52-08f0ecac9e54.parquet
‚ö†Ô∏è  Error con pandas directo: energia-tfm-bucket/curated/prices/indicator_id=1001/zone=Baleares/year=2025/month=09/day=21/part-0360d6ce-c082-4fe5-8f52-08f0ecac9e54.parquet
üîÑ Intentando con PyArrow...
‚ö†Ô∏è  Error con PyArrow: 404 GET https://storage.googleapis.com/download/storage/v1/b/energia-tfm-bucket/o/curated%2Fprices%2Findicator_id%3D1001%2Fzone%3DBaleares%2Fyear%3D2025%2Fmonth%3D09%2Fday%3D21%2Fpart-0360d6ce-c082-4fe5-8f52-08f0ecac9e54.parquet?alt=media: No such object: energia-tfm-bucket/curated/prices/indicator_id=1001/zone=Baleares/year=2025/month=09/day=21/part-0360d6ce-c082-4fe5-8f52-08f0ecac9e54.parquet: ('Request failed with status code', 404, 'Expected one of', <HTTPStatus.OK: 200>, <HTTPStatus.PARTIAL_CONTENT: 206>)
üîÑ Intentando con esquema flexible...
‚ùå Error final: 404 GET https://storage.

In [8]:
nombre_fichero = "raw/prices_spot/year=2025/month=09/day=21/prices_spot_2025-09-21T10:13:14.814681Z.csv"

viewer.preview_file("energia-tfm-bucket", nombre_fichero)

nombre_fichero2 = "curated/prices/indicator_id=600/zone=Alemania/year=2025/month=09/day=21/part-941bb899-82bb-4e03-9642-721be1a05c5c.parquet"

viewer.preview_file("energia-tfm-bucket", nombre_fichero2)

üëÄ Vista previa de: gs://energia-tfm-bucket/raw/prices_spot/year=2025/month=09/day=21/prices_spot_2025-09-21T10:13:14.814681Z.csv
‚ùå Error leyendo archivo: 404 GET https://storage.googleapis.com/download/storage/v1/b/energia-tfm-bucket/o/raw%2Fprices_spot%2Fyear%3D2025%2Fmonth%3D09%2Fday%3D21%2Fprices_spot_2025-09-21T10%3A13%3A14.814681Z.csv?alt=media: No such object: energia-tfm-bucket/raw/prices_spot/year=2025/month=09/day=21/prices_spot_2025-09-21T10:13:14.814681Z.csv: ('Request failed with status code', 404, 'Expected one of', <HTTPStatus.OK: 200>, <HTTPStatus.PARTIAL_CONTENT: 206>)
üëÄ Vista previa de: gs://energia-tfm-bucket/curated/prices/indicator_id=600/zone=Alemania/year=2025/month=09/day=21/part-941bb899-82bb-4e03-9642-721be1a05c5c.parquet
‚ö†Ô∏è  Error con pandas directo: energia-tfm-bucket/curated/prices/indicator_id=600/zone=Alemania/year=2025/month=09/day=21/part-941bb899-82bb-4e03-9642-721be1a05c5c.parquet
üîÑ Intentando con PyArrow...
‚ö†Ô∏è  Error con PyArrow: 40

In [9]:
nombre_fichero = "raw/prices_pvpc/year=2025/month=09/day=21/prices_pvpc_2025-09-21T10:19:31.780426Z.csv"

viewer.preview_file("energia-tfm-bucket", nombre_fichero)

nombre_fichero2 = "curated/prices/indicator_id=1001/zone=Pen√≠nsula/year=2025/month=09/day=22/part-1ad813ae-40f9-43a0-821b-ad29d26bbe6c.parquet"
viewer.preview_file("energia-tfm-bucket", nombre_fichero2)

üëÄ Vista previa de: gs://energia-tfm-bucket/raw/prices_pvpc/year=2025/month=09/day=21/prices_pvpc_2025-09-21T10:19:31.780426Z.csv
‚ùå Error leyendo archivo: 404 GET https://storage.googleapis.com/download/storage/v1/b/energia-tfm-bucket/o/raw%2Fprices_pvpc%2Fyear%3D2025%2Fmonth%3D09%2Fday%3D21%2Fprices_pvpc_2025-09-21T10%3A19%3A31.780426Z.csv?alt=media: No such object: energia-tfm-bucket/raw/prices_pvpc/year=2025/month=09/day=21/prices_pvpc_2025-09-21T10:19:31.780426Z.csv: ('Request failed with status code', 404, 'Expected one of', <HTTPStatus.OK: 200>, <HTTPStatus.PARTIAL_CONTENT: 206>)
üëÄ Vista previa de: gs://energia-tfm-bucket/curated/prices/indicator_id=1001/zone=Pen√≠nsula/year=2025/month=09/day=22/part-1ad813ae-40f9-43a0-821b-ad29d26bbe6c.parquet
‚ö†Ô∏è  Error con pandas directo: energia-tfm-bucket/curated/prices/indicator_id=1001/zone=Pen√≠nsula/year=2025/month=09/day=22/part-1ad813ae-40f9-43a0-821b-ad29d26bbe6c.parquet
üîÑ Intentando con PyArrow...
‚ö†Ô∏è  Error con PyArr

In [3]:
# 4. CARGAR ARCHIVO COMPLETO:
archivo = "curated/demand/year=2025/month=10/day=04/part-6d9e284d-bd74-49bc-b230-1c96d381628b.parquet"

df = viewer.read_file("energia-tfm-bucket", archivo)

df

‚úÖ Parquet le√≠do: 13 filas, 8 columnas


Unnamed: 0,minute_ts,zone,demanda_real_mw,demanda_prevista_h_mw,demanda_programada_h_mw,year,month,day
0,2025-10-04 19:00:00+02:00,Espa√±a,27212.0,27075.0,27574.0,2025,10,4
1,2025-10-04 19:05:00+02:00,Espa√±a,27655.0,27100.0,27574.0,2025,10,4
2,2025-10-04 19:10:00+02:00,Espa√±a,27402.0,27177.0,27574.0,2025,10,4
3,2025-10-04 19:15:00+02:00,Espa√±a,27498.0,27306.0,28091.0,2025,10,4
4,2025-10-04 19:20:00+02:00,Espa√±a,27549.0,27466.0,28091.0,2025,10,4
5,2025-10-04 19:25:00+02:00,Espa√±a,27562.0,27635.0,28091.0,2025,10,4
6,2025-10-04 19:30:00+02:00,Espa√±a,27577.0,27814.0,28307.0,2025,10,4
7,2025-10-04 19:35:00+02:00,Espa√±a,27926.0,27980.0,28307.0,2025,10,4
8,2025-10-04 19:40:00+02:00,Espa√±a,27943.0,28111.0,28307.0,2025,10,4
9,2025-10-04 19:45:00+02:00,Espa√±a,28164.0,28207.0,28706.0,2025,10,4
