In [1]:
import pandas as pd
import os

In [4]:
def load_parquet_data1(file_path: str) -> pd.DataFrame:
    """Read the Parquet file at ``file_path`` into a DataFrame.

    This is a thin pass-through to ``pandas.read_parquet``: no validation is
    performed here, and any exception raised by pandas propagates unchanged.

    Args:
        file_path (str): Location of the Parquet file to read.

    Returns:
        pd.DataFrame: The frame produced by ``pandas.read_parquet``.
    """
    frame = pd.read_parquet(file_path)
    return frame

In [7]:
# Read the processed training set with the unvalidated loader. The path is
# relative, so it resolves against the kernel's current working directory.
data = load_parquet_data1("../data/processed/training_data.parquet")

In [8]:
# Preview the first rows of the loaded frame (DataFrame.head defaults to 5 rows).
data.head()

Unnamed: 0,time,TO,sensor1_min,sensor1_max,sensor2_min,sensor2_max,sensor2_mean,sensor3_min,sensor3_max,sensor3_mean
0,1743532199,10540337,65.83,65.27,10.0,98.0,55.59,12.0,99.0,54.11
1,1743618599,10540337,76.09,54.77,10.0,98.0,57.7,10.0,99.0,51.68
2,1743704999,10540337,67.66,66.12,10.0,99.0,50.29,10.0,98.0,53.25
3,1743791399,10540337,83.22,80.61,10.0,99.0,58.97,11.0,98.0,49.92
4,1743877799,10540337,78.29,66.14,10.0,99.0,59.6,10.0,99.0,61.02


In [9]:
def load_parquet_data(file_path: "str | os.PathLike[str]") -> pd.DataFrame:
    """
    Loads data from a Parquet file with basic validation.

    The checks run in this order: existence, non-zero size, readability,
    ".parquet" extension, then (after loading) non-empty frame and no nulls.
    The first failing check raises; later checks are not evaluated.

    Args:
        file_path (str | os.PathLike): Path to the Parquet file. Both plain
            strings and path objects such as ``pathlib.Path`` are accepted.

    Returns:
        pd.DataFrame: Loaded DataFrame.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file is zero bytes long, its name does not end
            with ".parquet" (case-sensitive), the loaded DataFrame is empty,
            or the loaded DataFrame contains null values.
        PermissionError: If the file is not readable.
    """
    # Normalise once to a plain string so the suffix check below also works
    # for path objects, which have no ``endswith`` method.
    path = os.fspath(file_path)

    # Check if file exists
    if not os.path.exists(path):
        raise FileNotFoundError(f"{path} does not exist.")

    # Check if file is not empty (size on disk, before any parsing)
    if os.stat(path).st_size == 0:
        raise ValueError(f"{path} is empty.")

    # Check if file is readable
    if not os.access(path, os.R_OK):
        raise PermissionError(f"{path} is not readable.")

    # Check file extension
    if not path.endswith(".parquet"):
        raise ValueError(f"{path} is not a Parquet file.")

    # Load data
    df = pd.read_parquet(path)

    # Check if DataFrame is empty
    if df.empty:
        raise ValueError(f"{path} contains no data.")

    # Check for null values anywhere in the frame
    if df.isna().to_numpy().any():
        raise ValueError(f"{path} contains null values.")

    return df

In [11]:
# Re-load the same file through the validating loader. The result is not
# assigned to a name, so it is only shown as this cell's output.
# NOTE(review): the recorded output for this cell is a FileNotFoundError even
# though this same relative path loaded earlier in the notebook; presumably the
# kernel's working directory differed between the two runs. Confirm and re-run.
load_parquet_data("../data/processed/training_data.parquet")

FileNotFoundError: ../data/processed/training_data.parquet does not exist.