In [31]:
import os
import warnings
from pathlib import Path

import pandas as pd

# Function to load the data with enhanced debugging and path consistency
def load_data(base_path, selected_months, selected_csvs, debug=False):
    # Ensure the base path is normalized
    base_path = Path(base_path)

    # Check if the master folder exists
    if debug:
        print(f"Master folder exists: {base_path}: {'Yes' if base_path.is_dir() else 'No'}")
    if not base_path.is_dir():
        raise FileNotFoundError(f"The master folder does not exist: {base_path}")

    data = {}

    for month in selected_months:
        year, month_number = month.split('_')
        year_folder = f"{year}_V3"
        month_folder = f"{year}_{month_number}"
        month_path = base_path / year_folder / month_folder  # Use Path for consistency

        if debug:
            print(f"Directory exists: {month_path}: {'Yes' if month_path.is_dir() else 'No'}")

        if month_path.is_dir():  # Ensure it is a directory
            month_data = {}
            for csv_type in selected_csvs:
                # Construct the file name, handling the special case for LightSpectra
                if csv_type == "LightSpectra":
                    file_name = f"{csv_type}- {month_folder}.csv"  # Add space after the dash
                else:
                    file_name = f"{csv_type}-{month_folder}.csv"

                file_path = month_path / file_name  # Use Path for consistency

                if debug:
                    print(f"File exists: {file_path}: {'Yes' if file_path.is_file() else 'No'}")

                try:
                    # Read the CSV file
                    data_frame = pd.read_csv(file_path, low_memory=False)

                    # Add metadata columns
                    data_frame['Source'] = csv_type
                    data_frame['Year'] = year
                    data_frame['Month'] = month_number

                    # Add to month-level data
                    month_data[csv_type] = data_frame

                except FileNotFoundError:
                    if debug:
                        print(f"File not found error when trying to load: {file_path}")
                    pass

            # Add the month data to the corresponding year
            if year not in data:
                data[year] = {}
            data[year][month] = month_data

    return data


# Master folder that is handed to load_data
base_path = "../../../OneDrive - HvA/Jaar_4/PV systems modeling and analysis/Data/data_folder"

# Month keys and CSV types to read
selected_months = ["2019_01", "2019_02", "2022_03"]  # includes the problematic month
selected_csvs = ["IVCurves", "LightSpectra"]

# Run the loader with its debug output switched on
loaded_data = load_data(base_path, selected_months, selected_csvs, debug=True)

# Summarise the result: one line per year, per month and per CSV type
print("\nLoaded Data Structure Overview:")
for year in loaded_data:
    months = loaded_data[year]
    print(f"Year: {year}")
    for month in months:
        month_data = months[month]
        print(f"  Month: {month}")
        for csv_type in month_data:
            df = month_data[csv_type]
            print(f"    {csv_type}: {len(df)} rows")


Master folder exists: ..\..\..\OneDrive - HvA\Jaar_4\PV systems modeling and analysis\Data\data_folder: Yes
Directory exists: ..\..\..\OneDrive - HvA\Jaar_4\PV systems modeling and analysis\Data\data_folder\2019_V3\2019_01: Yes
File exists: ..\..\..\OneDrive - HvA\Jaar_4\PV systems modeling and analysis\Data\data_folder\2019_V3\2019_01\IVCurves-2019_01.csv: Yes
File exists: ..\..\..\OneDrive - HvA\Jaar_4\PV systems modeling and analysis\Data\data_folder\2019_V3\2019_01\LightSpectra- 2019_01.csv: Yes
Directory exists: ..\..\..\OneDrive - HvA\Jaar_4\PV systems modeling and analysis\Data\data_folder\2019_V3\2019_02: Yes
File exists: ..\..\..\OneDrive - HvA\Jaar_4\PV systems modeling and analysis\Data\data_folder\2019_V3\2019_02\IVCurves-2019_02.csv: Yes
File exists: ..\..\..\OneDrive - HvA\Jaar_4\PV systems modeling and analysis\Data\data_folder\2019_V3\2019_02\LightSpectra- 2019_02.csv: Yes
Directory exists: ..\..\..\OneDrive - HvA\Jaar_4\PV systems modeling and analysis\Data\data_folder

In [14]:
# Sanity check: does base_path (assigned in the data-loading cell) point at an
# existing directory? The cell displays the boolean returned by os.path.isdir.
os.path.isdir(base_path)


True