In [5]:
import os

import pandas as pd

def save_yearly_epw_data(file_path, start_year, end_year):
    """
    Split a multi-year EPW file into one EPW file per calendar year.

    The first eight lines of the input (the EPW header block) are copied
    verbatim to every output file, followed by the data rows whose "Year"
    column equals the respective year. Output files are written next to the
    input as "<input without extension>_<year>.epw".

    Data fields are read and written as text, so the numeric formatting of
    the source file (e.g. "1.50") is preserved instead of being re-rendered
    by pandas.

    NOTE: the header is not modified, so any period information it carries
    (e.g. a DATA PERIODS line) still describes the original multi-year range.

    Parameters:
    - file_path: Path to the EPW file (str or os.PathLike).
    - start_year: First year of the desired data range (inclusive).
    - end_year: Last year of the desired data range (inclusive).

    Returns:
    - List of the paths (str) of the files that were written, in ascending
      year order. Years without any data rows are skipped and not listed.
    """
    # Number of header lines preceding the data rows in an EPW file.
    header_line_count = 8

    col_names = [
        "Year", "Month", "Day", "Hour", "Minute",
        "Data Source and Uncertainty Flags", "Dry Bulb Temperature",
        "Dew Point Temperature", "Relative Humidity", "Atmospheric Station Pressure",
        "Extraterrestrial Horizontal Radiation", "Extraterrestrial Direct Normal Radiation",
        "Horizontal Infrared Radiation Intensity", "Global Horizontal Radiation",
        "Direct Normal Radiation", "Diffuse Horizontal Radiation",
        "Global Horizontal Illuminance", "Direct Normal Illuminance",
        "Diffuse Horizontal Illuminance", "Zenith Luminance",
        "Wind Direction", "Wind Speed", "Total Sky Cover", "Opaque Sky Cover",
        "Visibility", "Ceiling Height", "Present Weather Observation",
        "Present Weather Codes", "Precipitable Water", "Aerosol Optical Depth",
        "Snow Depth", "Days Since Last Snowfall", "Albedo",
        "Liquid Precipitation Depth", "Liquid Precipitation Quantity"
    ]

    # Read the header of the EPW file separately so it can be replayed
    # unchanged at the top of every yearly file.
    with open(file_path, 'r') as file:
        header = [line for _, line in zip(range(header_line_count), file)]

    # Load the data rows as raw text: dtype=str + keep_default_na=False keeps
    # every field exactly as written in the source file.
    data = pd.read_csv(
        file_path,
        skiprows=header_line_count,
        header=None,
        names=col_names,
        dtype=str,
        keep_default_na=False,
    )

    # Numeric view of the year column used only for filtering; unparsable
    # entries become NaN and therefore never match a requested year.
    years = pd.to_numeric(data["Year"], errors="coerce")

    # splitext only strips an extension of the final path component, unlike
    # rsplit('.'), which would cut at a dot inside a directory name.
    base_path = os.path.splitext(file_path)[0]

    saved_file_paths = []

    # Filter and save the data for each year
    for year in range(start_year, end_year + 1):
        yearly_data = data[years == year]

        # A header-only EPW file is not usable, so do not create one.
        if yearly_data.empty:
            print(f"No data found for {year}; skipping.")
            continue

        yearly_file_path = f"{base_path}_{year}.epw"

        # newline='' stops Python from translating line endings a second
        # time, which would otherwise introduce empty rows on Windows.
        with open(yearly_file_path, 'w', newline='') as file:
            file.writelines(header)
            yearly_data.to_csv(file, header=False, index=False)

        print(f"Saved filtered data for {year} to {yearly_file_path}")
        saved_file_paths.append(yearly_file_path)

    return saved_file_paths

# Split the multi-year Berlin AMY file into separate EPW files for 2020, 2021, and 2022.
# NOTE(review): this is an absolute, machine-specific path — consider deriving it
# from a configurable data directory so the notebook runs on other machines.
file_path = r'C:\Users\felix\Programmieren\tecdm\data\berlin\AMY_2010_2022.epw' 
saved_file_paths = save_yearly_epw_data(file_path, 2020, 2022)
saved_file_paths  # Display whatever save_yearly_epw_data returned


Saved filtered data for 2020 to C:\Users\felix\Programmieren\tecdm\data\berlin\AMY_2010_2022_2020.epw
Saved filtered data for 2021 to C:\Users\felix\Programmieren\tecdm\data\berlin\AMY_2010_2022_2021.epw
Saved filtered data for 2022 to C:\Users\felix\Programmieren\tecdm\data\berlin\AMY_2010_2022_2022.epw


In [7]:
# Per-field metadata table, keyed by field name. Every entry carries the same
# four attributes:
#   core_name          - short identifier ("" for fields that have none here)
#   unit               - unit label of the field
#   time_of_meas_shift - either None or 'ind2prec'
#                        (NOTE(review): meaning of 'ind2prec' is not visible here — confirm)
#   nan                - presumably the sentinel marking a missing value — TODO confirm
# NOTE(review): "unit" is the Python value None for the flags field but the
# *string* "None" for WeatherObs, WeatherCode and Albedo — verify which one
# consumers expect.
# NOTE(review): these keys are abbreviated and do not match the long column
# names used when the EPW file is read elsewhere in this notebook.
format_epw = {
    field: {
        "core_name": core_name,
        "unit": unit,
        "time_of_meas_shift": shift,
        "nan": missing,
    }
    for field, core_name, unit, shift, missing in [
        # (field, core_name, unit, time_of_meas_shift, nan)
        # --- timestamp and flags ---
        ("Year", "", "year", None, None),
        ("Month", "", "month", None, None),
        ("Day", "", "day", None, None),
        ("Hour", "", "hour", None, None),
        ("Minute", "", "minute", None, None),
        ("Data Source and Uncertainty Flags", "", None, None, "?"),
        # --- temperature, humidity, pressure ---
        ("DryBulbTemp", "DryBulbTemp", "degC", None, 99.9),
        ("DewPointTemp", "DewPointTemp", "degC", None, 99.9),
        ("RelHum", "RelHum", "percent", None, 999.0),
        ("AtmPressure", "AtmPressure", "Pa", None, 999999.0),
        # --- radiation ---
        ("ExtHorRad", "ExtHorRad", "Wh/m2", 'ind2prec', 9999.0),
        ("ExtDirNormRad", "ExtDirNormRad", "Wh/m2", 'ind2prec', 9999.0),
        ("HorInfra", "HorInfra", "Wh/m2", 'ind2prec', 9999.0),
        ("GlobHorRad", "GlobHorRad", "Wh/m2", 'ind2prec', 9999.0),
        ("DirNormRad", "DirNormRad", "Wh/m2", 'ind2prec', 9999.0),
        ("DiffHorRad", "DiffHorRad", "Wh/m2", 'ind2prec', 9999.0),
        # --- illuminance / luminance ---
        ("GlobHorIll", "GlobHorIll", "lux", 'ind2prec', 999999.0),
        ("DirecNormIll", "DirecNormIll", "lux", 'ind2prec', 999999.0),
        ("DiffuseHorIll", "DiffuseHorIll", "lux", 'ind2prec', 999999.0),
        ("ZenithLum", "ZenithLum", "Cd/m2", 'ind2prec', 9999.0),
        # --- wind and sky ---
        ("WindDir", "WindDir", "deg", None, 999.0),
        ("WindSpeed", "WindSpeed", "m/s", None, 999.0),
        ("TotalSkyCover", "TotalSkyCover", "1tenth", None, 99),
        ("OpaqueSkyCover", "OpaqueSkyCover", "1tenth", None, 99),
        ("Visibility", "Visibility", "km", None, 9999.0),
        ("CeilingH", "CeilingH", "m", None, 99999),
        # --- weather observations ---
        ("WeatherObs", "", "None", None, 9),
        ("WeatherCode", "", "None", None, 999999999),
        # --- water, aerosol, snow, precipitation ---
        ("PrecWater", "PrecWater", "mm", None, 999.0),
        ("Aerosol", "Aerosol", "1thousandth", None, 0.999),
        ("Snow", "", "cm", None, 999.0),
        ("DaysSinceSnow", "", "days", None, 99),
        ("Albedo", "", "None", None, 999),
        ("LiquidPrecD", "LiquidPrecD", "mm/h", None, 999),
        ("LiquidPrepQuant", "", "hours", None, 99),
    ]
}