In [4]:

#This code calculates the average daily consumption of several csvs taking into account the winter days
import pandas as pd
import os

# Define the directory containing the CSV files
directory = 'DATA'

# Create an empty DataFrame to store the results
results = pd.DataFrame(columns=['File_name', 'average_daily_energy_consumption_winter'])

# Iterate over each file in the directory
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        # Construct the full file path
        file_path = os.path.join(directory, filename)
        
        # Load the data from the file
        data = pd.read_csv(file_path)
        
        # Check if 'Timestamp' column exists
        if 'Timestamp' not in data.columns:
            print(f"File {filename} does not contain 'Timestamp' column. Skipping.")
            continue
        
        # Convert Timestamp to datetime
        data['Timestamp'] = pd.to_datetime(data['Timestamp'])

        # Keep only the relevant columns
        if 'Whole_System_Energy_Consumed' not in data.columns:
            print(f"File {filename} does not contain 'Whole_System_Energy_Consumed' column. Skipping.")
            continue

        data = data[['Timestamp', 'Whole_System_Energy_Consumed']]

        # Calculate incremental electricity energy in kW 
        data['Electricity_Energy_Incremental'] = data['Whole_System_Energy_Consumed'].diff()

        # Filter data for winter months (December, January, February)
        winter_months = [12, 1, 2]
        winter_data = data[data['Timestamp'].dt.month.isin(winter_months)]

        # Resample data to daily and sum the incremental energy
        daily_energy_consumption = winter_data.resample('D', on='Timestamp')['Electricity_Energy_Incremental'].sum()

        # Calculate the average daily energy consumption
        average_daily_energy_consumption = daily_energy_consumption.mean()

        # Append the results to the DataFrame
        new_row = pd.DataFrame({
            'File_name': [filename],
            'average_daily_energy_consumption_winter': [average_daily_energy_consumption]
        })
        
        # Check if the new_row is not empty or all NA before concatenating
        if not new_row.isna().all().all():
            results = pd.concat([results, new_row], ignore_index=True)

# Save the results to an Excel file
results.to_excel('average_daily_energy_consumption_winter_2.xlsx', index=False)

print("Results saved to average_daily_energy_consumption_winter_2.xlsx")




  results = pd.concat([results, new_row], ignore_index=True)


File Property_ID=EOH0345.csv does not contain 'Timestamp' column. Skipping.
File Property_ID=EOH1880.csv does not contain 'Timestamp' column. Skipping.
Results saved to average_daily_energy_consumption_winter_2.xlsx
