In [1]:
import pandas as pd
import os
from datetime import datetime, time

In [2]:
import os

def list_files_in_folder(folder_path):
    """Return the paths of the regular files directly inside ``folder_path``.

    Sub-directories are skipped and the folder is not searched recursively.

    Args:
        folder_path: Directory to scan.

    Returns:
        list[str]: ``folder_path`` joined with each file name, ordered by
        file name. Empty when ``folder_path`` is not an existing directory.
    """
    # isdir rather than exists: a path that points at a regular file must
    # yield an empty list instead of making os.listdir raise.
    if not os.path.isdir(folder_path):
        return []

    # os.listdir returns names in arbitrary order; sorting keeps the order in
    # which files are processed reproducible between runs and machines.
    candidates = (
        os.path.join(folder_path, name)
        for name in sorted(os.listdir(folder_path))
    )
    return [path for path in candidates if os.path.isfile(path)]

# Folder holding the manually exported meter CSVs (relative to the working
# directory) -- replace with the actual folder path.
folder_path = 'manual data'
files_path = list_files_in_folder(folder_path)

# Show every discovered file, one path per line (nothing when none were found)
if files_path:
    print(*files_path, sep="\n")
    


manual data\WR Leyland RDC (DB 1-12 Sitewide Trailer Hookup Points_DB 1-12 Sitewide Trailer Hookup Points) 2023-11-01_2023-11-06.csv
manual data\WR Leyland RDC (Trailer Hook Up Points BusBar East_Trailer Hook Up Points BusBar East) 2023-11-01_2023-11-06.csv
manual data\WR Leyland RDC (Trailer Hook Up Points BusBar West_Trailer Hook Up Points BusBar West) 2023-11-01_2023-11-06.csv


In [3]:
import pandas as pd

def update_and_save_dataframe(file_path, dfs_consumption, dfs_detail):
    """Load one meter CSV, resample it to 5-minute rows and collect the result.

    Nothing is returned and nothing is written to disk: one DataFrame is
    appended to each of the two lists passed in.

    Args:
        file_path: CSV expected to hold exactly three columns, in this order:
            ``DateTime`` (parsed day-first), a meter description, and
            `` kWh`` (the header carries a leading space).
        dfs_consumption: List that receives a frame with the columns
            ``consumption``, ``date_time``, ``meter_id``.
        dfs_detail: List that receives a frame with the columns
            ``consumption``, ``date``, ``date_time``, ``meter_id``, ``time``.
    """
    readings = pd.read_csv(file_path)
    readings['DateTime'] = pd.to_datetime(readings['DateTime'], dayfirst=True)

    # Sort BEFORE looking for the last day: the final row of an unsorted file
    # is not necessarily the latest reading, and the forward-fill below needs
    # a monotonic index anyway.
    readings = readings.set_index('DateTime').sort_index()

    # Pad the last day up to 23:59 so the resampled range covers the whole
    # day; the padding row repeats the latest reading.
    last_date = readings.index[-1].date()
    end_of_day = pd.Timestamp.combine(last_date, time(23, 59))
    if end_of_day not in readings.index:
        # Copy the row as a one-row frame (not via .loc assignment of a
        # Series) so the original column dtypes are preserved.
        padding = readings.iloc[[-1]].copy()
        padding.index = pd.DatetimeIndex([end_of_day], name=readings.index.name)
        readings = pd.concat([readings, padding]).sort_index()

    # Upsample to a 5-minute grid, carrying each reading forward.
    # '5min' is the supported spelling; the '5T' alias is deprecated.
    resampled = readings.resample('5min').ffill()

    # Spread each reading over six 5-minute slots.
    # NOTE(review): this assumes the source readings are 30-minute totals --
    # confirm against the export settings.
    resampled[' kWh'] = resampled[' kWh'] / 6

    # Turn the index back into a column and render it as text
    resampled = resampled.reset_index()
    resampled['DateTime'] = resampled['DateTime'].dt.strftime('%Y-%m-%d %H:%M:%S')

    # Split the rendered timestamp into separate date and time columns
    stamp_parts = resampled['DateTime'].str.split(" ")
    resampled['date'] = stamp_parts.str[0]
    resampled['time'] = stamp_parts.str[1]

    # Positional rename: relies on the three-column input layout described in
    # the docstring.
    resampled.columns = ['date_time', 'meter_desc', 'consumption', 'date', 'time']

    # Descriptions missing from this mapping produce NaN meter ids.
    meter_id_mapping = {
        "DB 1-12 Sitewide Trailer Hookup Points_DB 1-12 Sitewide Trailer Hookup Points": '00001',
        "Trailer Hook Up Points BusBar East_Trailer Hook Up Points BusBar East": '00002',
        "Trailer Hook Up Points BusBar West_Trailer Hook Up Points BusBar West": '00003'
    }
    resampled['meter_id'] = resampled['meter_desc'].map(meter_id_mapping)

    # Frame for the consumption table
    dfs_consumption.append(
        resampled[['consumption', 'date_time', 'meter_id']].copy()
    )

    # Frame for the consumption-detail table
    dfs_detail.append(
        resampled[['consumption', 'date', 'date_time', 'meter_id', 'time']].copy()
    )




In [4]:
# Accumulators filled by update_and_save_dataframe: one frame per input file
dfs_consumption, dfs_detail = [], []

for each_file in files_path:
    update_and_save_dataframe(each_file, dfs_consumption, dfs_detail)

# Stack the per-file frames into one table per output type
df_consumption_final = pd.concat(dfs_consumption, ignore_index=True)

detail_column_order = ['time', 'consumption', 'meter_id', 'date', 'date_time']
df_detail_final = pd.concat(dfs_detail, ignore_index=True)[detail_column_order]

# CSV export of the two tables is currently disabled:
#df_consumption_final.to_csv('manual data/t_checkit_meter_consumption.csv', index=False)
#df_detail_final.to_csv('output/t_checkit_meter_consumption_detail.csv', index=False)

# Static meter configuration table (one row per meter)
data = {
    'meter_desc': [
        'Leyland DB 1-12 Sitewide Trailer Hookup Points',
        'Leyland Trailer Hook Up Points BusBar East',
        'Leyland Trailer Hook Up Points BusBar West'
    ],
    'meter_id': ['00001', '00002', '00003'],
    'meter_unit_id': [3, 3, 3],
    'org_id': ['0001', '0001', '0001'],
    'sample_period_mins': [5, 5, 5],
    'site_id': ['00001', '00002', '00003']
}

# Only these keys of `data` are kept, in this order (site_id is left out)
df_configuration = pd.DataFrame(
    data,
    columns=['org_id', 'meter_id', 'sample_period_mins', 'meter_unit_id', 'meter_desc'],
)
#df_configuration.to_csv('output/t_checkit_meter_configuration.csv', index=False)

#print("Files saved!")

Files saved!


In [5]:
from datetime import datetime
import os

# Reporting period to export
target_month = 11  # calendar month number, 1-12
target_year = 2023

# Parse the timestamp strings so the .dt accessor can be used for filtering
df_detail_final['date_time'] = pd.to_datetime(df_detail_final['date_time'], format='%Y-%m-%d %H:%M:%S')

# Keep only the rows that fall inside the target month of the target year
timestamps = df_detail_final['date_time']
in_target_period = (timestamps.dt.month == target_month) & (timestamps.dt.year == target_year)
filtered_df = df_detail_final[in_target_period]

# The period is encoded in the file name as _YYYY_MM
output_file_suffix = f'_{target_year}_{target_month:02d}'

# Destination folder, created on demand
output_directory = 'output/'
os.makedirs(output_directory, exist_ok=True)

output_file_path = os.path.join(output_directory, f't_checkit_meter_consumption_detail{output_file_suffix}.csv')

# Write the filtered rows without the index column
filtered_df.to_csv(output_file_path, index=False)

print(f"Filtered data saved to {output_file_path}")


Filtered data saved to output/t_checkit_meter_consumption_detail_2023_11.csv
