In [9]:
import os
import pandas as pd

## Locate all CSV files in the data directory

In [10]:
# Folder holding the raw per-home CSV exports (relative to this notebook).
directory = r"../data/original/"
file_names = []

# Collect the names of the CSV files only; anything else in the folder is skipped.
# NOTE: the loop variable `filename` stays defined at notebook scope after this
# loop finishes (it holds the last directory entry that was listed).
for filename in os.listdir(directory):
    if not filename.endswith(".csv"):
        continue
    file_names.append(filename)

In [11]:
# Report how many CSV files were found by the listing step.
file_count = len(file_names)
print(f'No of files: {file_count}')

No of files: 118


In [12]:
# Preview the home id embedded in the first file name:
# the text after 'home' and before the first '.'.
first_file = file_names[0]
first_file.split('home')[1].split('.')[0]

'299'

In [18]:
# Scratch check: what the data directory looks like with the first file's home id appended.
first_home_id = file_names[0].split('home')[1].split('.')[0]
directory + first_home_id

'../data/original/299'

## Define master function to transform the data

In [13]:
def transform_file(file_name):
    """Aggregate one home's raw power readings into daily energy consumption.

    Parameters
    ----------
    file_name : str
        Path to a header-less, two-column CSV (timestamp, power in watts)
        whose base name contains ``home<ID>``, e.g. ``.../home299.csv``.

    Returns
    -------
    pandas.DataFrame
        One row per complete day with the columns ``datetime`` (start of the
        day), ``Consumption (kWh)`` and ``homeid`` (the ``<ID>`` taken from
        the file name, kept as a string).

    Raises
    ------
    IndexError
        If the base name of ``file_name`` does not contain ``'home'``.
    """
    readings = pd.read_csv(file_name, header=None)

    # rename the columns
    readings.columns = ['datetime', 'power (watts)']

    # convert datetime column to datetime object
    readings['datetime'] = pd.to_datetime(readings['datetime'])

    # Seconds elapsed since the previous reading. total_seconds() keeps the
    # full length of the gap; `.dt.seconds` would silently drop whole days
    # (a gap of 1 day + 6 h would be counted as 6 h).
    readings['time (s)'] = readings['datetime'].diff().dt.total_seconds()

    # energy consumption in kilowatt-hours: W * s / (3600 s/h * 1000 W/kW)
    readings['Consumption (kWh)'] = (
        readings['power (watts)'] * readings['time (s)'] / (3600 * 1000)
    )

    # calculate daily totals (only the columns that are needed downstream)
    df_daily = (
        readings
        .resample('D', on='datetime')[['time (s)', 'Consumption (kWh)']]
        .sum()
    )

    # Only keep the days whose readings cover exactly 24 hours, then drop the
    # helper column that was only needed for this filter.
    df_daily = df_daily[df_daily['time (s)'] == 86400].drop(columns=['time (s)'])

    # Derive the home id from the file being processed (its base name, so a
    # 'home' component elsewhere in the path cannot interfere), not from a
    # notebook-level variable left over from another cell.
    base_name = os.path.basename(file_name)
    df_daily['homeid'] = base_name.split('home')[1].split('.')[0]

    return df_daily.reset_index()

## Save new file and delete original file

In [14]:
# Write the transformed frame to disk, then remove the raw source file.
def export_file(df, file_name):
    """Save ``df`` as a CSV and delete the matching original file.

    The frame is written (without its index) to
    ``'../data/data_transformed/' + file_name``. If a file of the same name
    exists under the notebook-level ``directory``, it is removed afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Transformed data to export.
    file_name : str
        Bare file name (no directory part) shared by the original and the
        exported file.
    """
    export_path = '../data/data_transformed/' + file_name
    df.to_csv(export_path, index=False)
    print(f'{file_name} exported successfully!')

    original_path = directory + file_name

    # Nothing to clean up when the original is already gone.
    if not os.path.exists(original_path):
        return

    os.remove(original_path)
    print(f'Original {file_name} deleted successfully!')
    print('----------------------------------------------\n')


In [15]:
# Transform and export every discovered CSV, one file at a time.
# enumerate(..., start=1) supplies the 1-based progress counter, so there is
# no separate counter to initialise and increment by hand.
# NOTE: export_file removes each original file after exporting it, so this
# cell is destructive: it cannot be re-run against the same input folder.
total_files = len(file_names)
for status, file_name in enumerate(file_names, start=1):
    print(f'Processing file {status} of {total_files}')
    df = transform_file(directory + file_name)
    export_file(df, file_name)

Processing file 1 of 118
home299.csv exported successfully!
Original home299.csv deleted successfully!
----------------------------------------------

Processing file 2 of 118
home266.csv exported successfully!
Original home266.csv deleted successfully!
----------------------------------------------

Processing file 3 of 118
home272.csv exported successfully!
Original home272.csv deleted successfully!
----------------------------------------------

Processing file 4 of 118
home306.csv exported successfully!
Original home306.csv deleted successfully!
----------------------------------------------

Processing file 5 of 118
home307.csv exported successfully!
Original home307.csv deleted successfully!
----------------------------------------------

Processing file 6 of 118
home313.csv exported successfully!
Original home313.csv deleted successfully!
----------------------------------------------

Processing file 7 of 118
home273.csv exported successfully!
Original home273.csv deleted succe