In [1]:
import pandas as pd
import numpy as np
from copy import deepcopy
from os.path import join, isdir, isfile, exists
from os import listdir
import fnmatch
import re
from sys import stdout
from nilmtk.utils import get_datastore
from nilmtk.datastore import Key
from nilmtk.timeframe import TimeFrame
from nilmtk.measurement import LEVEL_NAMES
from nilmtk.utils import get_module_directory, check_directory_exists
from nilm_metadata import convert_yaml_to_hdf5, save_yaml_to_datastore

In [29]:
# Example source/destination locations and timezone for this notebook.
# The paths are built with join() so each one is a single string; the earlier
# `r'...', 'name'` form silently created 2-tuples instead of paths.
input_path = join(r'C:\Users\Ashraf\Desktop', 'Test Data.csv')
output_filename = join(r'C:\Users\Ashraf\Desktop', 'Test Data Converted.HDF')
tz = 'US/Eastern'
def convert_refit(input_path, output_filename, format='HDF'):
    """
    Convert the REFIT CSV files into a NILMTK DataStore and attach metadata.

    Parameters
    ----------
    input_path : str
        The root path of the CSV files, e.g. House1.csv
        If empty, a hard-coded local default directory is used.
    output_filename : str
        The destination filename (including path and suffix).
        If empty, a hard-coded default filename is used.
    format : str
        format of output. Either 'HDF' or 'CSV'. Defaults to 'HDF'
    """
    # These locations used to overwrite the arguments unconditionally, which
    # made both parameters meaningless. They are now only a fallback for empty
    # arguments, so the existing `convert_refit("", "")` call keeps working
    # while explicit paths are honoured.
    if not input_path:
        input_path = r'C:\Users\Ashraf\Desktop\Carleton Study\Thesis\My Work\Data Set'
    if not output_filename:
        output_filename = 'ashraf9.hdf'

    # Open DataStore
    store = get_datastore(output_filename, format, mode='w')
    try:
        # Convert raw data to DataStore
        _convert(input_path, store, 'Europe/London')

        # Add metadata
        metadata_dir = join(get_module_directory(),
                            'dataset_converters',
                            'refit',
                            'metadata')
        save_yaml_to_datastore(metadata_dir, store)
    finally:
        # Always release the store, even if conversion fails part-way.
        store.close()

    print("Done converting REFIT to HDF5!")
# Module-level example path (not read by any code visible in this cell).
# Built with join() so it is one string rather than an accidental 2-tuple.
input_path = join(r'C:\Users\Ashraf\Desktop', 'Test Data.csv')
def _convert(input_path, store, tz, sort_index=True):
    """
    Load each REFIT house CSV under `input_path` and write one
    single-column ('power', 'active') table per CSV column into `store`.

    Parameters
    ----------
    input_path : str
        The root path of the REFIT dataset.
    store : DataStore
        The NILMTK DataStore object.
    tz : str 
        Timezone e.g. 'US/Eastern'
    sort_index : bool
        If True (default), sort each house's data by its index before
        storing it.

    Raises
    ------
    RuntimeError
        If the CSV file for a house cannot be found.
    """
    check_directory_exists(input_path)

    # Iterate though all houses and channels
    # house 14 is missing! NILMTK building ids are therefore assigned
    # consecutively (1, 2, 3, ...) instead of reusing the REFIT house numbers.
    houses = [1,2,3,4,5,6,7,8,9,10,11,12,13,15,16,17,18,19,20,21]

    # Naming scheme tried first: House_<n>.csv
    prefix = ''
    suffix = '_'
    version_checked = False

    # The clean version already includes header, so we
    # just skip the text version of the timestamp
    usecols = ['Unix', 'Aggregate'] + ['Appliance%d' % i for i in range(1, 10)]

    for nilmtk_house_id, house_id in enumerate(houses, start=1):
        print("Loading house", house_id, end="... ")
        stdout.flush()
        csv_filename = join(input_path, prefix + 'House' + suffix + str(house_id) + '.csv')

        # The naming scheme is detected once, using the first house, and then
        # reused for every following house.
        if not version_checked:
            version_checked = True

            if exists(csv_filename):
                print('Using original filenames (House_XX.csv)')
            else:
                prefix = 'CLEAN_'
                suffix = ''
                csv_filename = join(input_path, prefix + 'House' + suffix + str(house_id) + '.csv')
                print('Using CLEAN filenames (CLEAN_HouseXX.csv)')

        if not exists(csv_filename):
            raise RuntimeError('Could not find REFIT files. Please check the provided folder.')

        df = _load_csv(csv_filename, usecols, tz)
        if sort_index:
            df = df.sort_index() # might not be sorted...

        # One meter per data column, numbered from 1 in column order.
        for chan_id, col in enumerate(df.columns, start=1):
            print(chan_id, end=" ")
            stdout.flush()
            key = Key(building=nilmtk_house_id, meter=chan_id)

            chan_df = pd.DataFrame(df[col])
            # Label the single column as an active power measurement and name
            # the column levels at construction time.
            chan_df.columns = pd.MultiIndex.from_tuples([('power', 'active')],
                                                        names=LEVEL_NAMES)

            store.put(str(key), chan_df)

        print('')

def _load_csv(filename, usecols, tz):
    """
    Parameters
    ----------
    filename : str
    usecols : list of columns to keep
    tz : str e.g. 'US/Eastern'
    Returns
    -------
    dataframe
    """
    # Load data
    df = pd.read_csv(filename, usecols=usecols)
    print ("printing df")
    print (df)
    # Convert the integer index column to timezone-aware datetime 
    df['Unix'] = pd.to_datetime(df['Unix'], unit='s', utc=True)
    df.set_index('Unix', inplace=True)
    df = df.tz_convert(tz)

    return df
# Run the conversion; both path arguments are passed as empty strings.
convert_refit("", "")

Hello
random
Hi
Loading house 1... Using CLEAN filenames (CLEAN_HouseXX.csv)
printing df
               Unix  Aggregate  Appliance1  Appliance2  Appliance3  \
0        1381323977        523          74           0          69   
1        1381323991        526          75           0          69   
2        1381324006        540          74           0          68   
3        1381324021        532          74           0          68   
4        1381324035        540          74           0          69   
...             ...        ...         ...         ...         ...   
6960003  1436529365        187           0          45           0   
6960004  1436529372        185           0          45           0   
6960005  1436529378        181           0          45           0   
6960006  1436529385        186           0          45           0   
6960007  1436529392        182           0          45           0   

         Appliance4  Appliance5  Appliance6  Appliance7  Appliance8  \

1 2 3 4 5 6 7 8 9 10 
Loading house 5... printing df
               Unix  Aggregate  Appliance1  Appliance2  Appliance3  \
0        1380189369        275           2           0           0   
1        1380189376        273           2           0           0   
2        1380189383        273           2           0           0   
3        1380189390        273           2           0           0   
4        1380189396        278           2           0           0   
...             ...        ...         ...         ...         ...   
7430750  1436204907        515         112           0           0   
7430751  1436204914        515         112           0           0   
7430752  1436204922        515         112           0           0   
7430753  1436204929        500         112           0           0   
7430754  1436204935        513         112           0           0   

         Appliance4  Appliance5  Appliance6  Appliance7  Appliance8  \
0                 0          11    

Loading house 9... printing df
               Unix  Aggregate  Appliance1  Appliance2  Appliance3  \
0        1387301180        199           0           0           0   
1        1387301196        198           0           0           0   
2        1387301211        197           0           0           0   
3        1387301227        198           0           0           0   
4        1387301239        199           0           0           0   
...             ...        ...         ...         ...         ...   
6169520  1436381542        606           7           0           0   
6169521  1436381549        606           7           0           0   
6169522  1436381556        606           7           0           0   
6169523  1436381563        606           7           0           0   
6169524  1436381570        606           7           0           0   

         Appliance4  Appliance5  Appliance6  Appliance7  Appliance8  \
0                 0           1           0           0  

1 2 3 4 5 6 7 8 9 10 
Loading house 15... printing df
               Unix  Aggregate  Appliance1  Appliance2  Appliance3  \
0        1387301058        243           0           0           0   
1        1387301073        244           0           0           0   
2        1387301088        245           0           0           0   
3        1387301103        249           0           0           0   
4        1387301118        241           0           0           0   
...             ...        ...         ...         ...         ...   
6225691  1436321427        164          79           0           0   
6225692  1436321434        163          79           0           0   
6225693  1436321441        166          79           8           0   
6225694  1436321448        165          79           8           0   
6225695  1436321454        165          79           8           0   

         Appliance4  Appliance5  Appliance6  Appliance7  Appliance8  \
0                 0           0   

Loading house 19... printing df
               Unix  Aggregate  Appliance1  Appliance2  Appliance3  \
0        1394122999        185          15           0           7   
1        1394123013        188          15           0           7   
2        1394123021        186          15           0           7   
3        1394123028        187          15           0           7   
4        1394123036        185           0           0           7   
...             ...        ...         ...         ...         ...   
5622605  1434771148        208          75           0          22   
5622606  1434771154        213          75           0          22   
5622607  1434771161        212          75           0          22   
5622608  1434771168        210          74           0          22   
5622609  1434771174        210          74           0          22   

         Appliance4  Appliance5  Appliance6  Appliance7  Appliance8  \
0                 0           0           0           0 