### This notebook takes a CSV dataset of household load data and PV generation data and creates the training sets from it

In [5]:
# Import libraries

import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
from IFEEL import ifeel_transformation, ifeel_extraction
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import nbimporter
from Functions import Functions

In [7]:
# Load the combined household-load / PV-generation table, then split it into
# the first (timestamp) column, the PV block and one block of household columns.
pv_col = 3                                                       # No. of columns of PV generation data (trailing columns)
hh_col = 81                                                      # No. of columns of household load data per block

df = pd.read_csv(
    r"C:\Users\tangl\OneDrive\Engineering Course\Year 4\4YP Lingxi Tang\Load + PV Dataset.csv",
    parse_dates=['date_time'],
    dayfirst=True,
).sort_values("date_time")

# Flip the sign of the trailing PV columns so generation is stored as negative values.
df.iloc[:, -pv_col:] = -df.iloc[:, -pv_col:]

# Set the first column aside (presumably the 'date_time' column -- confirm
# against the CSV layout) and remove it so only data columns remain in `df`.
date_time = df.iloc[:, 0]
df = df.drop(columns=df.columns[0])


## Separate household load and PV generation data
pv_df = df.iloc[:, -pv_col:]

## Exactly one of the two `hh_df` lines below should be active; each selects one block of household columns
#hh_df = df.iloc[:, :hh_col]                                   # First 81 household properties
hh_df = df.iloc[:, hh_col:hh_col * 2]                          # Second 81 household properties

In [15]:
## Import dataset for the headers
# The frame is handed unchanged to Functions.get_IFEEL_feats below.
# NOTE(review): absolute, machine-specific path kept as-is so existing runs
# keep working; consider moving it to a configurable data directory.
df_test = pd.read_csv(r"C:\Users\tangl\OneDrive\Engineering Course\Year 4\4YP\IFEEL_test_data_1month_30mins.csv", header=0, index_col=0, parse_dates=False)

no_of_houses = 81

# Divisor applied to a single PV column when the PV count is not a multiple
# of the number of PV columns used below (3).
# NOTE(review): the value 56 is carried over unchanged; its derivation is not
# visible in this notebook -- confirm it is the intended scaling.
extra_pv_divisor = 56

# Loop-invariant work, done once: restrict the household block to the first
# `no_of_houses` columns and pre-compute the row-wise sums.
hh_df = hh_df.iloc[:, :no_of_houses]
hh_load_sum = hh_df.sum(axis=1)
pv_sum = pv_df.sum(axis=1)

# Features are collected in a list and concatenated once after the loop.
# DataFrame.append was removed in pandas 2.0, and growing a frame row-block by
# row-block inside a loop is quadratic in the number of iterations.
feat_frames = []

## Loop below generates annual feeder level net-load datasets with different number of PV-equipped household
for no_of_pv in range(no_of_houses + 1):

    ## Combining household load consumption data and the PV generation dataset of specified number of PV
    pv_pen_rate = no_of_pv / no_of_houses
    remainder_pv_no = no_of_pv % 3
    whole_pv_no = no_of_pv - remainder_pv_no

    # Multiples of three are covered by scaling the summed PV columns ...
    total_pv_generated = pv_sum * whole_pv_no / no_of_houses
    # ... and the 1 or 2 left-over installations add PV column 0 (and then
    # column 1), each divided by `extra_pv_divisor`.
    for extra_idx in range(remainder_pv_no):
        total_pv_generated = total_pv_generated + pv_df.iloc[:, extra_idx] / extra_pv_divisor

    # PV values were negated upstream, so adding them yields the net load.
    feeder_load = hh_load_sum + total_pv_generated
    pv_capacity = no_of_pv * 4.6       # pv capacity in kWp

    ## Extract averaged 24-hour net-load curves
    all_data = Functions.obtaincluster(feeder_load, pv_capacity, date_time)

    ## Obtain IFEEL training dataset
    feat = Functions.get_IFEEL_feats(all_data, df_test, False)
    feat_frames.append(feat)

# Same result as repeatedly appending each `feat` to the previous total:
# original indices are kept (ignore_index=False is the default).
all_feat = pd.concat(feat_frames)

In [9]:
## Save the ML-ready training set in CSV format (uncomment the line below to write the file)
#all_feat.to_csv('Datasetv4_ML_2.csv')