In [None]:
%run requirements.ipynb
%run config.ipynb

In [None]:
class FdiGenerator:
    """Produce tampered ("fdi") replacements for single meter readings.

    Six tampering modes are available (``fdi_1`` .. ``fdi_6``). Every mode
    draws its random factors from NumPy's global random state, so results are
    reproducible only if the caller seeds ``np.random`` beforehand.
    """

    def __init__(self):
        # Inclusive lower / exclusive upper bound of the uniform distribution
        # sampled by get_alpha().
        self.alpha_max = 0.8
        self.alpha_min = 0.2

    # x is the origin value

    def fdi_1(self, x):
        """Return ``x`` scaled by a freshly drawn alpha."""
        return x * self.get_alpha()

    def fdi_2(self, x, df_max):
        """Clip ``x`` at a random cut-off ``alpha * df_max``.

        Values above the cut-off are replaced by the cut-off; values at or
        below it are returned unchanged.
        """
        alpha = self.get_alpha()
        gamma = alpha * df_max
        return gamma if (x > gamma) else x

    def fdi_3(self, x, df_max):
        """Subtract a random offset in ``[0, df_max)`` from ``x``, floored at 0."""
        gamma = np.random.uniform(0, 1) * df_max
        return max((x - gamma), 0)

    def fdi_4(self, x, index, total_index):
        """Return 0 when ``index`` falls strictly inside a random window, else ``x``.

        The window start is drawn from ``[0, 20)`` and the window end from
        ``[start, 24)``; both are redrawn on every call.

        ``total_index`` is accepted for interface compatibility and is not used.

        NOTE(review): the bounds look like hours of a day, while the caller in
        this notebook passes a position within the whole series - confirm
        whether ``index`` should be reduced to a time of day before the check.
        """
        window_start = np.random.randint(0, 20)
        window_end = np.random.randint(window_start, 24)
        # The original condition compared against the start bound twice, which
        # made this branch unreachable and left the end bound unused.
        if window_start < index < window_end:
            return 0
        return x

    def fdi_5(self, x, index):
        """Return ``x`` scaled by a freshly drawn alpha (``index`` is unused)."""
        alpha = self.get_alpha()
        return x * alpha

    def fdi_6(self, avg_x):
        """Return the supplied average ``avg_x`` scaled by a freshly drawn alpha."""
        alpha = self.get_alpha()
        return avg_x * alpha

    def get_alpha(self):
        """Draw a scaling factor uniformly from ``[alpha_min, alpha_max)``."""
        return np.random.uniform(self.alpha_min, self.alpha_max)

    def get_random_mode(self):
        """Return a random tampering mode as an integer from 1 to 6 inclusive."""
        # randint's upper bound is exclusive, hence 7.
        fdi_method = np.random.randint(1, 7)
        return fdi_method

    def random_fdi(self, fdi_method, x, df, index):
        """Apply tampering mode ``fdi_method`` to the reading ``x``.

        Args:
            fdi_method: mode number; 1-5 select ``fdi_1`` .. ``fdi_5`` and any
                other value falls through to ``fdi_6``.
            x: the original reading.
            df: the series ``x`` belongs to; must provide ``max()``, ``mean()``
                and ``shape``.
            index: position of ``x`` that is forwarded to modes 4 and 5.

        Returns:
            The tampered value.
        """
        if fdi_method == 1:
            return self.fdi_1(x)
        elif fdi_method == 2:
            return self.fdi_2(x, df.max())
        elif fdi_method == 3:
            return self.fdi_3(x, df.max())
        elif fdi_method == 4:
            return self.fdi_4(x, index, df.shape[0])
        elif fdi_method == 5:
            return self.fdi_5(x, index)
        else:
            return self.fdi_6(df.mean())

In [None]:
# split data into groups
def split_data_to_group(target_df, areas, METER_IN_RANGE, NUMBER_OF_HALF_HOURS, TOTAL_METER):
    """Cut ``target_df`` into ``areas`` consecutive row ranges.

    Each group holds the rows of ``METER_IN_RANGE`` meters, every meter
    contributing ``NUMBER_OF_HALF_HOURS`` rows. The last group additionally
    receives the rows of the ``TOTAL_METER % areas`` meters left over.

    Args:
        target_df: any object supporting ``obj[start:stop]`` slicing.
        areas: number of groups to produce.
        METER_IN_RANGE: meters per group.
        NUMBER_OF_HALF_HOURS: rows per meter.
        TOTAL_METER: total number of meters in ``target_df``.

    Returns:
        dict mapping group number (0 .. areas-1) to its slice of ``target_df``.
    """
    rows_per_group = METER_IN_RANGE * NUMBER_OF_HALF_HOURS
    data_group = {}
    for group in range(areas):
        start = rows_per_group * group
        stop = rows_per_group * (group + 1)
        # Use the `areas` argument (not a notebook-level global) to spot the
        # last group, which also takes the leftover meters.
        if group == (areas - 1):
            stop += NUMBER_OF_HALF_HOURS * (TOTAL_METER % areas)
        data_group[group] = target_df[start:stop]
    return data_group

In [None]:
def split_group(group, total_meter, area_num, group_meter, total_day, ntl_meter_ratio, ntl_day_ratio):
    """Split every area's meters into normal and tampered ("fdi") meters.

    For each area a random subset of meters is selected as fdi meters. For
    every fdi meter a subset of days is tampered through the module-level
    ``FDI`` generator (one tampering mode per day); the remaining days keep
    their original readings.

    Side effect: reseeds NumPy's global random state with 42.

    Args:
        group: mapping of area number -> table of readings with an
            "Electricity" column, meters stored back to back with
            ``total_day * 48`` rows each. It is deep-copied, not modified.
        total_meter: total number of meters over all areas.
        area_num: number of areas; area ``area_num - 1`` holds the leftover meters.
        group_meter: meters per area (except the last one).
        total_day: number of days per meter.
        ntl_meter_ratio: fraction of ``group_meter`` selected as fdi meters.
        ntl_day_ratio: fraction of ``total_day`` tampered for each fdi meter.

    Returns:
        ``(final_data_group, origin_data_group)`` - two lists with one dict per
        area, each dict having the keys "normal" and "fdi". The first holds the
        tampered fdi meters, the second their untouched copies; the "normal"
        lists are shared between both.
    """
    # ex: split_group(data_group, total_meter = 2044, group_meter = 204, total_day = 299, ntl_meter_ratio = 0.1, ntl_day_ratio = 0.5)

    half_hours_per_day = 48
    number_of_half_hours = total_day * half_hours_per_day

    ntl_num = round(group_meter * ntl_meter_ratio) # ntl_num = how many number of ntl meters
    ntl_day_num = round(total_day * ntl_day_ratio) # ntl_day_num = how many days are fdi for ntl meters

    np.random.seed(42)

    final_data_group = []
    origin_data_group = []
    data_group = copy.deepcopy(group)

    # iterate through every area
    for area_index, data in data_group.items():

        final_data = {}
        origin_data = {}

        normal_data = []
        fdi_data = []

        if area_index == (area_num - 1): # the last group will have more data for the rest of the meters
            num = total_meter - group_meter * (area_num - 1)
        else:
            num = group_meter

        # split total meter into normal meters and fdi meters
        # (no random_state: the split draws from the global state seeded above)
        rs = ShuffleSplit(n_splits=1, test_size=ntl_num)
        for normal_index, fdi_index in rs.split(range(num)):
            for index in normal_index:
                normal_data.append(data[index*(number_of_half_hours):(index+1)*(number_of_half_hours)])
            for index in fdi_index:
                fdi_data.append(data[index*(number_of_half_hours):(index+1)*(number_of_half_hours)])

        # keep an untouched copy before the fdi meters are overwritten below
        origin_fdi_data = copy.deepcopy(fdi_data)

        # split total day into normal and fdi day with the ntl_day_num.
        # The fixed random_state yields the same split on every call, so it is
        # computed once here and reused for all fdi meters of the area.
        day_splitter = ShuffleSplit(n_splits=1, test_size=ntl_day_num, random_state=42)
        normal_day, fdi_day = next(day_splitter.split(range(total_day)))

        for meter_frame in tq.tqdm(fdi_data):

            electricity = meter_frame["Electricity"]
            original_values = electricity.to_numpy()
            fdi_dataframe = np.zeros((number_of_half_hours))

            # set normal data in normal days (one array slice per day instead
            # of assigning one-element Series into scalar slots)
            for day in normal_day:
                start = day * half_hours_per_day
                stop = start + half_hours_per_day
                fdi_dataframe[start:stop] = original_values[start:stop]

            # process FDI in fdi days
            for day in fdi_day:
                start = day * half_hours_per_day
                # one day has 48 data, so every 48 will change a fdi mode
                fdi_method = FDI.get_random_mode()
                for i in range(start, start + half_hours_per_day):
                    origin_value = electricity.iloc[i]
                    fdi_dataframe[i] = FDI.random_fdi(fdi_method, origin_value, electricity, i)

            meter_frame["Electricity"] = fdi_dataframe

        final_data["normal"] = normal_data
        final_data["fdi"] = fdi_data
        final_data_group.append(final_data)

        origin_data["normal"] = normal_data
        origin_data["fdi"] = origin_fdi_data
        origin_data_group.append(origin_data)

    return final_data_group, origin_data_group

In [None]:
def plot_data(normal_data, fdi_data, title, label1, label2, plot_house=5, plot_hour=120, save=False):
    """Plot the first readings of a few normal (blue) and fdi (red) meters.

    Args:
        normal_data: list of tables with an "Electricity" column, or None to
            skip the blue curves.
        fdi_data: list of tables with an "Electricity" column, or None to skip
            the red curves.
        title: figure title; also passed to ``save_fig`` when ``save`` is set.
        label1: legend label of the blue curves.
        label2: legend label of the red curves.
        plot_house: maximum number of meters drawn from each list.
        plot_hour: number of leading readings drawn per meter.
        save: when truthy, ``save_fig(title)`` is called before showing.
    """
    #plotting the points
    plt.figure(dpi=150, figsize=(15,4))

    # Normal meters are drawn first, fdi meters on top. A list that is None is
    # skipped entirely instead of failing on len(None).
    for frames, color in ((normal_data, 'blue'), (fdi_data, 'red')):
        if frames is None:
            continue
        for i in range(min(len(frames), plot_house)):
            data = frames[i]["Electricity"]
            plt.plot(HOURS_LIST[:plot_hour], data[:plot_hour], color=color)

    blue_patch = mpatches.Patch(color='blue', label=label1)
    red_patch = mpatches.Patch(color='red', label=label2)
    plt.legend(handles=[blue_patch, red_patch])

    # naming the x axis
    plt.xlabel('Half Hours')
    # naming the y axis
    plt.ylabel('Electricity Consumption(W)')

    # giving a title to my graph
    plt.title(title)

    # function to save the plot
    if save:
        save_fig(title)

    # function to show the plot
    plt.show()