In [1]:
#basic voltage correlation for phase identification
#author: Kuthsav Thattai
#Company: Village Energy
#The voltage-profile data used in this example was generated from a GridLAB-D simulation of the IEEE European LV Test Feeder

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
%matplotlib inline

In [2]:
# Read the voltage profiles from disk. Every column but one is counted as a load
# (presumably the remaining column is the timestamp -- confirm against the CSV).
volt_raw_df = pd.read_csv('load_voltage_profile_55.csv')
num_loads = volt_raw_df.shape[1] - 1
print('Total Loads as per the data is:', num_loads)

Total Loads as per the data is: 55


In [None]:
# The three phases are equally displaced by 120 degrees, so a group of loads cannot
# necessarily be labelled A, B or C. The neutral names x, y and z are used instead.
x_set, y_set, z_set = set(), set(), set()

In [3]:
# Preview the first five rows to sanity-check the parsed columns.
volt_raw_df.head(n=5)

Unnamed: 0,timestamp,voltage_load1,voltage_load2,voltage_load3,voltage_load4,voltage_load5,voltage_load6,voltage_load7,voltage_load8,voltage_load9,...,votlage_load46,voltage_load47,voltage_load48,voltage_load49,voltage_load50,voltage_load51,voltage_load52,voltage_load53,voltage_load54,voltage_load55
0,2000-01-01 00:01:00 EST,252.113668,252.099954,252.112302,252.0465,252.0465,252.098588,252.044152,252.067,252.022045,...,251.928325,252.023,251.928825,251.928325,251.946871,251.925959,251.916129,251.946371,251.924592,251.915263
1,2000-01-01 00:02:00 EST,252.114034,252.09449,252.113534,252.048097,252.048097,252.09449,252.03509,252.073001,252.024509,...,251.931155,252.031,251.932521,251.931155,251.934711,251.929655,251.918959,251.933845,251.928289,251.918959
2,2000-01-01 00:03:00 EST,252.108473,252.098954,252.107107,252.035608,252.035608,252.098954,252.044018,252.054,252.008921,...,251.892978,252.014,251.897076,251.89571,251.945471,251.896942,251.887112,251.945471,251.895576,251.887112
3,2000-01-01 00:04:00 EST,252.089422,252.107615,252.088922,251.985174,251.989272,252.106749,252.060242,252.025,251.968684,...,251.841713,251.976001,251.847177,251.845811,251.966658,251.851641,251.841311,251.966658,251.850275,251.840445
4,2000-01-01 00:05:00 EST,252.086362,252.11148,252.085496,251.980919,251.984151,252.110614,252.069106,251.961,251.963832,...,251.824005,251.876001,251.833067,251.831701,251.968994,251.843362,251.83303,251.968128,251.841995,251.83303


In [4]:
# Flag to differentiate the initial phase allocation.
flag_set_phase = 0

In [5]:
# Pearson correlation is computed for each load's voltage profile against every
# remaining load's voltage profile. Two loads whose coefficient exceeds the
# threshold are segregated into the same phase set.
CORRELATION_THRESHOLD = 0.85  # minimum Pearson coefficient for two loads to share a phase

# Start from a clean slate so that re-running this cell gives the same result.
x_set.clear()
y_set.clear()
z_set.clear()
flag_set_phase = 0  # index of the next set to seed; stops at 2, so later seeds go to z_set

phase_sets = (x_set, y_set, z_set)
load_names = volt_raw_df.columns.values


def _assigned_set(load_name):
    """Return the phase set that already contains ``load_name``, or None."""
    for phase_set in phase_sets:
        if load_name in phase_set:
            return phase_set
    return None


# Column 0 is skipped (presumably the timestamp -- confirm); columns 1..num_loads are loads.
for i in range(1, num_loads):
    name_i = load_names[i]
    profile_i = volt_raw_df.iloc[:, i]
    for j in range(i + 1, num_loads + 1):
        name_j = load_names[j]
        pearson_coef, p_value = stats.pearsonr(profile_i, volt_raw_df.iloc[:, j])
        # A NaN coefficient compares False here and is therefore ignored.
        if pearson_coef > CORRELATION_THRESHOLD:
            # Membership is looked up on every pair so it never goes stale
            # while the sets are being filled inside this inner loop.
            set_i = _assigned_set(name_i)
            set_j = _assigned_set(name_j)
            if set_i is None and set_j is None:
                # Neither load has a phase yet: seed the next unused set.
                # Once x and y are seeded, every further new pair goes to z.
                phase_sets[flag_set_phase].update((name_i, name_j))
                if flag_set_phase < 2:
                    flag_set_phase += 1
            elif set_i is None:
                # Only j is assigned: i joins j's phase instead of being dropped.
                set_j.add(name_i)
            elif set_j is None:
                # Only i is assigned: j joins i's phase.
                set_i.add(name_j)
            # Both already assigned: keep the first assignment so that no load
            # ends up in more than one phase set.

In [6]:
# Converting each set into a list.
# The iteration order of a set of strings can change from one kernel session to
# the next (string hashes are randomised per process), so the lists are sorted to
# keep the saved CSV reproducible. Shorter names sort first so that, for example,
# voltage_load2 comes before voltage_load10.
x_list, y_list, z_list = (
    sorted(phase_set, key=lambda name: (len(name), name))
    for phase_set in (x_set, y_set, z_set)
)

In [7]:
# Wrap each phase's list of load names in its own single-column dataframe.
phaseX_allocated_df, phaseY_allocated_df, phaseZ_allocated_df = (
    pd.DataFrame(data=loads, columns=[label])
    for loads, label in (
        (x_list, 'Phase X'),
        (y_list, 'Phase Y'),
        (z_list, 'Phase Z'),
    )
)

In [8]:
# Concatenate the per-phase dataframes side by side, one column per phase.
# Phases with fewer loads are padded with NaN by the column-wise concat.
phase_allocated_df = pd.concat(
    [phaseX_allocated_df, phaseY_allocated_df, phaseZ_allocated_df],
    axis=1,
    ignore_index=True,
    sort=False,
)
# Each phase needs its own unique label; the third column holds the phase Z loads.
phase_allocated_df.columns = ['phase X', 'phase Y', 'phase Z']
phase_allocated_df.index.name = 'Index'

In [9]:
# Persist the phase allocation table (index column included) as a CSV file.
phase_allocated_df.to_csv(path_or_buf='Loads_allocated_to_phase.csv')