In [22]:
import pandas as pd

# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('dataset/kaggle-dataset.csv')

# Preview the first rows. The printed frame shows four 0/1 indicator columns
# (G, C, B, A) followed by Ia, Ib, Ic, Va, Vb, Vc
# (presumably per-phase currents and voltages -- confirm against the dataset docs).
print(df.head())


   G  C  B  A          Ia          Ib          Ic        Va        Vb  \
0  1  0  0  1 -151.291812   -9.677452   85.800162  0.400750 -0.132935   
1  1  0  0  1 -336.186183  -76.283262   18.328897  0.312732 -0.123633   
2  1  0  0  1 -502.891583 -174.648023  -80.924663  0.265728 -0.114301   
3  1  0  0  1 -593.941905 -217.703359 -124.891924  0.235511 -0.104940   
4  1  0  0  1 -643.663617 -224.159427 -132.282815  0.209537 -0.095554   

         Vc  
0 -0.267815  
1 -0.189099  
2 -0.151428  
3 -0.130570  
4 -0.113983  


In [23]:
# Define function to determine label

from enum import Enum

class Fault(Enum):
    """Fault categories; each member's value is the human-readable text used in labels.

    `determine_fault_label` reads `.value` from these members to build the
    string stored in the `Fault` column.
    """
    # Returned as-is when neither the ground flag nor any phase flag is set.
    NA = 'No Fault'
    # Faults without ground involvement (two or three phases).
    LL = 'Line-to-line fault'
    LLL = 'Line-to-line-to-line fault'
    # Faults with ground involvement (one, two or three phases).
    LG = 'Line-to-ground fault'
    LLG = 'Line-to-line-to-ground fault'
    LLLG = 'Line-to-line-to-line-to-ground fault'

def determine_fault_label(row):
    """Build a human-readable fault label from a row's G/A/B/C indicator flags.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or dict)
        Must support ``row['G']``, ``row['A']``, ``row['B']`` and ``row['C']``.
        Each value is interpreted by truthiness: truthy means the ground /
        that phase is involved in the fault.

    Returns
    -------
    str
        ``'No Fault'`` when no flag is set; otherwise the fault description
        followed by ``' at phase: X'`` (one phase) or ``' at phases: X, Y'``
        (several phases), with phases listed in A, B, C order.

    Raises
    ------
    ValueError
        If the flags do not correspond to a known fault type: ground set with
        no phase, or a single phase set without ground.
    """
    ground = bool(row['G'])
    # Keep A, B, C order so labels are stable regardless of column order.
    phase_list = [phase for phase in ('A', 'B', 'C') if row[phase]]
    no_of_phases = len(phase_list)

    if not ground and no_of_phases == 0:
        return Fault.NA.value

    # (ground involved?, number of faulted phases) -> fault category
    fault_by_signature = {
        (True, 1): Fault.LG,
        (True, 2): Fault.LLG,
        (True, 3): Fault.LLLG,
        (False, 2): Fault.LL,
        (False, 3): Fault.LLL,
    }
    fault_type = fault_by_signature.get((ground, no_of_phases))
    if fault_type is None:
        # Previously this fell through with fault_type == '' and crashed with
        # an opaque AttributeError on `.value`; fail with a clear message instead.
        raise ValueError(
            'Unsupported fault combination: ground=' + str(ground)
            + ', phases=' + (', '.join(phase_list) or 'none')
        )

    fault_phase = ' at phase: ' if no_of_phases == 1 else ' at phases: '
    return fault_type.value + fault_phase + ', '.join(phase_list)

# Derive the text label for every row, then discard the four raw indicator
# columns the label was computed from.
df = (
    df
    .assign(Fault=lambda frame: frame.apply(determine_fault_label, axis=1))
    .drop(columns=['G', 'A', 'B', 'C'])
)
print(df.head())

# Persist the labeled frame; the positional index is not written.
df.to_csv('./dataset/labeled-dataset.csv', index=False)

           Ia          Ib          Ic        Va        Vb        Vc  \
0 -151.291812   -9.677452   85.800162  0.400750 -0.132935 -0.267815   
1 -336.186183  -76.283262   18.328897  0.312732 -0.123633 -0.189099   
2 -502.891583 -174.648023  -80.924663  0.265728 -0.114301 -0.151428   
3 -593.941905 -217.703359 -124.891924  0.235511 -0.104940 -0.130570   
4 -643.663617 -224.159427 -132.282815  0.209537 -0.095554 -0.113983   

                              Fault  
0  Line-to-ground fault at phase: A  
1  Line-to-ground fault at phase: A  
2  Line-to-ground fault at phase: A  
3  Line-to-ground fault at phase: A  
4  Line-to-ground fault at phase: A  


In [24]:
# Shuffle every row (frac=1) and renumber the index from 0.
# A fixed seed makes the row order -- and therefore the CSV written below --
# identical on every fresh Restart & Run All.
# Note: this rebinds `df`, so re-running only this cell shuffles the
# already-shuffled frame again.
SHUFFLE_SEED = 42
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

print(df.head())

# Persist the shuffled, labeled frame; the positional index is not written.
df.to_csv('./dataset/labeled-jumbled-dataset.csv', index=False)

           Ia          Ib          Ic        Va        Vb        Vc  \
0  173.964293  659.110478   57.747991  0.040698 -0.288973  0.248275   
1  499.863859   -9.298764   42.024224  0.277323 -0.586687  0.309364   
2  435.393473  447.805924 -883.200376  0.042335 -0.022331 -0.020004   
3   56.942625 -398.275160  343.890959  0.325074  0.037872 -0.362945   
4 -203.196547 -643.310095  846.504388 -0.041061  0.011471  0.029589   

                                               Fault  
0       Line-to-line-to-ground fault at phases: A, B  
1                   Line-to-ground fault at phase: A  
2  Line-to-line-to-line-to-ground fault at phases...  
3                 Line-to-line fault at phases: B, C  
4  Line-to-line-to-line-to-ground fault at phases...  
