In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from preprocessing import add_noise_to_csv_files


The goal here is to create one large CSV file that will later be split into training windows and validation windows. A label is appended as the last column of each file; this column will need to be masked during evaluation. The fault classes should be balanced, using noise-augmented copies to supplement the 1P, 2P, and 2P.EARTH classes up to the 3P level.

In [7]:
# Integer class label for each fault-type prefix found in the data file names.
LABEL_MAPPING = {
    "3P": 4,  # 3-phase to ground
    "2P": 3,   # 2-phase line to line
    "2P.EARTH": 2, # 2-phase to earth
    "1P": 1,    # 1-pole line to ground
    "normal": 0  # Normal
}

# Folder holding the per-fault CSV files that get labelled in place.
directory = 'Distribution_faults'


def fault_type_from_filename(filename):
    """Return the fault-type prefix encoded in a data file name.

    The prefix is the part of the base name (extension removed) that precedes
    the first "-", e.g. "2P.EARTH-0-100-0.csv" -> "2P.EARTH". Removing the
    extension first means a bare "normal.csv" resolves to "normal" rather than
    "normal.csv".
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    return stem.split("-")[0]


def label_fault_files(directory, label_mapping):
    """Write a 'Fault_Type' column into every recognised CSV in `directory`.

    Each ``*.csv`` file whose fault-type prefix is a key of `label_mapping` is
    read, given a constant 'Fault_Type' column holding the mapped label, and
    written back to the same path. Assigning the column (rather than
    concatenating) keeps the step idempotent: re-running overwrites the
    existing label column instead of adding another one.

    Non-CSV entries are ignored; CSV files with an unknown prefix are reported
    and skipped. Neither is counted.

    Returns:
        (labelled_files, label_counts): the names of the files that were
        labelled, in sorted order, and a dict mapping every label in
        `label_mapping` to the number of files that received it.

    Raises:
        FileNotFoundError: if `directory` does not exist.
    """
    labelled_files = []
    label_counts = {label: 0 for label in label_mapping.values()}

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".csv"):
            continue

        fault_type = fault_type_from_filename(filename)
        if fault_type not in label_mapping:
            print(f"Warning: Fault type '{fault_type}' not found in LABEL_MAPPING. Skipping file {filename}.")
            continue
        label = label_mapping[fault_type]

        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path)
        df['Fault_Type'] = label
        # NOTE: this overwrites the source file in place.
        df.to_csv(file_path, index=False)
        print(f"Updated file: {filename}")

        # Record only files that were actually labelled, so the counts below
        # can never hit a prefix that is missing from the mapping.
        labelled_files.append(filename)
        label_counts[label] += 1

    return labelled_files, label_counts


# Step 1: Append label to additional last column of dataframe.
if os.path.isdir(directory):
    files, label_counts = label_fault_files(directory, LABEL_MAPPING)
else:
    # Keep `files` / `label_counts` defined so the summary below still prints.
    print(f"Warning: directory '{directory}' not found; no files were labelled.")
    files = []
    label_counts = {label: 0 for label in LABEL_MAPPING.values()}

print(label_counts)


Updated file: 1P-100-0-0.csv
Updated file: 1P-20-0-0.csv
Updated file: 1P-40-0-0.csv
Updated file: 1P-60-0-0.csv
Updated file: 2P-0-100-0.csv
Updated file: 2P-0-20-0.csv
Updated file: 2P-0-40-0.csv
Updated file: 2P-0-60-0.csv
Updated file: 2P-100-0-0.csv
Updated file: 2P-100-100-0.csv
Updated file: 2P-100-20-0.csv
Updated file: 2P-100-40-0.csv
Updated file: 2P-100-60-0.csv
Updated file: 2P-20-0-0.csv
Updated file: 2P-20-100-0.csv
Updated file: 2P-20-20-0.csv
Updated file: 2P-20-40-0.csv
Updated file: 2P-20-60-0.csv
Updated file: 2P-40-0-0.csv
Updated file: 2P-40-100-0.csv
Updated file: 2P-40-20-0.csv
Updated file: 2P-40-40-0.csv
Updated file: 2P-40-60-0.csv
Updated file: 2P-60-0-0.csv
Updated file: 2P-60-100-0.csv
Updated file: 2P-60-20-0.csv
Updated file: 2P-60-40-0.csv
Updated file: 2P-60-60-0.csv
Updated file: 2P.EARTH-0-100-0.csv
Updated file: 2P.EARTH-0-20-0.csv
Updated file: 2P.EARTH-0-40-0.csv
Updated file: 2P.EARTH-0-60-0.csv
Updated file: 2P.EARTH-100-0-0.csv
Updated file: 2P.

In [None]:
from preprocessing import add_noise_to_csv_files