The goal of this CNN is to classify wind faults into four categories: 
1. 3 Phase to Ground
2. 2 Phase line to line
3. 2 phase to earth
4. 1 pole line to ground. 



In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from sklearn.model_selection import train_test_split

Preprocessing + Rolling Windows

In [None]:
WINDOW_SIZE = 20  # 1 second window (adjust if needed)
OVERLAP_SIZE = 15
LABEL_MAPPING = {
    "3P": 1,  # 3-phase to ground
    "2P": 2,   # 2-phase line to line
    "2P.EARTH": 3, # 2-phase to earth
    "1P": 4    # 1-pole line to ground
}
class WindFaultDataset(Dataset):
    """ PyTorch Dataset for Wind Fault Classification """
    def __init__(self, file_paths):
        self.data = []
        self.labels = []
        self.scaler = MinMaxScaler()

        for file_path in file_paths:
            features, label = self.process_csv(file_path)
            windows, labels = self.create_rolling_windows(features, label, WINDOW_SIZE, OVERLAP_SIZE)
            self.data.append(windows)
            self.labels.append(labels)

        self.data = np.vstack(self.data)  # Stack all windows
        self.labels = np.hstack(self.labels)

    def process_csv(self, file_path):
        """ Load and preprocess a CSV file. """
        df = pd.read_csv(file_path)
        df.columns = pd.read_csv(file_path, nrows=1).iloc[0]  # Assign proper column names
        df = df.drop(df.columns[0], axis=1)  # Dropping time column
        df = df.apply(pd.to_numeric, errors='coerce')  # Convert to numeric
        
        # Extract label from filename
        fault_type = os.path.basename(file_path).split("-")[0]
        label = LABEL_MAPPING.get(fault_type, 0)
        
        # Normalize features
        feature_data = self.scaler.fit_transform(df)
        # Add label column to feature_data
        label_column = np.full((feature_data.shape[0], 1), label)
        feature_data = np.hstack((feature_data, label_column))
        
        return feature_data, label

    def create_rolling_windows(self, data, label, window_size, overlap_size):
        """ Generate overlapping rolling windows of data. """
        windows = []
        labels = []
        step_size = window_size - overlap_size
        for i in range(0, len(data) - window_size + 1, step_size):  # Overlapping windows
            windows.append(data[i:i + window_size])
            labels.append(label)
        return np.array(windows), np.array(labels)

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)

I am going to make a seperate folder within Grid_faults to store the noise generated data. 

In [None]:

def add_noise_to_csv_files(input_dir, output_subdir, noise_level=0.02):
    """
    Adds noise to data from CSV files in a directory and saves them as new CSV files in a subfolder.

    Parameters:
    - input_dir (str): Path to the directory containing the original CSV files.
    - output_subdir (str): Name of the subfolder to save the augmented CSV files.
    - noise_level (float): Standard deviation of the Gaussian noise to be added.
    """
    # Create the output subdirectory if it doesn't exist
    output_dir = os.path.join(input_dir, output_subdir)
    os.makedirs(output_dir, exist_ok=True)

    # Iterate over all CSV files in the input directory
    for file_name in os.listdir(input_dir):
        if file_name.endswith('.csv'):
            file_path = os.path.join(input_dir, file_name)
            df = pd.read_csv(file_path)
            
            # Add Gaussian noise to the data
            noise = np.random.normal(0, noise_level, df.shape)
            df_noisy = df + noise
            
            # Save the augmented data to a new CSV file in the output subdirectory
            output_file_path = os.path.join(output_dir, f"noisy-{file_name}")
            df_noisy.to_csv(output_file_path, index=False)
            print(f"Saved noisy data to {output_file_path}")

# Example usage
input_directory = 'Grid_Faults'
output_subdirectory = 'Noisy_Data'
add_noise_to_csv_files(input_directory, output_subdirectory, noise_level=0.01)

Saved noisy data to Grid_Faults\Noisy_Data\noisy_1P-100-0-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_1P-20-0-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_1P-40-0-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_1P-60-0-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_1PN.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-0-100-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-0-20-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-0-40-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-0-60-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-100-0-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-100-100-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-100-20-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-100-40-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-100-60-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_2P-20-0-0.csv
Saved noisy data to Grid_Faults\Noisy_Data\noisy_

In [25]:
data =  'Grid_Faults'
file_paths = []
for file in os.listdir(data):
    file_paths.append(os.path.join(data, file))
# file_paths = ["Wind_Faults\\2P.EARTH-0-40-0.csv"]  # Replace with actual file paths
dataset = WindFaultDataset(file_paths)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# Print dataset shape
sample_data, sample_label = dataset[7000]
print("Sample data shape:", sample_data.shape)  # (WINDOW_SIZE, num_features)
print("Sample label:", sample_label)
unique, counts = np.unique(dataset.labels, return_counts=True)
label_stats = dict(zip(unique, counts))
print("Label statistics:", label_stats)
#print(sample_data)

t_np = sample_data.numpy() #convert to Numpy array
df = pd.DataFrame(t_np) #convert to a dataframe
df.to_csv("testfile",index=False) #save to file

Sample data shape: torch.Size([20, 18])
Sample label: tensor(1)
Label statistics: {np.int64(0): np.int64(182), np.int64(1): np.int64(5782), np.int64(2): np.int64(1128), np.int64(3): np.int64(1107), np.int64(4): np.int64(183)}


In [9]:
import torch.nn as nn
import torch.optim as optim

class WindFaultCNN(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(WindFaultCNN, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(16, 32, kernel_size=5, stride=1, padding=2)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * (WINDOW_SIZE // 2), 64)
        self.fc2 = nn.Linear(64, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # Change shape to (batch, features, time)
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.shape[0], -1)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return self.softmax(x)

# Get input size
num_features = sample_data.shape[1]  # Number of features (columns)
num_classes = len(LABEL_MAPPING)

# Initialize model
model = WindFaultCNN(input_channels=num_features, num_classes=num_classes)
print(model)

WindFaultCNN(
  (conv1): Conv1d(18, 16, kernel_size=(5,), stride=(1,), padding=(2,))
  (relu): ReLU()
  (conv2): Conv1d(16, 32, kernel_size=(5,), stride=(1,), padding=(2,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1600, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=4, bias=True)
  (softmax): Softmax(dim=1)
)


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(dataloader)}")

print("Training complete.")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x800 and 1600x64)