In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Attack-level option substituted into the dataset file name.
# (The earlier note mentioned 5% or 10%; the value currently selected is '15'.)
option = '15'
data = pd.read_csv("./Datasets/IEEE_case118_onebus_{}%.csv".format(option))

# Discard rows containing missing values before inspecting anything
data.dropna(inplace=True)
#data.drop(columns=["Iteration"], inplace=True)

# Preview the first few rows to understand the structure of the data
print(data.head())

# Attack-indicator columns whose value counts are inspected below
attack_columns = [
    "Attack_Bus_21", "Attack_Bus_43", "Attack_Bus_20", "Attack_Bus_44",
    "Attack_Bus_52", "Attack_Bus_22", "Attack_Bus_51", "Attack_Bus_53",
    "Attack_Bus_45", "Attack_Bus_58", "Attack_Bus_108", "Attack_Bus_117",
    "Attack_Bus_109", "Attack_Bus_86", "Attack_Bus_33", "Attack_Bus_57",
    "Attack_Bus_95", "Attack_Bus_13", "Attack_Bus_19", "Attack_Bus_87",
]

# Print (unique values, counts) for every indicator column, in the listed order
for attack_col in attack_columns:
    print(np.unique(data[attack_col], return_counts=True))


In [None]:
# Remove columns which have only one unique value.
# The constant columns are identified in a single pass and dropped with one call,
# so the frame is rewritten once and previewed once instead of once per column.
unique_counts = data.nunique()
constant_cols = unique_counts[unique_counts == 1].index.tolist()
data.drop(columns=constant_cols, inplace=True)

for col in constant_cols:
    print(f"Dropped column: {col}")

# Single preview of the frame after all constant columns have been removed
print(data.head())


In [None]:
# Split the frame into label columns and input columns.
# The label column names are listed once and reused for both selections.
target_cols = [
    'Attack_Bus_21', 'Attack_Bus_43', 'Attack_Bus_20',
    'Attack_Bus_44', 'Attack_Bus_52', 'Attack_Bus_22',
    'Attack_Bus_51', 'Attack_Bus_53', 'Attack_Bus_45',
    'Attack_Bus_58', 'Attack_Bus_108', 'Attack_Bus_117',
    'Attack_Bus_109', 'Attack_Bus_86', 'Attack_Bus_33',
    'Attack_Bus_57', 'Attack_Bus_95', 'Attack_Bus_13',
    'Attack_Bus_19', 'Attack_Bus_87',
]

# Every column that is not a label column is used as an input feature
feature_cols = data.columns.difference(target_cols)
features = data[feature_cols]
targets = data[target_cols]


In [None]:
from sklearn.feature_selection import mutual_info_classif
import matplotlib.pyplot as plt


# Collapse the one-hot target columns into one integer-valued label per row:
#   0      -> the row sums to zero (no indicator set)
#   k + 1  -> k is the position of the largest entry in the row
temp = targets.to_numpy()
temp_target = np.array(
    [0 if row.sum() == 0 else row.argmax() + 1 for row in temp],
    dtype=float,
)

print(np.unique(temp_target, return_counts=True))

# Horizontal bar chart of the mutual information between each feature and the label
plt.figure(figsize=(20, 20))
mi_scores = mutual_info_classif(features, temp_target, random_state=0)
plt.barh(feature_cols, mi_scores)
plt.xlabel('Mutual Information')
plt.ylabel('Feature')
plt.title('Feature Importance')
plt.show()

In [None]:
# Column labels of the feature frame (no MI-based column dropping is performed here)
feature_cols = features.columns

# Standardise the features: fit the scaler, then transform the same frame.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/validation/test split performed further down - confirm this is intended.
scaler = StandardScaler().fit(features)
features_scaled = scaler.transform(features)

# Classification labels are not scaled; they are only turned into a single column
scaled_target = temp_target[:, np.newaxis]

In [None]:
# Prepare the sequences
# Build sliding-window sequences for time-series prediction.
# The feature and target inputs are expected to be NumPy arrays already.
def to_sequences(seq_size, num_features, out_features, obs1, obs2):
    """Cut two row-aligned arrays into sliding windows and per-window labels.

    Parameters
    ----------
    seq_size : int
        Number of consecutive rows in each window.
    num_features : int
        Number of columns taken from ``obs1`` per row.
    out_features : int
        Number of columns taken from ``obs2`` per label.
    obs1 : np.ndarray
        2-D input array; windows are sliced from its rows.
    obs2 : np.ndarray
        2-D label array, indexed with the same row positions as ``obs1``.

    Returns
    -------
    (x, y) : tuple of np.ndarray
        ``x`` has shape ``(len(obs1) - seq_size, seq_size, num_features)``.
        ``y`` has shape ``(len(obs2) - seq_size, out_features)``; row ``i`` is the
        ``obs2`` row at the last position of window ``i``.

    Notes
    -----
    Window starts run from 0 to ``len(obs1) - seq_size - 1``, so the window
    that would end on the final row is not emitted.
    """
    n_windows = len(obs1) - seq_size
    x = np.zeros((n_windows, seq_size, num_features))
    y = np.zeros((len(obs2) - seq_size, out_features))

    for start in range(n_windows):
        stop = start + seq_size
        # Rows [start, stop) form the window; its label is the row at stop - 1
        x[start] = obs1[start:stop].reshape(1, -1, num_features)
        y[start] = obs2[stop - 1, :].reshape(-1, out_features)

    return x, y

# Create the sequence data that is later split into train/validation/test sets
features = features_scaled
target = scaled_target

# Width of the input and label arrays
num_features = features_scaled.shape[1]
out_features = scaled_target.shape[1]

# Multiplier applied to the input windows (1 leaves them unchanged)
amp_increase = 1

# Number of consecutive rows per window
sequence_size = 12

# Build the windows
# Obs 1 : Features      Obs2 : Target
X, y = to_sequences(sequence_size, num_features,
                    out_features, features_scaled, scaled_target)

# Amplify the inputs only. y holds class indices, so multiplying it by
# amp_increase would change the labels for any value other than 1.
X = amp_increase*X

# Sanity check
print(X.shape)
print(y.shape)

# Flatten the labels to a 1-D array for PyTorch
y = y.reshape(-1)

In [None]:
from sklearn.model_selection import train_test_split
# Settings shared by both stratified splits: 20% held out, fixed seed, shuffled
split_options = dict(test_size=0.2, random_state=42, shuffle=True)

# NOTE(review): consecutive windows share rows and the split below is random,
# so windows in different splits can overlap in time - confirm this is acceptable.

# 1. Hold out the test set, stratified on the labels
X_train1, X_test, y_train1, y_test = train_test_split(
    X, y, stratify=y, **split_options
)
print("Train set (train_test_split):", X_train1.shape)
print("Test set (train_test_split):", X_test.shape)

# 2. Carve the validation set out of the remaining training portion
X_train, X_val, y_train, y_val = train_test_split(
    X_train1, y_train1, stratify=y_train1, **split_options
)
print("Train set (train_test_split):", X_train.shape)
print("Validation set (train_test_split):", X_val.shape)
print("Train set labels shape:", y_train.shape)

# Class counts per split
print("Unique values in train set:", np.unique(y_train, return_counts=True))
print("Unique values in validation set:", np.unique(y_val, return_counts=True))
print("Unique values in test set:", np.unique(y_test, return_counts=True))

In [None]:
# Samples per mini-batch for the train and validation loaders
batch_size = 128

# Copy every split into a float32 tensor (inputs and labels alike).
# NOTE(review): labels are stored as float32; presumably the training code casts
# them to an integer dtype before a classification loss - confirm.
(X_train_tensor, y_train_tensor,
 X_val_tensor, y_val_tensor,
 X_test_tensor, y_test_tensor) = (
    torch.tensor(split_array, dtype=torch.float32)
    for split_array in (X_train, y_train, X_val, y_val, X_test, y_test)
)

# Pair inputs with labels for each split
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Batched loaders; none of them shuffles, and the test loader yields one full-size batch
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=X_test.shape[0], shuffle=False)

In [None]:
# Save the datasets and the loader configuration of each split
import os

# torch.save does not create missing parent directories, so make sure the
# output folder exists before writing anything into it.
os.makedirs('./Preprocessed_data', exist_ok=True)

# Loader settings stored next to each dataset; they mirror the DataLoader
# arguments used when the loaders were built.
train_config = {
    'batch_size': batch_size,
    'shuffle': False,
    'num_workers': 0  # adjust as needed
}
val_config = {
    'batch_size': batch_size,
    'shuffle': False,
    'num_workers': 0  # adjust as needed
}
test_config = {
    'batch_size': X_test.shape[0],  # the whole test set in one batch
    'shuffle': False,
    'num_workers': 0  # adjust as needed
}

# (object to save, path template); written in the same order as before.
# NOTE(review): the datasets are pickled as whole TensorDataset objects; recent
# PyTorch versions may need torch.load(..., weights_only=False) to read them back.
save_plan = [
    (train_dataset, './Preprocessed_data/train_dataset_{}.pt'),
    (train_config, './Preprocessed_data/train_config_{}.pt'),
    (val_dataset, './Preprocessed_data/val_dataset_{}.pt'),
    (val_config, './Preprocessed_data/val_config_{}.pt'),
    (test_dataset, './Preprocessed_data/test_dataset_{}.pt'),
    (test_config, './Preprocessed_data/test_config_{}.pt'),
]
for payload, path_template in save_plan:
    torch.save(payload, path_template.format(option))