<a href="https://colab.research.google.com/github/Enigmaaaaaa/Suduko-Solver/blob/main/SAE_ZSL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import scipy
import scipy.io
import argparse

In [2]:
def parse_args():
	parser = argparse.ArgumentParser()
	parser.add_argument('--ld', type=float, default=500000) # lambda
	return parser.parse_args()

In [3]:
def normalizeFeature(x):
    # x = d x N dims (d: feature dimension, N: the number of features)
    x = pd.DataFrame(x)  # Converting 'x' to a pandas DataFrame
    x = x.apply(pd.to_numeric, errors='coerce').fillna(0)  # Converting each column to numeric type, replacing non-numeric values with 0
    x = x + 1e-10  # for avoid RuntimeWarning: invalid value encountered in divide
    feature_norm = np.sum(x**2, axis=1)**0.5  # l2-norm
    feat = x / feature_norm[:, np.newaxis]
    return feat

In [4]:
def SAE(x, s, ld):
	# SAE is Semantic Autoencoder
	# INPUTS:
	# 	x: d x N data matrix
	#	s: k x N semantic matrix
	#	ld: lambda for regularization parameter
	#
	# OUTPUT:
	#	w: kxd projection matrix

	A = np.dot(s, s.transpose())
	B = ld * np.dot(x, x.transpose())
	C = (1+ld) * np.dot(s, x.transpose())
	w = scipy.linalg.solve_sylvester(A,B,C)
	return w


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import pandas as pd

# Loading the Electra Dataset
electra_df = pd.read_csv('/content/drive/MyDrive/electra_modbus.csv')

# Updating the label encoding dictionary with string keys
label_encoding = {
    'READ_ATTACK': '4',
    'RECOGNITION_ATTACK': '1',
    'WRITE_ATTACK': '5',
    'FORCE_ERROR_ATTACK': '3',
    'RESPONSE_ATTACK': '2',
    'MITM_UNALTERED': '6',
    'NORMAL': '0'
}

# Label encoding
electra_df['label'] = electra_df['label'].map(label_encoding)

# Separating instances with the 'NORMAL' label
normal_instances = electra_df[electra_df['label'] == '0']

# Selecting 8% of instances with the 'NORMAL' label randomly
sampled_normal_instances = normal_instances.sample(frac=0.008, random_state=42)

# Removing instances of the three attack classes (RECOGNITION_ATTACK, FORCE_ERROR_ATTACK, RESPONSE_ATTACK) to form X_tr
excluded_classes = ['1', '3', '4']
excluded_classes1 = ['0','1', '3', '4']
X_tr = electra_df[~electra_df['label'].isin(excluded_classes1)]

# Concatenating the sampled normal instances with X_tr
X_tr = pd.concat([X_tr, sampled_normal_instances])

# Creating X_te by selecting instances belonging to the three attack classes
X_te = electra_df[electra_df['label'].isin(excluded_classes)]

# Reseting the index of X_tr and X_te
X_tr.reset_index(drop=True, inplace=True)
X_te.reset_index(drop=True, inplace=True)


In [7]:
# Removing instances with 'nan' labels from X_tr
X_tr = X_tr.dropna(subset=['label'])

In [8]:
# Printing the shape of X_tr to verify the dimensions
print("X_tr shape:", X_tr.shape)

# Printing the shape of X_te to verify the dimensions
print("X_te shape:", X_te.shape)


X_tr shape: (1687542, 11)
X_te shape: (817670, 11)


In [9]:
# S_tr Creation using SAE
# Normalizing the data
normalized_X_tr = normalizeFeature(X_tr.transpose()).transpose()

  feat = x / feature_norm[:, np.newaxis]


In [10]:
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="Description of your program.")
    parser.add_argument('--ld', type=float, default=0.01, help='Value of lambda (ld)')
    args, unknown = parser.parse_known_args()
    return args

# Calling parse_args() to get the parsed arguments
opts = parse_args()

In [11]:
# Training SAE
W = SAE(normalized_X_tr.transpose(), normalized_X_tr.transpose(), opts.ld)

# Encoding X_tr using SAE
S_tr = np.dot(normalized_X_tr, normalizeFeature(W).transpose())

  feat = x / feature_norm[:, np.newaxis]


In [12]:
# Printing the shape of S_tr to verify the dimensions
print("S_tr shape:", S_tr.shape)

S_tr shape: (1687542, 11)


In [13]:
# S_te Creation using SAE
# Normalizing the data
normalized_X_te = normalizeFeature(X_te.transpose()).transpose()

  feat = x / feature_norm[:, np.newaxis]


In [14]:
# Training SAE
V = SAE(normalized_X_te.transpose(), normalized_X_te.transpose(), opts.ld)

# Encoding X_tr using SAE
S_te = np.dot(normalized_X_te, normalizeFeature(V).transpose())

  feat = x / feature_norm[:, np.newaxis]


In [15]:
# Printing the shape of S_tr to verify the dimensions
print("S_te shape:", S_te.shape)

S_te shape: (817670, 11)


In [16]:
# Test_labels and Testclasses_id Creation
# Creating Test_labels and Testclasses_id arrays for X_te
Test_labels = X_te['label'].values.astype(str)
Testclasses_id = X_te['label'].unique().astype(str)

In [17]:
# Printing the shape to verify the dimensions
print("Test_labels:", Test_labels.shape)
print("Testclasses_id:", Testclasses_id.shape)


Test_labels: (817670,)
Testclasses_id: (3,)
