In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import QuantileTransformer

In [2]:
# Root folder of the project. The input CSVs, the saved scaler and the image
# folders used in the cells below are all located by appending to this path.
# NOTE(review): absolute, machine-specific path — edit before running elsewhere.
base_path = r"C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project"

In [3]:
# Read the train and test splits. header=None tells pandas the files have no
# header row, so the columns come back numbered until they are named.
train_csv_path = base_path + r"\Dataset\Final Dataset\Train\train.csv"
test_csv_path = base_path + r"\Dataset\Final Dataset\Test\test.csv"

train = pd.read_csv(train_csv_path, header=None)
test = pd.read_csv(test_csv_path, header=None)

In [4]:
# Names for the 41 feature columns followed by the "attack" label column.
# Both splits share this schema, so it is declared once and applied to each
# frame instead of repeating the full list per frame.
column_names = [
    "duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes",
    "land", "wrong_fragment", "urgent", "hot", "num_failed_logins",
    "logged_in", "num_compromised", "root_shell", "su_attempted", "num_root",
    "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
    "is_host_login", "is_guest_login", "count", "srv_count", "serror_rate",
    "srv_serror_rate", "rerror_rate", "srv_rerror_rate", "same_srv_rate",
    "diff_srv_rate", "srv_diff_host_rate", "dst_host_count", "dst_host_srv_count",
    "dst_host_same_srv_rate", "dst_host_diff_srv_rate", "dst_host_same_src_port_rate",
    "dst_host_srv_diff_host_rate", "dst_host_serror_rate", "dst_host_srv_serror_rate",
    "dst_host_rerror_rate", "dst_host_srv_rerror_rate", "attack"
]

# pandas raises a length-mismatch error here if a file does not have exactly
# len(column_names) columns.
train.columns = column_names
test.columns = column_names

In [5]:
# Columns holding symbolic (categorical) values that are replaced by integer codes.
symbolic_columns = [
    "protocol_type", "service", "flag", "land", "logged_in",
    "is_host_login", "is_guest_login"
]

# Encode both splits against ONE category list per column, taken from the
# training data. Building a separate pd.Categorical per frame numbers the
# categories by the values present in that frame only, so the same value could
# receive different codes in train and test whenever their value sets differ.
for col in symbolic_columns:
    # Unique training values, in the order pandas assigns codes to them.
    categories = pd.Categorical(train[col]).categories
    train[col] = pd.Categorical(train[col], categories=categories).codes
    # A value that occurs only in the test split is not in `categories` and is
    # encoded as -1 (pandas' code for "not a known category").
    test[col] = pd.Categorical(test[col], categories=categories).codes


In [6]:
# Class index for every traffic label. "normal" is class 0 and each attack name
# takes the index of its position in the tuple below (0 through 36).
y_values = {
    attack_name: class_index
    for class_index, attack_name in enumerate((
        "normal",
        "neptune",
        "smurf",
        "saint",
        "snmpgetattack",
        "ipsweep",
        "mailbomb",
        "guess_passwd",
        "snmpguess",
        "warezmaster",
        "satan",
        "back",
        "processtable",
        "portsweep",
        "httptunnel",
        "mscan",
        "apache2",
        "pod",
        "ps",
        "nmap",
        "multihop",
        "xterm",
        "teardrop",
        "rootkit",
        "named",
        "ftp_write",
        "buffer_overflow",
        "sqlattack",
        "xsnoop",
        "perl",
        "land",
        "phf",
        "sendmail",
        "xlock",
        "udpstorm",
        "worm",
        "loadmodule",
    ))
}


In [7]:
# Split each frame into features (every column except the last) and labels
# (the last column, translated to its class index through y_values).
# .copy() detaches the feature frames from train/test so that cleaning them
# afterwards neither writes through to the source frames nor triggers pandas'
# SettingWithCopyWarning.
X_train = train.iloc[:, :-1].copy()
Y_train = train.iloc[:, -1].map(y_values)
X_test = test.iloc[:, :-1].copy()
Y_test = test.iloc[:, -1].map(y_values)

# Series.map yields NaN for any label that is not a key of y_values. Stop here
# with a clear message instead of failing later when a NaN label is converted
# to an integer class index.
for split_name, raw_labels, mapped_labels in (
    ("train", train.iloc[:, -1], Y_train),
    ("test", test.iloc[:, -1], Y_test),
):
    unknown_labels = sorted(raw_labels[mapped_labels.isna()].astype(str).unique())
    if unknown_labels:
        raise ValueError(
            f"Unmapped attack labels in {split_name} data: {unknown_labels}"
        )

In [8]:
# Make the feature frames finite: infinities of either sign and missing values
# all become 0 (previously only +inf was replaced; -inf was left untouched).
# The results are reassigned rather than mutated with inplace=True, which
# avoids writing into a frame that may be a slice of train/test.
X_train = X_train.replace([np.inf, -np.inf], 0).fillna(0)
X_test = X_test.replace([np.inf, -np.inf], 0).fillna(0)

In [9]:
# Convert all four feature/label containers to float32 NumPy arrays.
X_train, Y_train, X_test, Y_test = (
    np.array(values, dtype=np.float32)
    for values in (X_train, Y_train, X_test, Y_test)
)

In [10]:
# Show the feature and label array shapes for the train split, then the test split.
for features, labels in ((X_train, Y_train), (X_test, Y_test)):
    print(features.shape, labels.shape)

(248822, 41) (248822,)
(62206, 41) (62206,)


In [11]:
# Fit the quantile transform on the training features only, then apply the
# same fitted transform to the test features.
# random_state is fixed because, per the scikit-learn documentation, the
# transformer draws a random subsample of the rows when computing quantiles;
# without a seed the scaled values differ from run to run.
scaler = QuantileTransformer(random_state=42)

# fit_transform already returns the scaled training data, so keep its result
# instead of discarding it and transforming the training set a second time.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
import joblib

# Persist the fitted scaler under <base_path>/Results/Preprocessing.
scaler_dir = os.path.join(base_path, "Results", "Preprocessing")
# joblib.dump does not create missing folders, so make sure the target exists.
os.makedirs(scaler_dir, exist_ok=True)
joblib.dump(scaler, os.path.join(scaler_dir, "quantile_transformer.joblib"))

['C:\\Users\\Sagar\\Python files_Jupiter\\Git Repo Local\\Smart Cities\\Project/Results/Preprocessing/quantile_transformer.joblib']

In [13]:
import numpy as np
import tensorflow as tf
import os

# Export the scaled training records as single-channel PNG images. Every run of
# `window_size` consecutive records becomes one image of shape
# (window_size, num_features, 1), stored in the folder named after the class
# index of the LAST record in the window.
target_path = base_path + r"\Dataset\Final Dataset\Train\Images"

num_classes = 37                          # class indices 0-36: one folder and one one-hot column each
window_size = 26                          # number of consecutive records per image
num_features = X_train_scaled.shape[1]    # image width, taken from the data

# Create one sub-folder per class index if it does not exist yet.
for i in range(num_classes):
    dir_path = os.path.join(target_path, str(i))
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        print(f"Created directory: {dir_path}")
    else:
        print(f"Directory already exists: {dir_path}")

# CSV receiving one one-hot label row per image, in image order.
# NOTE(review): unlike the image folder, this path is relative to the current
# working directory rather than base_path — confirm that this is intended.
label_save_path = r".\Dataset\Final Dataset\Train\Labels\train_img.csv"
label_dir = os.path.dirname(label_save_path)
if label_dir:
    os.makedirs(label_dir, exist_ok=True)

index = 0  # running image number, used in the file name

# The label file is opened once, in write mode. Re-opening it in append mode for
# every window was slow and, on a re-run, appended a second copy of all rows
# while the images (same file names) were simply overwritten, leaving the CSV
# out of step with the images.
with open(label_save_path, "wb") as label_file:
    for i in range(len(X_train_scaled) - window_size + 1):
        # The window is labelled with the class of its last record.
        last_record_index = i + window_size - 1
        y_val = int(Y_train[last_record_index])

        # One-hot encoding of the class index.
        label = np.zeros(num_classes, dtype=int)
        label[y_val] = 1

        # Add a trailing channel axis so the window is saved as a 1-channel image.
        arr = X_train_scaled[i:i + window_size].reshape(window_size, num_features, 1)

        img_save_path = os.path.join(target_path, str(y_val), f"img_{index}_{y_val}.png")
        tf.keras.utils.save_img(img_save_path, arr, file_format='png')

        np.savetxt(label_file, [label], fmt='%d', delimiter=",")

        index += 1

Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Train\Images\0
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Train\Images\1
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Train\Images\2
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Train\Images\3
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Train\Images\4
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Train\Images\5
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Train\Images\6
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final

In [14]:
import numpy as np
import tensorflow as tf
import os

# Export the scaled test records as single-channel PNG images. Every run of
# `window_size` consecutive records becomes one image of shape
# (window_size, num_features, 1), stored in the folder named after the class
# index of the LAST record in the window.
test_target_path = base_path + r"\Dataset\Final Dataset\Test\Images"

num_classes = 37                         # class indices 0-36: one folder and one one-hot column each
window_size = 26                         # number of consecutive records per image
num_features = X_test_scaled.shape[1]    # image width, taken from the data

# Create one sub-folder per class index if it does not exist yet.
for i in range(num_classes):
    dir_path = os.path.join(test_target_path, str(i))
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
        print(f"Created directory: {dir_path}")
    else:
        print(f"Directory already exists: {dir_path}")

# CSV receiving one one-hot label row per image, in image order.
# NOTE(review): unlike the image folder, this path is relative to the current
# working directory rather than base_path — confirm that this is intended.
label_save_path = r".\Dataset\Final Dataset\Test\Labels\test_img.csv"
label_dir = os.path.dirname(label_save_path)
if label_dir:
    os.makedirs(label_dir, exist_ok=True)

index = 0  # running image number, used in the file name

# The label file is opened once, in write mode. Re-opening it in append mode for
# every window was slow and, on a re-run, appended a second copy of all rows
# while the images (same file names) were simply overwritten, leaving the CSV
# out of step with the images.
with open(label_save_path, "wb") as label_file:
    for i in range(len(X_test_scaled) - window_size + 1):
        # The window is labelled with the class of its last record.
        last_record_index = i + window_size - 1
        y_val = int(Y_test[last_record_index])

        # One-hot encoding of the class index.
        label = np.zeros(num_classes, dtype=int)
        label[y_val] = 1

        # Add a trailing channel axis so the window is saved as a 1-channel image.
        arr = X_test_scaled[i:i + window_size].reshape(window_size, num_features, 1)

        img_save_path = os.path.join(test_target_path, f"{y_val}", f"img_{index}_{y_val}.png")
        tf.keras.utils.save_img(img_save_path, arr, file_format='png')

        np.savetxt(label_file, [label], fmt='%d', delimiter=",")

        index += 1


Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Test\Images\0
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Test\Images\1
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Test\Images\2
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Test\Images\3
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Test\Images\4
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Test\Images\5
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Dataset\Test\Images\6
Created directory: C:\Users\Sagar\Python files_Jupiter\Git Repo Local\Smart Cities\Project\Dataset\Final Datase