In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
pd.set_option('display.max_columns', None)

In [3]:
file_path = 'Data/Datasets/5G NIDD/Combined.csv'
if os.path.exists(file_path):
    df_5g_nidd = pd.read_csv(file_path)
    print('5G NIDD dataset loaded successfully.')
else:
    print(f'File not found: {file_path}')

  df_5g_nidd = pd.read_csv(file_path)


5G NIDD dataset loaded successfully.


In [4]:
# Replace infinite values (e.g., in 'Rate' columns) with NaN
df_5g_nidd.replace([np.inf, -np.inf], np.nan, inplace=True)
# Fill remaining NaNs. For simplicity, we fill with 0, but for production,
# a more sophisticated imputation strategy (e.g., mean/median) should be used.
df_5g_nidd.fillna(0, inplace=True)

In [5]:
df_5g_nidd = df_5g_nidd.select_dtypes(exclude=['object'])

In [6]:
df_5g_nidd = df_5g_nidd.astype(float)

In [7]:
df_5g_nidd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1215890 entries, 0 to 1215889
Data columns (total 44 columns):
 #   Column      Non-Null Count    Dtype  
---  ------      --------------    -----  
 0   Unnamed: 0  1215890 non-null  float64
 1   Seq         1215890 non-null  float64
 2   Dur         1215890 non-null  float64
 3   RunTime     1215890 non-null  float64
 4   Mean        1215890 non-null  float64
 5   Sum         1215890 non-null  float64
 6   Min         1215890 non-null  float64
 7   Max         1215890 non-null  float64
 8   sTos        1215890 non-null  float64
 9   dTos        1215890 non-null  float64
 10  sTtl        1215890 non-null  float64
 11  dTtl        1215890 non-null  float64
 12  sHops       1215890 non-null  float64
 13  dHops       1215890 non-null  float64
 14  TotPkts     1215890 non-null  float64
 15  SrcPkts     1215890 non-null  float64
 16  DstPkts     1215890 non-null  float64
 17  TotBytes    1215890 non-null  float64
 18  SrcBytes    1215890 no

# Train / Test / Validation split

In [8]:
random_generator = np.random.RandomState(42)
shuffle_index = random_generator.permutation(len(df_5g_nidd))
df_5g_nidd = df_5g_nidd.iloc[shuffle_index].reset_index(drop=True)

In [9]:
train_index = int(0.7 * len(df_5g_nidd))
train_df = df_5g_nidd[:train_index]

In [10]:
valid_index = int(0.85 * len(df_5g_nidd))
valid_df = df_5g_nidd[train_index:valid_index]

In [11]:
test_df = df_5g_nidd[valid_index:]

In [12]:
scaler = StandardScaler()
train_df.iloc[:, :] = scaler.fit_transform(train_df)
valid_df.iloc[:, :] = scaler.transform(valid_df)
test_df.iloc[:, :] = scaler.transform(test_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df.iloc[:, :] = scaler.fit_transform(train_df)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid_df.iloc[:, :] = scaler.transform(valid_df)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df.iloc[:, :] = scaler.transform(test_df)


In [13]:
train_df.to_csv('Data/Datasets/5G NIDD/train_5g_nidd.csv', index=False)
valid_df.to_csv('Data/Datasets/5G NIDD/valid_5g_nidd.csv', index=False)
test_df.to_csv('Data/Datasets/5G NIDD/test_5g_nidd.csv', index=False)

In [14]:
train_df.describe()

Unnamed: 0.1,Unnamed: 0,Seq,Dur,RunTime,Mean,Sum,Min,Max,sTos,dTos,sTtl,dTtl,sHops,dHops,TotPkts,SrcPkts,DstPkts,TotBytes,SrcBytes,DstBytes,Offset,sMeanPktSz,dMeanPktSz,Load,SrcLoad,DstLoad,Loss,SrcLoss,DstLoss,pLoss,SrcGap,DstGap,Rate,SrcRate,DstRate,SrcWin,DstWin,sVid,dVid,SrcTCPBase,DstTCPBase,TcpRtt,SynAck,AckDat
count,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0,851123.0
mean,1.268941e-18,-4.452981e-17,1.507702e-17,1.507702e-17,1.507702e-17,1.507702e-17,1.507702e-17,1.507702e-17,1.518555e-17,1.5636360000000002e-17,3.0354410000000004e-17,-1.9351350000000003e-17,2.3805170000000003e-17,-5.748637e-17,1.6938690000000002e-17,1.5494440000000002e-17,6.3614020000000005e-18,-1.3282140000000002e-17,6.820558e-18,-1.4233850000000002e-17,-2.861796e-17,-1.055684e-16,-1.222859e-16,-1.410862e-18,-3.264184e-18,-6.17774e-18,1.1320290000000002e-17,1.185458e-17,-1.7335240000000002e-17,3.589768e-18,3.059651e-18,-1.047711e-18,-1.769839e-18,4.0071819999999995e-19,-9.517058e-19,-3.462873e-17,7.880792e-18,-1.898403e-17,4.140755e-18,2.896859e-17,-8.223072000000001e-17,-2.4043090000000002e-18,2.911886e-17,-3.526321e-17
std,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001,1.000001
min,-1.624433,-1.068365,-0.806816,-0.806816,-0.806816,-0.806816,-0.806816,-0.806816,-0.06910564,-0.05888659,-1.456298,-0.4835164,-0.6331633,-0.4773339,-0.1670278,-0.2012968,-0.1160896,-0.1197036,-0.103949,-0.06620647,-1.157833,-0.507369,-0.287475,-0.008735143,-0.01013651,-0.008656429,-0.09221462,-0.07509896,-0.06185657,-0.09560831,-0.002446521,-0.003601172,-0.01096909,-0.01027198,-0.009210328,-0.08108891,-0.1205363,-0.3224054,-0.040952,-0.4494838,-0.4063404,-0.2646086,-0.04411618,-0.3881188
25%,-0.8405908,-0.8338822,-0.806816,-0.806816,-0.806816,-0.806816,-0.806816,-0.806816,-0.06910564,-0.05888659,-0.3296773,-0.4835164,-0.3527334,-0.4773339,-0.1670278,-0.147039,-0.1160896,-0.1197036,-0.1022326,-0.06620647,-0.8721267,-0.2198328,-0.287475,-0.008735143,-0.01013651,-0.008656429,-0.09221462,-0.07509896,-0.06185657,-0.09560831,-0.002446521,-0.003601172,-0.01096909,-0.01027198,-0.009210328,-0.08108891,-0.1205363,-0.3224054,-0.040952,-0.4494838,-0.4063404,-0.2646086,-0.04411618,-0.3881188
50%,-0.06085988,-0.3471576,-0.806816,-0.806816,-0.806816,-0.806816,-0.806816,-0.806816,-0.06910564,-0.05888659,-0.3296773,-0.4835164,-0.3527334,-0.4773339,-0.126793,-0.147039,-0.1160896,-0.1183113,-0.1009249,-0.06620647,-0.2306676,-0.2198328,-0.287475,-0.008735143,-0.01013651,-0.008656429,-0.09221462,-0.07509896,-0.06185657,-0.09560831,-0.002446521,-0.003601172,-0.01096909,-0.01027198,-0.009210328,-0.08108891,-0.1205363,-0.3224054,-0.040952,-0.4494838,-0.4063404,-0.2646086,-0.04411618,-0.3881188
75%,0.7192514,0.6091684,0.7191709,0.7191709,0.7191709,0.7191709,0.7191709,0.7191709,-0.06910564,-0.05888659,-0.3296773,-0.4835164,-0.3527334,-0.4773339,-0.126793,-0.09278114,-0.1160896,-0.1183113,-0.1005162,-0.06620647,0.6298988,-0.04868033,-0.287475,-0.008734907,-0.01012697,-0.008656429,-0.09221462,-0.07509896,-0.06185657,-0.09560831,-0.002446521,-0.003601172,-0.01096436,-0.01025464,-0.009210328,-0.08108891,-0.1205363,-0.3224054,-0.040952,-0.4494838,-0.4063404,-0.2646086,-0.04411618,-0.3881188
max,2.118455,2.562921,10.97406,10.97406,10.97406,10.97406,10.97406,10.97406,17.79134,18.54628,3.103832,7.925554,7.218874,20.68189,160.611,45.86361,253.6765,154.2315,36.91009,271.5668,2.551556,9.022403,6.588008,156.6489,443.9486,154.9541,111.2975,83.80738,82.30885,16.83722,619.1278,743.1601,170.5524,223.2191,153.5577,14.73233,198.3783,3.101685,24.41883,3.668027,3.889691,59.1411,76.42095,24.915
