### Importing libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### **Importing the datasets**


---


The HAI dataset was collected from a realistic industrial control system (ICS) testbed, augmented with a Hardware-In-the-Loop (HIL) simulator that emulates steam-turbine and pumped-storage hydropower generation processes. The HAI 22.04 release features considerably more sophisticated attacks, making detection markedly harder than in earlier releases. A comparison of the baseline TaPR scores from HAICon 2020 and HAICon 2021 indicates that detection in HAI 22.04 is roughly four times as difficult as in HAI 21.03.

In [None]:
# Load the training split; column 0 is left out of the feature matrix and the
# final column supplies the labels.
df_train = pd.read_csv('train.csv')
x_train, y_train = df_train.iloc[:, 1:-1].values, df_train.iloc[:, -1].values

# Load the test split the same way, discarding any row that has a missing value.
df_test = pd.read_csv('test.csv').dropna()
x_test, y_test = df_test.iloc[:, 1:-1].values, df_test.iloc[:, -1].values

In [None]:
# Count the rows per class label once and reuse the result for both lookups
# (the column was previously scanned twice).
attack_counts = df_train['Attack'].value_counts()
# .get(label, 0) yields 0 when a label never occurs instead of raising KeyError.
frequency_of_0 = attack_counts.get(0, 0)
frequency_of_1 = attack_counts.get(1, 0)

print(f"Frequency of 0: {frequency_of_0}")
print(f"Frequency of 1: {frequency_of_1}")

Frequency of 0: 317594
Frequency of 1: 7607


### Summary of datasets

In [None]:
# Print column names, non-null counts and dtypes for the training frame.
df_train.info()
# Per-column summary statistics; as the cell's last expression, this is what gets displayed.
df_train.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 325201 entries, 0 to 325200
Data columns (total 88 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   timestamp     325201 non-null  object 
 1   P1_B2004      325201 non-null  float64
 2   P1_B2016      325201 non-null  float64
 3   P1_B3004      325201 non-null  float64
 4   P1_B3005      325201 non-null  float64
 5   P1_B4002      325201 non-null  float64
 6   P1_B4005      325201 non-null  float64
 7   P1_B400B      325201 non-null  float64
 8   P1_B4022      325201 non-null  float64
 9   P1_FCV01D     325201 non-null  float64
 10  P1_FCV01Z     325201 non-null  float64
 11  P1_FCV02D     325201 non-null  float64
 12  P1_FCV02Z     325201 non-null  float64
 13  P1_FCV03D     325201 non-null  float64
 14  P1_FCV03Z     325201 non-null  float64
 15  P1_FT01       325201 non-null  float64
 16  P1_FT01Z      325201 non-null  float64
 17  P1_FT02       325201 non-null  float64
 18  P1_F

Unnamed: 0,P1_B2004,P1_B2016,P1_B3004,P1_B3005,P1_B4002,P1_B4005,P1_B400B,P1_B4022,P1_FCV01D,P1_FCV01Z,...,P4_HT_PS,P4_LD,P4_ST_FD,P4_ST_GOV,P4_ST_LD,P4_ST_PO,P4_ST_PS,P4_ST_PT01,P4_ST_TT01,Attack
count,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,...,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0,325201.0
mean,0.130551,1.119767,452.329647,988.489865,26.603878,17.619349,562.395995,30.401249,13.376644,13.020248,...,4.386058,365.825227,-2.8e-05,18206.667572,333.149549,333.414131,18.501361,9976.725517,27133.814579,0.023392
std,0.044084,0.114957,29.898207,29.082949,1.054214,22.739745,717.377481,1.099583,11.900131,12.082759,...,3.0347,60.037508,0.001637,1918.452201,38.98567,35.583305,5.512458,32.004469,102.945173,0.151144
min,0.059445,0.77193,402.14911,952.00659,25.46139,0.0,7.15028,28.26935,0.0,0.27618,...,0.716042,230.75812,-0.01251,12758.0,229.70923,232.69312,2.5498,9669.0,26855.0,0.0
25%,0.08771,1.04627,421.57135,960.12628,25.46139,0.0322,31.97761,29.30814,5.6941,5.05981,...,0.716042,318.26898,-0.00109,17091.0,307.58099,312.84363,13.62378,9956.0,27068.0,0.0
50%,0.14577,1.11444,451.94196,984.47455,26.92264,11.30904,273.29919,30.5687,13.3082,12.91046,...,2.85153,364.40247,-7e-05,18124.0,332.37488,331.72382,20.989594,9973.0,27137.0,0.0
75%,0.15916,1.18511,482.31079,1008.80188,27.736847,23.41021,784.37805,31.45462,15.8873,15.75622,...,7.08818,413.68268,0.00101,19191.0,355.30597,351.43591,21.25391,9999.0,27217.0,0.0
max,0.19972,1.62153,494.23264,1046.19189,27.82839,100.0,3190.0,33.14033,81.36914,81.87256,...,8.81242,499.78296,0.01085,26695.0,495.62354,490.10779,26.74452,10248.0,27421.0,1.0


In [None]:
# Print column names, non-null counts and dtypes for the test frame.
df_test.info()
# Per-column summary statistics; as the cell's last expression, this is what gets displayed.
df_test.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129600 entries, 0 to 129599
Data columns (total 88 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   timestamp     129600 non-null  object 
 1   P1_B2004      129600 non-null  float64
 2   P1_B2016      129600 non-null  float64
 3   P1_B3004      129600 non-null  float64
 4   P1_B3005      129600 non-null  float64
 5   P1_B4002      129600 non-null  float64
 6   P1_B4005      129600 non-null  float64
 7   P1_B400B      129600 non-null  float64
 8   P1_B4022      129600 non-null  float64
 9   P1_FCV01D     129600 non-null  float64
 10  P1_FCV01Z     129600 non-null  float64
 11  P1_FCV02D     129600 non-null  float64
 12  P1_FCV02Z     129600 non-null  float64
 13  P1_FCV03D     129600 non-null  float64
 14  P1_FCV03Z     129600 non-null  float64
 15  P1_FT01       129600 non-null  float64
 16  P1_FT01Z      129600 non-null  float64
 17  P1_FT02       129600 non-null  float64
 18  P1_F

Unnamed: 0,P1_B2004,P1_B2016,P1_B3004,P1_B3005,P1_B4002,P1_B4005,P1_B400B,P1_B4022,P1_FCV01D,P1_FCV01Z,...,P4_HT_PS,P4_LD,P4_ST_FD,P4_ST_GOV,P4_ST_LD,P4_ST_PO,P4_ST_PS,P4_ST_PT01,P4_ST_TT01,Attack
count,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,...,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0,129600.0
mean,0.115783,1.108963,454.016721,1011.474169,25.665021,10.867804,346.586848,29.475678,9.915436,9.490678,...,4.288056,366.512806,-2.4e-05,18279.336127,334.098159,334.363649,18.370883,9973.818333,27054.488511,0.034128
std,0.030014,0.109043,20.765086,28.187576,0.06715,15.124039,468.713602,0.351829,6.634874,6.8335,...,2.611195,59.283106,0.001646,1914.092136,38.947866,35.433585,8.607517,28.094692,71.166853,0.181559
min,0.06324,0.78515,419.7265,955.51141,25.46139,0.0,11.12275,28.35557,0.0,0.29144,...,2.40398,230.52301,-0.01128,12806.0,230.9751,234.28461,6.8288,9748.0,26901.0,0.0
25%,0.08793,1.04241,432.86948,973.44733,25.68717,0.0,27.01222,29.26876,4.818848,4.54101,...,2.40398,319.29974,-0.00109,17170.0,308.61182,313.92865,6.8288,9951.0,26993.0,0.0
50%,0.11238,1.10267,456.42471,1019.37964,25.68717,4.367945,199.81015,29.46178,8.06654,7.657625,...,3.34428,364.49292,0.0,18171.0,332.95355,332.15784,21.92189,9973.0,27058.0,0.0
75%,0.14577,1.166585,467.03854,1032.92322,25.68717,14.9943,388.49796,29.65137,14.767672,14.28375,...,3.34428,413.79126,0.00101,19213.0,355.59534,351.39972,26.76094,9999.0,27106.0,0.0
max,0.19464,1.58444,489.16266,1047.18567,25.68717,67.14787,2228.75806,31.02771,35.03899,34.91364,...,9.9604,498.58942,0.01548,26731.0,497.23309,493.03748,28.4608,10225.0,27255.0,1.0


### Handling missing values

In [None]:
from sklearn.impute import SimpleImputer

# Learn each column's mean from the training features, then overwrite the
# training array in place with NaN entries replaced by that mean.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean').fit(x_train)
x_train[:, :] = imputer.transform(x_train)

### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply those same
# statistics to both splits so the test set does not influence the scaling.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [None]:
# Inspect the current training feature matrix (NumPy abbreviates large arrays with "...").
print(x_train)

[[-1.61297311 -0.66204821 -0.70243625 ...  0.45138416  0.72722714
   0.45835557]
 [-1.61297311 -0.67500959 -0.70243625 ...  0.45138416  0.69598145
   0.3903581 ]
 [-1.61297311 -0.70084536 -0.70243625 ...  0.45138416  0.69598145
   0.37093026]
 ...
 [ 1.45377783  3.96238056  1.23194913 ...  0.74402475  2.38324842
  -0.04676846]
 [ 1.45377783  3.83694224  1.23194913 ...  0.74402475  2.38324842
  -0.02734061]
 [ 1.45377783  3.73046756  1.23194913 ...  0.74402475  2.38324842
  -0.07591023]]


In [None]:
# Inspect the current test feature matrix (NumPy abbreviates large arrays with "...").
print(x_test)

[[ 1.45377783 -1.04897583  1.23194913 ...  1.80671748 -0.14765203
  -0.74617095]
 [ 1.45377783 -1.04897583  1.23194913 ...  1.80671748 -0.14765203
  -0.80445449]
 [ 1.45377783 -0.98529979  1.23194913 ...  1.80671748 -0.14765203
  -0.81416842]
 ...
 [-1.38624152 -0.62090236  0.49196649 ...  1.49835003 -1.89741036
  -1.56214053]
 [-1.38624152 -0.671704    0.49196649 ...  1.49835003 -1.89741036
  -1.54271268]
 [-1.38624152 -0.68762301  0.49196649 ...  1.49835003 -1.89741036
  -1.61071015]]


### Training the classification model on the training dataset

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Hyperparameters: 10 trees, entropy split criterion, fixed seed for repeatable runs.
rf_params = {'n_estimators': 10, 'criterion': 'entropy', 'random_state': 0}
classifier = RandomForestClassifier(**rf_params)
# Kept as the cell's last expression so the fitted estimator is still displayed.
classifier.fit(x_train, y_train)

### Predicting test set results

In [None]:
# Predict a label for every row of the test feature matrix.
y_pred = classifier.predict(x_test)
# Print predictions (left column) side by side with the true labels (right column).
print(np.column_stack((y_pred, y_test)))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


### Making the confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = confusion_matrix(y_test, y_pred)
# Fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_test, y_pred)
print(cm)
print(accuracy)
# NOTE(review): f1_score is imported but never called in this cell. The classes
# are heavily imbalanced, so accuracy alone may overstate performance --
# consider reporting F1 / recall for the attack class as well.

[[125057    120]
 [  4227    196]]
0.9664583333333333
