## Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import VarianceThreshold
from matplotlib import colors

## Loading Data

In [None]:
# Read the SWaT attack workbook; the first spreadsheet row is skipped so the
# following row is used as the header.
df = pd.read_excel(
    'SWaT_Dataset_Attack_v0.xlsx',
    skiprows=1,
)
df

In [None]:
# Display the dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

## Feature Engineering and Selection

### Fixing class labels

In [None]:
# Show how many rows carry each label in the target column.
print(
    "Total target classes:",
    "----------------------",
    df["Normal/Attack"].value_counts(),
    sep="\n",
)

In [None]:
# Normalise the misspelled 'A ttack' label to 'Attack'.
# The write goes through df.loc so it is applied to df itself: calling an
# in-place method on df['Normal/Attack'] is chained assignment, which can act
# on a temporary object and leave df unchanged (always so under pandas
# Copy-on-Write).
df.loc[df['Normal/Attack'] == 'A ttack', 'Normal/Attack'] = 'Attack'

In [None]:
# Print the per-label row counts of the target column again.
print(
    "Total target classes:",
    "----------------------",
    df["Normal/Attack"].value_counts(),
    sep="\n",
)

### Variance-Threshold Test for numerical variables

In [None]:
#numerical variables
# Names of the int64 / float64 columns of df.
numerical = df.select_dtypes(include=['int64', 'float64']).columns
numerical

In [None]:
# Preview only the numeric columns.
df.loc[:, numerical]

In [None]:
# Fit a variance filter on the numeric columns. The threshold is compared
# against each feature's variance; low-variance features are rejected.
var_thr = VarianceThreshold(threshold=0.2).fit(df[numerical])

# Boolean mask over the numeric columns, in column order:
#   True  -> feature is kept by the selector (variance above the threshold)
#   False -> feature is rejected as low-variance
var_thr.get_support()

In [None]:
# Numeric columns rejected by the variance filter. get_support() is True for
# the features the selector keeps, so the mask is inverted. The mask is
# evaluated once instead of re-slicing df for every column.
cons_cols = numerical[~var_thr.get_support()].tolist()

In [None]:
# List, one per line, the column names collected in cons_cols.
for features in cons_cols:
    print(features)

In [None]:
# Remove the columns listed in cons_cols from the working frame.
df = df.drop(columns=cons_cols)

In [None]:
# Display the frame as it stands at this point in the notebook.
df

### Correlation matrix for numerical variables

In [None]:
# Pairwise correlation of the numeric columns only. Non-numeric columns (such
# as the 'Normal/Attack' label) are excluded explicitly, because
# DataFrame.corr() on pandas >= 2.0 no longer drops them silently and raises.
corrmat = df.select_dtypes(include='number').corr()
plt.figure(figsize=(15, 15))
# Diverging palette centred on 0 so positive and negative correlations are
# visually distinct; each cell is annotated with its coefficient.
sns.heatmap(corrmat, annot=True, cmap="PiYG", center=0, linewidths=.9)