In [None]:
import pandas as pd


In [None]:
# Read the two UNSW-NB15 splits straight from the project's GitHub mirror.
train, test = (
    pd.read_csv(csv_url)
    for csv_url in (
        'https://raw.githubusercontent.com/Nir-J/ML-Projects/master/UNSW-Network_Packet_Classification/UNSW_NB15_training-set.csv',
        'https://raw.githubusercontent.com/Nir-J/ML-Projects/master/UNSW-Network_Packet_Classification/UNSW_NB15_testing-set.csv',
    )
)
# Stack both splits into one frame and discard the 'id' column.
# NOTE: the index is not reset, so each split keeps its own row labels and
# labels repeat across the two halves of `df`.
df = pd.concat([train, test]).drop(columns=['id'])

In [None]:
def BinMap(df):
    """Collapse ``attack_cat`` into a binary target and drop ``label``.

    Every ``attack_cat`` value equal to ``"Normal"`` becomes 0; any other
    value becomes 1. The input frame is never modified: the result is a new
    DataFrame, so a failed call cannot leave the caller's data half-converted.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains both an ``attack_cat`` and a ``label`` column.

    Returns
    -------
    pandas.DataFrame or None
        A new frame without ``label`` and with ``attack_cat`` as int64 0/1.
        ``None`` is returned (after printing a message) when ``df`` is not a
        DataFrame or when one of the two required columns is missing.
    """
    # Validate the argument up front so unrelated TypeErrors raised further
    # down are not misreported as "wrong argument type".
    if not isinstance(df, pd.DataFrame):
        print("Expected type of arg: pandas.core.frame.DataFrame")
        return None
    try:
        # Both lookups happen before anything is built, and nothing is
        # written back into ``df``.
        is_attack = df['attack_cat'].ne("Normal").astype("int64")
        result = df.drop("label", axis=1).assign(attack_cat=is_attack)
    except KeyError:
        print("Please ensure df has cols named 'attack_cat' and 'label'")
        return None
    return result

In [None]:
# Turn attack_cat into a 0/1 target and drop the 'label' column.
# BinMap prints a message and returns None on bad input, so `df` can end up
# as None here if the frame is missing the expected columns.
df = BinMap(df)

In [None]:
from sklearn.preprocessing import MinMaxScaler
from numpy import array
from pandas.core.frame import DataFrame
 
class Scaling:
    """Namespace for feature-scaling helpers; holds no state of its own."""

    def __init__(self) -> None:
        return

    @staticmethod
    def scaling(df, col):
        """Min-max scale the given columns into the range [0, 1].

        Each column is fitted and transformed on its own with a
        ``MinMaxScaler``. The work is done on a copy, so the caller's frame
        is never modified, not even when scaling fails part-way through.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame holding the columns to scale.
        col : iterable
            Labels of the columns to scale. An empty iterable returns an
            unscaled copy of ``df``.

        Returns
        -------
        pandas.DataFrame or None
            A new frame with the requested columns scaled. ``None`` is
            returned (after printing a message) when ``df`` is not a
            DataFrame or when scaling raises.
        """
        # Check the argument type outside the try block so that a TypeError
        # raised while scaling is reported with its real message below.
        if not isinstance(df, DataFrame):
            print("Expected type of arg: pandas.core.frame.DataFrame")
            return None
        try:
            scaled = df.copy()
            scaler = MinMaxScaler(feature_range=(0, 1))
            for name in col:
                # MinMaxScaler expects a 2-D (n_samples, 1) array.
                values = array(scaled[name]).reshape(-1, 1)
                scaled[name] = scaler.fit_transform(values).ravel()
        except Exception as ex:
            print(f"Exception occured! {str(ex)}")
            return None
        return scaled

In [None]:
# Min-max scale every float64/int64 column; other dtypes are passed through.
df = Scaling.scaling(
    df,
    df.select_dtypes(include=["float64", "int64"]).columns,
)

In [None]:
# Independent copy of the scaled frame; later cells label-encode this copy
# for feature scoring while `df` itself is one-hot encoded afterwards.
df_copy = df.copy(deep=True)

In [None]:
from pandas import get_dummies
from pandas.core.frame import DataFrame
from sklearn.preprocessing import LabelEncoder
 
class Encoding:
    """Namespace for categorical-encoding helpers; holds no state of its own."""

    def __init__(self) -> None:
        return

    @staticmethod
    def OHencoding(df):
        """One-hot encode every object-dtype column of ``df``.

        The encoded columns are removed and their indicator columns are
        appended after the remaining columns. The input frame is not
        modified.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame whose object-dtype columns should be expanded.

        Returns
        -------
        pandas.DataFrame or None
            A new frame with the indicator columns. ``None`` is returned
            (after printing a message) when ``df`` is not a DataFrame or
            when encoding raises.
        """
        if not isinstance(df, DataFrame):
            print("Expected type of arg: pandas.core.frame.DataFrame")
            return None
        try:
            # select categorical columns
            categorical = list(df.select_dtypes(['object']).columns)
            if not categorical:
                # Nothing to encode: hand back a copy, never the input itself.
                return df.copy()
            # Let get_dummies drop the source columns and append all
            # indicators in one step instead of inserting them one by one,
            # which fragments the frame for high-cardinality columns.
            encoded = get_dummies(df, columns=categorical)
        except Exception as ex:
            print(f"An Exception Occured! {str(ex)}")
            return None
        return encoded

    @staticmethod
    def Labencoding(df):
        """Label-encode every object-dtype column of ``df``.

        Each object-dtype column is replaced by the integer codes produced
        by ``LabelEncoder.fit_transform``. The work is done on a copy, so the
        caller's frame keeps its original values.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame whose object-dtype columns should be encoded.

        Returns
        -------
        pandas.DataFrame or None
            A new frame with the encoded columns. ``None`` is returned
            (after printing a message) when ``df`` is not a DataFrame or
            when encoding raises.
        """
        if not isinstance(df, DataFrame):
            print("Expected type of arg: pandas.core.frame.DataFrame")
            return None
        try:
            encoded = df.copy()
            enc = LabelEncoder()
            # Label encode all the categorical columns; the encoder is
            # refitted for each column.
            for name in encoded.select_dtypes(['object']).columns:
                encoded[name] = enc.fit_transform(encoded[name])
        except Exception as ex:
            print(f"An Exception Occured! {str(ex)}")
            return None
        return encoded

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.model_selection import train_test_split
 

class FeatureSelection:
    """Namespace for feature-selection helpers; holds no state of its own."""

    def __init__(self) -> None:
        return

    @staticmethod
    def select_features(X, y, k=15, target='attack_cat'):
        """Pick the ``k`` best columns of ``X`` by ANOVA F-score against ``y``.

        Parameters
        ----------
        X : pandas.DataFrame
            Predictor columns; must be numeric for ``f_classif``.
        y : array-like
            Class labels aligned with the rows of ``X``.
        k : int or "all", default 15
            Number of columns to keep. Values larger than the number of
            columns in ``X`` are clamped, because ``SelectKBest`` rejects
            them.
        target : str, default 'attack_cat'
            Name appended to the end of the returned list so the result can
            be used directly to slice a frame that still holds the target.

        Returns
        -------
        list or None
            Names of the selected columns (in ``X`` column order) followed by
            ``target``. ``None`` is returned (after printing a message) when
            selection raises.
        """
        try:
            # Never ask for more features than X actually has.
            if k != "all":
                k = min(k, X.shape[1])
            # Select K best features based on f-score
            fs = SelectKBest(score_func=f_classif, k=k)
            fs.fit(X, y)
            # get_support() is a boolean mask over X's columns.
            nfs = list(X.columns[fs.get_support()])
            # append target feature and return
            nfs.append(target)
        except Exception as ex:
            print(f"An Exception Occured! {str(ex)}")
            return None
        return nfs

In [None]:
# Label-encode the object-dtype columns of the working copy so it can be scored.
df_copy = Encoding.Labencoding(df_copy)

# Feature selection: separate the predictors from the target, then rank them.
X = df_copy.drop(columns='attack_cat')
y = df_copy['attack_cat']
new_fs = FeatureSelection.select_features(X, y)
print("Selected features: ", new_fs, sep="\n")

# Restrict the main frame to the selected columns plus the target.
df = df[new_fs]

Selected features: 
['state', 'rate', 'sttl', 'dload', 'swin', 'stcpb', 'dtcpb', 'dwin', 'dmean', 'ct_state_ttl', 'ct_src_dport_ltm', 'ct_dst_sport_ltm', 'ct_dst_src_ltm', 'ct_src_ltm', 'ct_srv_dst', 'attack_cat']


In [None]:
# One-hot encode the object-dtype columns that remain among the selected features.
df = Encoding.OHencoding(df)

In [None]:
# Display the encoded frame as the cell output (pandas truncates the rendering).
df

Unnamed: 0,rate,sttl,dload,swin,stcpb,dtcpb,dwin,dmean,ct_state_ttl,ct_src_dport_ltm,...,state_CLO,state_CON,state_ECO,state_FIN,state_INT,state_PAR,state_REQ,state_RST,state_URN,state_no
0,0.000074,0.988235,0.000379,1.0,0.144768,0.512828,1.0,0.028667,0.000000,0.000000,...,0,0,0,1,0,0,0,0,0,0
1,0.000078,0.243137,0.022458,1.0,0.330128,0.716524,1.0,0.737333,0.166667,0.000000,...,0,0,0,1,0,0,0,0,0,0
2,0.000014,0.243137,0.002717,1.0,0.492706,0.689918,1.0,0.549333,0.166667,0.000000,...,0,0,0,1,0,0,0,0,0,0
3,0.000014,0.243137,0.000150,1.0,0.257772,0.243882,1.0,0.042667,0.166667,0.000000,...,0,0,0,1,0,0,0,0,0,0
4,0.000033,0.996078,0.000178,1.0,0.567209,0.460351,1.0,0.030000,0.166667,0.017241,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82327,0.200000,0.996078,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.333333,0.000000,...,0,0,0,0,1,0,0,0,0,0
82328,0.000024,0.996078,0.000100,1.0,0.249720,0.764699,1.0,0.029333,0.166667,0.000000,...,0,0,0,1,0,0,0,0,0,0
82329,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.333333,0.000000,...,0,0,0,0,1,0,0,0,0,0
82330,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.333333,0.000000,...,0,0,0,0,1,0,0,0,0,0
