In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn import svm

%matplotlib inline

In [3]:
dataset_train = pd.read_csv("Training_day_9.txt", sep = ",", na_values = ['?','.'])
dataset_test = pd.read_csv("Test_day_9.txt", sep = ",", na_values = ['?','.'])

dataset_train = dataset_train.drop('Unnamed: 0',1)
dataset_test = dataset_test.drop('Unnamed: 0',1)
dataset_train = dataset_train.drop('time',1)
dataset_test = dataset_test.drop('time',1)

In [3]:
data = pd.concat([dataset_train, dataset_test])

In [4]:
data.head()

Unnamed: 0,source_user@domain,destination_user@domain,source_computer,destination_computer,authentication_type,logon_type,authentication_orientation,success/failure,Malignant/Benign
0,C809$@DOM1,C809$@DOM1,C809,C457,Kerberos,Network,LogOn,Success,0
1,C16953$@DOM1,C16953$@DOM1,C16953,C1065,Kerberos,Network,LogOn,Success,0
2,C4664$@DOM1,C4664$@DOM1,C4665,C2106,Kerberos,Network,LogOn,Success,0
3,U22@DOM1,U22@DOM1,C477,C528,Kerberos,Network,LogOn,Success,0
4,C995$@DOM1,C995$@DOM1,C995,C625,Kerberos,Network,LogOn,Success,0


### Processing the data

In [5]:
data.iloc[:,0] = pd.factorize(data.iloc[:,0])[0]
data.iloc[:,1] = pd.factorize(data.iloc[:,1])[0]
data.iloc[:,2] = pd.factorize(data.iloc[:,2])[0]
data.iloc[:,3] = pd.factorize(data.iloc[:,3])[0]
data.iloc[:,4] = pd.factorize(data.iloc[:,4])[0]
data.iloc[:,5] = pd.factorize(data.iloc[:,5])[0]
data.iloc[:,6] = pd.factorize(data.iloc[:,6])[0]
data.iloc[:,7] = pd.factorize(data.iloc[:,7])[0]

### Normalizing and Scaling the data

In [6]:
data['source_user@domain'] = MinMaxScaler(feature_range= (0,1)).fit_transform(data['source_user@domain'].values.reshape(-1, 1))
data['destination_user@domain'] = MinMaxScaler(feature_range= (0,1)).fit_transform(data['destination_user@domain'].values.reshape(-1, 1))
data['source_computer'] = MinMaxScaler(feature_range= (0,1)).fit_transform(data['source_computer'].values.reshape(-1, 1))
data['destination_computer'] = MinMaxScaler(feature_range= (0,1)).fit_transform(data['destination_computer'].values.reshape(-1, 1))
data['authentication_type'] = MinMaxScaler(feature_range= (0,1)).fit_transform(data['authentication_type'].values.reshape(-1, 1))
data['logon_type'] = MinMaxScaler(feature_range= (0,1)).fit_transform(data['logon_type'].values.reshape(-1, 1))
data['authentication_orientation'] = MinMaxScaler(feature_range= (0,1)).fit_transform(data['authentication_orientation'].values.reshape(-1, 1))
data['success/failure'] = MinMaxScaler(feature_range= (0,1)).fit_transform(data['success/failure'].values.reshape(-1, 1))



In [7]:
data.head(2)

Unnamed: 0,source_user@domain,destination_user@domain,source_computer,destination_computer,authentication_type,logon_type,authentication_orientation,success/failure,Malignant/Benign
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,7.4e-05,7.2e-05,0.000104,0.00043,0.0,0.0,0.0,0.0,0


In [8]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [9]:
df = data
y = df['Malignant/Benign']
x = df.drop(['Malignant/Benign'], axis=1)

In [10]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size= 0.25, random_state=27)

### The model

In [11]:
clf = MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=500, alpha=0.0001,
                     solver='adam', verbose=10,  random_state=21,tol=0.000000001)

In [12]:
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

Iteration 1, loss = 0.09420026
Iteration 2, loss = 0.01257282
Iteration 3, loss = 0.00951991
Iteration 4, loss = 0.00837878
Iteration 5, loss = 0.00734608
Iteration 6, loss = 0.00637110
Iteration 7, loss = 0.00538445
Iteration 8, loss = 0.00536571
Iteration 9, loss = 0.00437573
Iteration 10, loss = 0.00388300
Iteration 11, loss = 0.00373888
Iteration 12, loss = 0.00385279
Iteration 13, loss = 0.00397087
Iteration 14, loss = 0.00344292
Iteration 15, loss = 0.00417242
Iteration 16, loss = 0.00318487
Iteration 17, loss = 0.00283840
Iteration 18, loss = 0.00316833
Iteration 19, loss = 0.00275004
Iteration 20, loss = 0.00331874
Iteration 21, loss = 0.00260406
Iteration 22, loss = 0.00255442
Iteration 23, loss = 0.00267754
Iteration 24, loss = 0.00251551
Iteration 25, loss = 0.00229157
Iteration 26, loss = 0.00208983
Iteration 27, loss = 0.00218114
Iteration 28, loss = 0.00203859
Iteration 29, loss = 0.00216434
Iteration 30, loss = 0.00221852
Iteration 31, loss = 0.00206045
Training loss did

In [39]:
accuracy_score(y_test, y_pred)

0.9996019900497513

In [40]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[9998,    4],
       [   0,   48]])

In [41]:
TN, FP, FN, TP = cm.ravel()

print ("TP: ", TP)
print ("FN: ", FN)
print ("FP: ", FP)
print ("TN: ", TN)

TP:  48
FN:  0
FP:  4
TN:  9998


In [42]:
# Performance Matrix
accuracy = (TP+TN)/(TP+FN+FP+TN)
print ("Accuracy: ", np.round(accuracy, 2))
recall = TP/(TP+FN)
print ("recall: ", np.round(recall, 2))
specificity = TN/(TN+FP)
print ("Specificity: ", np.round(specificity, 2))
precision = TP/(TP + FP)
print ("precision: ", np.round(precision, 2))
F1 = 2/((1/precision) + (1/recall))
print ("F1: ", np.round(F1, 2))

Accuracy:  1.0
recall:  1.0
Specificity:  1.0
precision:  0.92
F1:  0.96


### Adding the number of layers

In [57]:
clf = MLPClassifier(hidden_layer_sizes=(100,100,100,100,100), max_iter=500, alpha=0.0001,
                     solver='adam', verbose=10,  random_state=21,tol=0.000000001)

In [58]:
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

Iteration 1, loss = 0.09491476
Iteration 2, loss = 0.01117612
Iteration 3, loss = 0.00912483
Iteration 4, loss = 0.00801677
Iteration 5, loss = 0.00721341
Iteration 6, loss = 0.00562845
Iteration 7, loss = 0.00577252
Iteration 8, loss = 0.00511370
Iteration 9, loss = 0.00442386
Iteration 10, loss = 0.00453176
Iteration 11, loss = 0.00387678
Iteration 12, loss = 0.00347026
Iteration 13, loss = 0.00346166
Iteration 14, loss = 0.00417002
Iteration 15, loss = 0.00354001
Iteration 16, loss = 0.00288802
Iteration 17, loss = 0.00291285
Iteration 18, loss = 0.00249435
Iteration 19, loss = 0.00259780
Iteration 20, loss = 0.00345311
Iteration 21, loss = 0.00275659
Training loss did not improve more than tol=0.000000 for two consecutive epochs. Stopping.


In [59]:
accuracy_score(y_test, y_pred)

0.9992039800995025

In [60]:
cm = confusion_matrix(y_test, y_pred)
cm

array([[9998,    4],
       [   4,   44]])

In [61]:
TN, FP, FN, TP = cm.ravel()

print ("TP: ", TP)
print ("FN: ", FN)
print ("FP: ", FP)
print ("TN: ", TN)

TP:  44
FN:  4
FP:  4
TN:  9998


In [62]:
# Performance Matrix
accuracy = (TP+TN)/(TP+FN+FP+TN)
print ("Accuracy: ", np.round(accuracy, 2))
recall = TP/(TP+FN)
print ("recall: ", np.round(recall, 2))
specificity = TN/(TN+FP)
print ("Specificity: ", np.round(specificity, 2))
precision = TP/(TP + FP)
print ("precision: ", np.round(precision, 2))
F1 = 2/((1/precision) + (1/recall))
print ("F1: ", np.round(F1, 2))

Accuracy:  1.0
recall:  0.92
Specificity:  1.0
precision:  0.92
F1:  0.92


Although the result is instresting, this model is not very useful for log anomaly detection because the learning here is supervised.