In [156]:
import pandas as pd
import numpy as np
from sklearn import preprocessing 
from sklearn_pandas import DataFrameMapper
from sklearn.metrics import accuracy_score

# TEMP: globally silences every warning category (deprecation notices
# included) for the rest of the session -- remove once no longer needed.
import warnings
warnings.filterwarnings(action='ignore')

def ipInt(ip):
  """Turn a dotted IP string into an int by gluing its octets together.

  The dots are dropped and the remaining digits are parsed as one base-10
  number, e.g. "192.168.1.1" -> 19216811.

  Note: octets are not zero-padded, so the mapping is not one-to-one
  ("1.23.4.5" and "12.3.4.5" both yield 12345).

  Raises ValueError when the text left after removing the dots is not a
  valid integer literal (for example an empty string).
  """
  octets = ip.split('.')
  return int(''.join(octets))

# Names of the feature columns found in the CSV files (this list is not
# referenced by the cells visible below).
columns = ["IP source", "IP destiny", "L2 protocol", "Source port", "Destiny port", "Package size"]

# Load both data sets with the first CSV column as the row index and store
# the protocol column as a pandas categorical.
X_train = (
    pd.read_csv("IP_DataFrame.csv", index_col=0)
      .astype({"L2 protocol": "category"})
)

X_test = (
    pd.read_csv("IP_DataFrame_50Test.csv", index_col=0)
      .astype({"L2 protocol": "category"})
)

In [157]:
# Column-wise preprocessing:
#   * the two IP columns are standardised together,
#   * the protocol column is binarised with LabelBinarizer,
#   * ports and package size are standardised together.
mapper = DataFrameMapper(
    [
        (["IP source", "IP destiny"], preprocessing.StandardScaler()),
        ("L2 protocol", preprocessing.LabelBinarizer()),
        (["Source port", "Destiny port", "Package size"], preprocessing.StandardScaler()),
    ]
)

# The transformers are fitted on the training frame only; leaving this call
# as the last expression keeps the fitted mapper's repr as the cell output.
mapper.fit(X_train)

DataFrameMapper(default=False, df_out=False,
        features=[(['IP source', 'IP destiny'], StandardScaler(copy=True, with_mean=True, with_std=True)), ('L2 protocol', LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)), (['Source port', 'Destiny port', 'Package size'], StandardScaler(copy=True, with_mean=True, with_std=True))],
        input_df=False, sparse=False)

In [158]:
# Feature matrix for the test frame, using the transformers fitted on X_train.
X = mapper.transform(X_test)

# Ground-truth labels, one per row of X: a test row that is identical to the
# training row at the same position is labelled 1, any differing row -1.
# Rows are compared by position (.iloc) because the labels must line up with
# the rows of X; the previously used DataFrame.ix accessor is deprecated and
# no longer exists in pandas >= 1.0.
if len(X_train) != len(X_test):
    raise ValueError("X_train and X_test must have the same number of rows")

y_true = [
    1 if X_train.iloc[i].equals(X_test.iloc[i]) else -1
    for i in range(len(X_test))
]

In [159]:
from sklearn.svm import OneClassSVM

# One-class SVM with default hyper-parameters. fit() returns the estimator
# itself, so construction and fitting are chained.
# NOTE(review): the model is fitted and scored on the same matrix X --
# confirm this is the intended evaluation setup.
clf = OneClassSVM().fit(X)
y_pred = clf.predict(X)

print(y_pred)
print("Acc:", accuracy_score(y_true, y_pred))

[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
 -1  1 -1 -1 -1  1  1  1 -1 -1 -1  1 -1 -1 -1 -1 -1  1 -1 -1 -1  1  1  1  1
  1 -1  1  1  1  1  1 -1 -1  1  1 -1 -1 -1 -1  1  1 -1  1  1 -1 -1 -1  1 -1
  1  1  1  1  1 -1 -1 -1  1  1  1 -1 -1  1 -1 -1  1 -1 -1 -1 -1  1  1 -1 -1
 -1 -1 -1 -1  1 -1  1  1  1 -1  1 -1  1  1 -1 -1  1  1 -1 -1 -1 -1  1 -1 -1
  1 -1  1 -1 -1 -1 -1  1 -1  1 -1 -1 -1 -1 -1 -1 -1  1  1 -1  1 -1 -1  1 -1
 -1  1]
Acc: 0.305084745763


In [160]:
from sklearn.ensemble import IsolationForest

# Isolation forest with default hyper-parameters. The trees are built from
# random splits, so the seed is pinned to make the predictions and the
# reported accuracy reproducible across kernel restarts.
# NOTE(review): the model is fitted and scored on the same matrix X --
# confirm this is the intended evaluation setup.
clf = IsolationForest(random_state=42)
clf.fit(X)
y_pred = clf.predict(X)

print(y_pred)
print("Acc:", accuracy_score(y_true, y_pred))

[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1 -1  1  1  1
  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1 -1 -1  1  1  1 -1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1 -1  1  1  1  1  1  1 -1  1  1 -1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1 -1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1
 -1  1 -1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1
  1  1  1 -1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1]
Acc: 0.649717514124


In [161]:
from sklearn.neighbors import LocalOutlierFactor

# Local Outlier Factor with default hyper-parameters; fit_predict() fits the
# model on X and labels those same rows in a single call.
clf = LocalOutlierFactor()
y_pred = clf.fit_predict(X)

print(y_pred)
print("Acc: ", accuracy_score(y_true, y_pred))

[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1 -1  1  1  1
  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1 -1 -1  1  1  1 -1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1 -1  1  1  1  1  1  1 -1  1  1 -1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1 -1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1
 -1  1 -1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1
  1  1  1 -1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1]
Acc:  0.649717514124


In [162]:
from sklearn.covariance import EllipticEnvelope

# Elliptic envelope with default hyper-parameters. Its robust covariance fit
# starts from random subsets, so the seed is pinned to make the predictions
# and the reported accuracy reproducible across kernel restarts.
# NOTE(review): the model is fitted and scored on the same matrix X --
# confirm this is the intended evaluation setup.
clf = EllipticEnvelope(random_state=42)
clf.fit(X)
y_pred = clf.predict(X)

print(y_pred)
print("Acc:", accuracy_score(y_true, y_pred))

[ 1 -1 -1  1  1  1 -1  1  1  1  1 -1  1  1 -1  1  1  1 -1 -1  1  1  1 -1 -1
  1 -1  1  1 -1  1  1  1  1  1 -1  1 -1  1 -1 -1 -1  1 -1  1  1  1 -1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1]
Acc: 0.819209039548


In [163]:
# Spot check: label only the first row of X. `clf` is whichever estimator was
# assigned most recently, so the result depends on which cell ran last.
clf.predict(X[:1])

array([1])