## Load CP4S Data

In [None]:
!pip install matplotlib
!pip install sklearn
!pip install git+https://github.com/IBM/ibm-cp4s-client.git

In [None]:
from cp4s.client import CP4S
from os import environ as env
# Build the CP4S client from settings held in environment variables.
# Each constructor keyword is mapped to the variable that supplies it; a
# missing variable raises KeyError before the client is created.
connection_env = {
    'url': 'CP4S_API_ENDPOINT',
    'username': 'CP4S_APIKEY_USERNAME',
    'password': 'CP4S_APIKEY_PASSWORD',
}
ac = CP4S(**{kwarg: env[var] for kwarg, var in connection_env.items()})

In [None]:
# Run the search through the client and keep the result as `df`.
# NOTE(review): the query looks like a STIX pattern matching IPv4 address
# 127.0.0.1, and configs="all" presumably targets every configured data
# source - confirm both against the ibm-cp4s-client documentation.
search_query = "[ipv4-addr:value = '127.0.0.1']"
df = ac.search_df(query=search_query, configs="all")

## Interactive analysis

In [None]:
# Label every row: 1 when the traffic's destination address is the
# known-suspicious host, 0 for everything else.
suspicious_dst = '10.0.0.106'
hits_suspicious_dst = df['network_traffic_dst_addr'] == suspicious_dst
df.loc[hits_suspicious_dst, 'suspicious'] = 1
df.loc[~hits_suspicious_dst, 'suspicious'] = 0

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Keep only the numeric columns; these are the only ones fed to the model.
tdf = df.select_dtypes([np.number])

# Features are every numeric column except the label; the label is the target.
# `columns=` is used instead of a positional axis argument because pandas 2.x
# no longer accepts `drop(label, 1)`.
X = tdf.drop(columns='suspicious')
y = tdf['suspicious']

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)
print('Training data size: %d' % X_train.shape[0])
print(' Testing data size: %d' % X_test.shape[0])

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Train a depth-limited, entropy-based decision tree on the training split
# (fit() returns the estimator itself, so construction and fitting chain).
tree = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=0,
).fit(X_train, y_train)

# Score the model on the held-out split: count of wrong predictions and the
# fraction of correct ones.
y_pred = tree.predict(X_test)
matches = (y_test == y_pred)
print('Misclassified  : %d' % (~matches).sum())
print('Accuracy (tree): %.2f' % (matches.sum() / len(y_test)))

In [None]:
from matplotlib import pyplot as plt

# Pie chart of the hold-out results: the accuracy share versus the remainder.
from sklearn.metrics import accuracy_score
labels = ['Classified', 'Misclassified']
colors = ['green', 'red']
accuracy = accuracy_score(y_test, y_pred)
plt.pie(
    [accuracy, 1-accuracy],
    labels=labels,
    colors=colors,
    startangle=90,
    autopct='%.1f%%',
)
plt.show()

## Open a CP4S Case