# Predict quality of the connection

**About Data**  
This is the data set used for The Third International Knowledge Discovery and Data Mining Tools Competition, which was held in conjunction with KDD-99 The Fifth International Conference on Knowledge Discovery and Data Mining. This dataset contains a standard set of data to be audited, which includes a wide variety of intrusions simulated in a military network environment.

**Goal**   
Build a network intrusion detector, a predictive model capable of distinguishing between bad connections, called intrusions or attacks, and good normal connections. 

1. [Get Data](#getdata)
2. [Data Cleaning](#clean)
3. [Train - Test Split](#split)
4. [Build Parallel XGBoost Classifier](#model)
5. [Evaluate Execution Speed vs Number of Threads](#eval)
6. [Switch Environment to Default Anaconda S](#switchenv)
7. [Deploy model](#deploy) 
8. [Make Predictions](#predict)

In [25]:
#Import required libraries
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
import time
from matplotlib import pyplot
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

<a id='getdata'></a>
# Get Data

In [None]:
# Get the KDD Cup 1999 data from the UCI data repository (the file has no header row)
KDD_DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz'
data = pd.read_csv(KDD_DATA_URL, header=None)
data.head()


In [None]:
# Name the columns col0, col1, ... and relabel column 41 as the target
columns = ['col' + str(position) for position in range(len(data.columns))]
columns[41] = 'connection_type'
data.columns = columns

data.shape

<a id='clean'></a>
# Data Cleaning

In [None]:
# Count null entries per column, then show only the columns that have any
missing = data.isna().sum()
missing.loc[missing > 0]

In [None]:
# Clean and binarise the target in a single pass over connection_type:
#   - drop every '.' from the raw label
#   - map the cleaned label to 1 for a normal/good connection, 0 for a bad one
data['connection_type'] = data['connection_type'].map(
    lambda label: 1 if label.replace('.', '') == 'normal' else 0
)

In [None]:
# One-hot encode the categorical columns into dummy indicator columns
categorical_columns = ['col1', 'col2', 'col3']
data = pd.get_dummies(data, columns=categorical_columns)

In [None]:
# Display the (rows, columns) shape of the frame after the dummy-encoding step
data.shape

In [None]:
# Downsample data to at most 100,000 rows.
# - random_state fixes the sample so every downstream result is reproducible
#   (the train/test split is seeded with the same value).
# - min(...) avoids the ValueError sample() raises when asked for more rows
#   than the frame holds.
data = data.sample(n=min(100000, len(data)), random_state=42)

<a id='split'></a>
## Train - Test Split

In [24]:
from sklearn.model_selection import train_test_split

# Separate the features from the binary target; y stays a one-column DataFrame
X = data.drop(columns=['connection_type'])
y = data[['connection_type']]

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

<a id='model'></a>
## Build Parallel XGBoost Classifier
This run uses the default free Anaconda environment.

In [None]:
# Approach taken from:
# https://machinelearningmastery.com/best-tune-multithreading-support-xgboost-python/

# Thread counts to benchmark, and the elapsed fit time recorded for each one
num_threads = [1, 2, 4]
results = []

for n in num_threads:
    # Time the construction and fit of one classifier restricted to n threads
    start = time.time()
    model = XGBClassifier(nthread=n)
    model.fit(X_train, y_train.values.ravel())
    elapsed = time.time() - start
    results.append(elapsed)
    print(n, elapsed)

<a id='eval'></a>
## Evaluate Execution Speed vs Number of Threads

In [None]:
# Line chart of the recorded training time for each thread count
ax = pyplot.gca()
ax.plot(num_threads, results)
ax.set_title('XGBoost Training Speed vs Number of Threads')
ax.set_xlabel('Number of Threads')
ax.set_ylabel('Speed (seconds)')
pyplot.show()

<a id='switchenv'></a>
## Switch Environment to Default Anaconda S

In [None]:
# Re-run the thread-scaling benchmark in the current environment.
# Start from an empty list so the plot pairs exactly one timing with each
# entry of num_threads instead of appending to the earlier run's timings.
results = []
for n in num_threads:
    start = time.time()
    model = XGBClassifier(nthread=n)
    # Fit on the training split only, with the target flattened to 1-D.
    # The previous call used an undefined name (train_encoded_y) and the full
    # feature frame X, which would also leak the test rows into the model
    # that is deployed and evaluated on X_test later on.
    model.fit(X_train, y_train.values.ravel())
    elapsed = time.time() - start
    print(n, elapsed)
    results.append(elapsed)



In [None]:
# Line chart of the recorded training time for each thread count
axes = pyplot.gca()
axes.plot(num_threads, results)
axes.set_title('XGBoost Training Speed vs Number of Threads')
axes.set_xlabel('Number of Threads')
axes.set_ylabel('Speed (seconds)')
pyplot.show()

<a id='deploy'></a>
## Deploy model

In [None]:
# Install the watson_machine_learning_client library with pip (IPython shell escape)
!pip install watson_machine_learning_client

In [None]:
# Import the client library and load the service credentials
import os

from watson_machine_learning_client import WatsonMachineLearningAPIClient

# SECURITY: service secrets must not be hard-coded in a notebook, where they end
# up in version control and shared copies. They are read from environment
# variables instead. Any values that were previously embedded in this cell
# should be treated as compromised and rotated.
_required_env_vars = ("WML_ACCESS_KEY", "WML_USERNAME", "WML_PASSWORD", "WML_INSTANCE_ID")
_missing_env_vars = [name for name in _required_env_vars if not os.environ.get(name)]
if _missing_env_vars:
    raise RuntimeError(
        "Missing Watson Machine Learning credentials; set the environment "
        "variable(s): " + ", ".join(_missing_env_vars)
    )

wml_credentials = {
    # The service URL is not a secret; WML_URL may override the default endpoint
    "url": os.environ.get("WML_URL", "https://ibm-watson-ml.mybluemix.net"),
    "access_key": os.environ["WML_ACCESS_KEY"],
    "username": os.environ["WML_USERNAME"],
    "password": os.environ["WML_PASSWORD"],
    "instance_id": os.environ["WML_INSTANCE_ID"],
}

In [None]:
# Create the Watson Machine Learning API client from the wml_credentials dict
client = WatsonMachineLearningAPIClient(wml_credentials)

In [None]:
# Metadata for the stored model: only its name is set here
model_props = {client.repository.ModelMetaNames.NAME: "XGB Model"}

In [None]:
# Publish the most recently fitted model to the Watson Machine Learning repository,
# passing the training features and target along with it.
# NOTE(review): y_train is a one-column DataFrame, so y_train.values is 2-D (n, 1),
# whereas model.fit was given the flattened .ravel() form - confirm which shape
# store_model expects for training_target.
published_model = client.repository.store_model(model=model,meta_props=model_props,training_data=X_train,training_target=y_train.values)

In [None]:
# Look up the UID of the published model, then fetch its details by that UID
published_model_uid = client.repository.get_model_uid(published_model)
model_details = client.repository.get_details(published_model_uid)

In [None]:
# Load the saved model back from the Watson Machine Learning repository by its UID
loaded_model = client.repository.load(published_model_uid)

<a id='predict'></a>
## Make Predictions

In [None]:
# Make predictions on the held-out test features with the model loaded from the repository
test_predictions = loaded_model.predict(X_test)

In [None]:
# Show the predictions produced for the test set
print(test_predictions)

In [None]:
# Create an online deployment for the published model, identified by its UID
created_deployment = client.deployments.create(published_model_uid, "Deployment of XGB Model")

In [None]:
# Get the scoring URL from the details of the deployment created above
scoring_endpoint = client.deployments.get_scoring_url(created_deployment)

# Print the endpoint so it can be inspected or reused
print(scoring_endpoint)

In [None]:
# Display the feature fields of the input data schema recorded in the model details
model_details['entity']['input_data_schema']['features']['fields']

In [None]:
# Take the first 10 rows of the training features as the sample to score
scoring=X_train.head(10)

In [None]:
# Sanity check: columns of X that are absent from the scoring sample (an empty set means none are missing)
set(X.columns)-set(scoring.columns)

In [None]:
# Convert the scoring sample into a list of rows (one inner list per row).
# .values.tolist() replaces the manual iterrows() loop and yields native Python
# scalars rather than numpy scalar objects, which is the safer form for a
# payload that is sent to the scoring service.
scores_values = scoring.values.tolist()

In [None]:
# Build the scoring payload: the field (column) names plus the row values
scoring_payload = {"fields": list(scoring.columns), "values": scores_values}
# Score the sample rows against the deployment's scoring endpoint
predictions = client.deployments.score(scoring_endpoint, scoring_payload)