# Dense Networks

This notebook shows how to apply dense networks into real life data

### Import all the needed modules

In [45]:
from keras.models import Model, load_model
from keras.layers import Dense, Input
from keras.optimizers import SGD
from keras.losses import binary_crossentropy
from keras.activations import relu
from keras.callbacks import ModelCheckpoint, EarlyStopping
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split

### Define the path of the data source for convenience

The data can be downloaded here https://www.kaggle.com/c/microsoft-malware-prediction/data

In [4]:
CSV_PATH='train.csv'

### Read the first 10,000 rows of the data

In [5]:
df = pd.read_csv(CSV_PATH, nrows=10000)

### View the loaded dataset

In [6]:
df.head()

Unnamed: 0,MachineIdentifier,ProductName,EngineVersion,AppVersion,AvSigVersion,IsBeta,RtpStateBitfield,IsSxsPassiveMode,DefaultBrowsersIdentifier,AVProductStatesIdentifier,...,Census_FirmwareVersionIdentifier,Census_IsSecureBootEnabled,Census_IsWIMBootEnabled,Census_IsVirtualDevice,Census_IsTouchEnabled,Census_IsPenCapable,Census_IsAlwaysOnAlwaysConnectedCapable,Wdft_IsGamer,Wdft_RegionIdentifier,HasDetections
0,0000028988387b115f69f31a3bf04f09,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1735.0,0,7.0,0,,53447.0,...,36144.0,0,,0.0,0,0,0.0,0.0,10.0,0
1,000007535c3f730efa9ea0b7ef1bd645,win8defender,1.1.14600.4,4.13.17134.1,1.263.48.0,0,7.0,0,,53447.0,...,57858.0,0,,0.0,0,0,0.0,0.0,8.0,0
2,000007905a28d863f6d0d597892cd692,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1341.0,0,7.0,0,,53447.0,...,52682.0,0,,0.0,0,0,0.0,0.0,3.0,0
3,00000b11598a75ea8ba1beea8459149f,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1527.0,0,7.0,0,,53447.0,...,20050.0,0,,0.0,0,0,0.0,0.0,3.0,1
4,000014a5f00daa18e76b81417eeb99fc,win8defender,1.1.15100.1,4.18.1807.18075,1.273.1379.0,0,7.0,0,,53447.0,...,19844.0,0,0.0,0.0,0,0,0.0,0.0,1.0,1


### Extract the labels of the data

In [7]:
y = df['HasDetections'].values

In [8]:
y

array([0, 0, 0, ..., 1, 1, 0], dtype=int64)

### Select a few columns as features

In [14]:
cols_to_use = ['ProductName', 'Platform', 'OsBuild', 'AVProductsInstalled']
X_raw = df[cols_to_use]
X_raw.head(5)

Unnamed: 0,ProductName,Platform,OsBuild,AVProductsInstalled
0,win8defender,windows10,17134,1.0
1,win8defender,windows10,17134,1.0
2,win8defender,windows10,17134,1.0
3,win8defender,windows10,17134,1.0
4,win8defender,windows10,17134,1.0


### Split the data to training and test

In [16]:
train_X_raw, test_X_raw, train_y, test_y = train_test_split(X_raw, y, test_size=0.2, random_state=0, stratify=y)
print(f'train_X_raw: {train_X_raw.shape}')
print(f'test_X_raw: {test_X_raw.shape}')
print(f'train_y: {train_y.shape}')
print(f'test_y: {test_y.shape}')

train_X_raw: (8000, 4)
test_X_raw: (2000, 4)
train_y: (8000,)
test_y: (2000,)


### Define encoders to convert the data to numeric

In [32]:
encoders = [
    OneHotEncoder(sparse=False, handle_unknown='ignore'), # ProductName
    OneHotEncoder(sparse=False, handle_unknown='ignore'), # Platform
    OneHotEncoder(sparse=False, handle_unknown='ignore', categories='auto'), # OsBuild
    StandardScaler(), # AVProductsInstalled
]

### Encode the training data. Note the use of the ```fit_transform``` for the training encoding

In [33]:
train_X = []
for e, c in zip(encoders, train_X_raw.columns):
    column_np = train_X_raw[[c]].values
    column_encoded = e.fit_transform(column_np)
    train_X.append(column_encoded)
train_X = np.concatenate(train_X, axis=1)
train_X.shape

(8000, 27)

### Encode the test data. Not the use of ``transform`` only for the testing encoding

In [35]:
test_X = []
for e, c in zip(encoders, test_X_raw.columns):
    column_np = test_X_raw[[c]].values
    column_encoded = e.transform(column_np)
    test_X.append(column_encoded)
test_X = np.concatenate(test_X, axis=1)
test_X.shape

(2000, 27)

### Create a Keras model to train for training

In [36]:
input_ = Input(shape=(27,))
dense1 = Dense(32, activation=relu)(input_)
dense2 = Dense(32, activation=relu)(dense1)
dense3 = Dense(32, activation=relu)(dense2)
output = Dense(1)(dense3)

In [37]:
model = Model(inputs=input_, outputs=output)

In [38]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 27)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                896       
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 3,041
Trainable params: 3,041
Non-trainable params: 0
_________________________________________________________________


In [39]:
model.compile(SGD(), binary_crossentropy)

### Define an early stop

This allows the training to stop if the validation loss does not improve after ```patience``` number of epochs

In [41]:
es = EarlyStopping(patience=5)

### Define a model checkpoint

This will save the model based on the parameters passed. In this model checkpoint, the best model is saved

In [42]:
BEST_MODEL_PATH = 'best_model.h5'
mc = ModelCheckpoint(BEST_MODEL_PATH, save_best_only=True)

### Train the model

Add a validation split to check for overfitting and provide the early stop and model checkpoint as callbacks

In [44]:
model.fit(train_X, train_y, batch_size=32, epochs=100, validation_split=0.2, callbacks=[es, mc])

Train on 6400 samples, validate on 1600 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


<keras.callbacks.History at 0x1c41fc29c18>

### Load the best model

In [46]:
best_model = load_model(BEST_MODEL_PATH)

### Verify the structure of the loaded model

In [48]:
best_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 27)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                896       
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 3,041
Trainable params: 3,041
Non-trainable params: 0
_________________________________________________________________


### Consume the best model

Prediction and evaluation can be performed on the best model

In [55]:
test_y_pred = best_model.predict(test_X)

In [57]:
best_model.evaluate(test_X, test_y)



0.6934083604812622