### 1. Imports and Definitions

In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_selection import SelectKBest, f_classif
from sklearn import svm
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from sklearn.utils.class_weight import compute_class_weight

import warnings
warnings.filterwarnings('ignore')

from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam

from datetime import datetime
import matplotlib.pyplot as plt

ARCHIV = 'Archiv/'
ARCHIV2016 = 'Archiv2016/'

Using TensorFlow backend.


### 2. Data Manipulation Library

In [2]:
class DataManipulation(object):
    
    def __init__(self, filename, sep_=';', dec_=','):

        self.data = pd.read_csv(filename, sep=sep_, decimal=dec_).dropna()
        self.data_array = self.data.values[:,1:].astype(dtype=float)
        
    def visualise_data(self):
        '''
            Read a given file into a pandas dataframe.
            Then, plot the various features against 
            values of frequency.
            :param: 
                None
            :returns:
                None
        '''
        self.data.plot(x='nm', y=self.data.columns[1:], kind='line', figsize=(20, 15))

    def augment_dataset(self, target):
        '''
            Augment the given dataset with the supplied
            target label value.
            :param:
                target: Target label 
            :returns:
                self.data_array: Augmented dataset
        '''
        self.data_array = np.hstack((self.data_array, \
                                     np.full((self.data_array.shape[0], 1), target)))
        return self.data_array

    def extract_features(self, no_of_features):
        '''
            Extract usable features from the dataset.
            :param:
                no_of_features: Number of features to be extracted
            :returns:
                extracted_features: Extracted features
        '''
        X = self.data_array[:, :-1]
        y = self.data_array[:, -1]

        test = SelectKBest(score_func=f_classif, k=no_of_features)
        fit = test.fit(X, y)
        extracted_features = np.hstack((fit.transform(X), y.reshape(self.data_array.shape[0], 1)))
        
        return extracted_features
        
    def cumulative_dataset(data_):
        '''
            Cumulate the data into one single data
            form.
            :param: 
                data_: Datasets to be accumulated
            :returns:
                dataset_: Complete dataset
        '''
        dataset_ = np.empty((0, data_[0].shape[1]))
        
        for i in data_:
            dataset_ = np.vstack((dataset_, i))
        
        return dataset_

### 3. Intelligence Library

In [3]:
class Intelligence(object):
    
    def __init__(self, negative_, positive_, no_test_points):
        
        np.random.shuffle(negative_)
        np.random.shuffle(positive_)
        self.train_data = np.vstack((negative_[:no_test_points, :-1], \
                                     positive_[:no_test_points, :-1]))
        self.train_labels = np.hstack((negative_[:no_test_points, -1], \
                                       positive_[:no_test_points, -1]))
        self.test_data = np.vstack((negative_[no_test_points:, :-1], \
                                    positive_[no_test_points:, :-1]))
        self.test_labels = np.hstack((negative_[no_test_points:, -1], \
                                      positive_[no_test_points:, -1]))

    def svm_(self, kernel):
        '''
            Implement SVM with user defined kernel.
            :param:
                kernel: SVM kernel to be used
            :returns:
                None
        '''
        start_ = datetime.timestamp(datetime.now())
        svm_classifier = svm.SVC(C=500, kernel=kernel)
        svm_classifier.fit(self.train_data, self.train_labels)
        stop_ = datetime.timestamp(datetime.now())
        self.time_ = stop_ - start_
        self.__print_results(model=svm_classifier, desc='SVM classifier with '+kernel+' kernel')

    def mlp_(self):
        '''
            Implement MLP with 1 input layer, 3 hidden
            layer and 1 output layer.
            :param: 
                None
            :returns:
                None
        '''
        start_ = datetime.timestamp(datetime.now())
        input_ = Input(shape=(self.train_data.shape[1],))
        hidden_1 = Dense(6, activation='sigmoid')(input_)
        hidden_2 = Dense(6, activation='sigmoid')(hidden_1)
        hidden_3 = Dense(6, activation='sigmoid')(hidden_2)
        output_ = Dense(1, activation='sigmoid')(hidden_3)
        mlp_model = Model(input_, output_)

        mlp_model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.01))

        weights = compute_class_weight('balanced', np.array([0,1]), self.train_labels)
        mlp_model.fit(self.train_data, self.train_labels, 
                      batch_size=10, epochs=100, class_weight={0:weights[0],1:weights[1]})
        predicted_labels = mlp_model.predict(self.test_data)
        predicted_labels = np.array([i > 0.5 for i in predicted_labels])
        stop_ = datetime.timestamp(datetime.now())
        self.time_ = stop_ - start_
        self.__print_results(model=predicted_labels, desc='MLP using 1-3-1 network')
        
    def __print_results(self, model, desc):
        '''
            Print the results to verify network.
            :param:
                model: Model used for classification
                desc: Description
            :returns:
                None
        '''
        model = model.predict(self.test_data) if desc[:3]=='SVM' else model
        print (desc)
        print ('-'*50)
        print ('='*5+' Confusion Matrix '+'='*5+'\n', confusion_matrix(self.test_labels, model, [1,0]))
        print ('='*5+' Precision '+'='*5+'\n {:.2f}'.format(precision_score(self.test_labels, model, [1,0])*100))
        print ('='*5+' Recall '+'='*5+'\n {:.2f}'.format(recall_score(self.test_labels, model, [1,0])))
        print ('='*5+' Execution Time '+'='*5+'\n {:.5f} sec'.format(self.time_))
        print ('-'*50+'\n')

### 4. Using *Archiv* dataset

In [4]:
## Get data into python
stoff = DataManipulation(ARCHIV+'Stoff.csv')
stoff.augment_dataset(target=0)
stoff_features = stoff.extract_features(no_of_features=6)

fleisch = DataManipulation(ARCHIV+'Fleisch.csv')
fleisch.augment_dataset(target=0)
fleisch_features = fleisch.extract_features(no_of_features=6)

holz = DataManipulation(ARCHIV+'Holz.csv')
holz.augment_dataset(target=0)
holz_features = holz.extract_features(no_of_features=6)

leder = DataManipulation(ARCHIV+'Leder.csv')
leder.augment_dataset(target=0)
leder_features = leder.extract_features(no_of_features=6)

non_skin_dataset = DataManipulation.cumulative_dataset([stoff_features, \
                                                        fleisch_features, \
                                                        holz_features, \
                                                        leder_features])

referenz = DataManipulation(ARCHIV+'Referenz-Haut_6-Klassen.csv')
skin_dataset = referenz.augment_dataset(target=1)

In [5]:
intelligence_client = Intelligence(non_skin_dataset, skin_dataset, 50)
intelligence_client.svm_(kernel='linear')
intelligence_client.svm_(kernel='rbf')

SVM classifier with linear kernel
--------------------------------------------------
===== Confusion Matrix =====
 [[ 71   0]
 [  6 428]]
===== Precision =====
 92.21
===== Recall =====
 1.00
===== Execution Time =====
 0.00589 sec
--------------------------------------------------

SVM classifier with rbf kernel
--------------------------------------------------
===== Confusion Matrix =====
 [[ 60  11]
 [  0 434]]
===== Precision =====
 100.00
===== Recall =====
 0.85
===== Execution Time =====
 0.00115 sec
--------------------------------------------------



In [6]:
intelligence_client.mlp_()

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoc

### 5. Using *Archiv/2016* dataset

In [7]:
## Get data into python
material = DataManipulation(ARCHIV2016+'2016material.csv')
material.augment_dataset(target=0)
material_features = material.extract_features(no_of_features=15)

material_fake = DataManipulation(ARCHIV2016+'2016material-fake.csv')
material_fake.augment_dataset(target=0)
material_fake_features = material_fake.extract_features(no_of_features=15)

non_skin_dataset = DataManipulation.cumulative_dataset([material_features, \
                                                        material_fake_features])

skin = DataManipulation(ARCHIV2016+'2016skin.csv')
skin.augment_dataset(target=1)
skin_dataset = skin.extract_features(no_of_features=15)

In [8]:
intelligence_client = Intelligence(non_skin_dataset, skin_dataset, 50)
intelligence_client.svm_(kernel='linear')
intelligence_client.svm_(kernel='rbf')

SVM classifier with linear kernel
--------------------------------------------------
===== Confusion Matrix =====
 [[ 971    0]
 [   0 1992]]
===== Precision =====
 100.00
===== Recall =====
 1.00
===== Execution Time =====
 0.00098 sec
--------------------------------------------------

SVM classifier with rbf kernel
--------------------------------------------------
===== Confusion Matrix =====
 [[ 939   32]
 [   0 1992]]
===== Precision =====
 100.00
===== Recall =====
 0.97
===== Execution Time =====
 0.00232 sec
--------------------------------------------------



In [9]:
intelligence_client.mlp_()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78