In [0]:
import keras.backend as K
import tensorflow as tf
from keras import initializers, layers, regularizers
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import multi_gpu_model
import numpy as np
import datetime, os

Using TensorFlow backend.


## Uploading Training Set (From local machine)

In [0]:
from google.colab import files
uploaded = files.upload()


Saving AComp1.csv to AComp1.csv


In [0]:
import pandas as pd
import io
df = pd.read_csv(io.BytesIO(uploaded['AComp1.csv']))

In [0]:
df.head()

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,Label
0,5.41,0.0,8.11,11.49,6.08,3.38,2.03,9.46,13.51,10.14,5.41,2.7,2.03,4.05,4.05,2.03,2.7,5.41,0.0,2.03,1
1,6.88,0.0,5.62,8.75,5.0,8.75,2.5,8.12,9.38,5.62,1.25,5.62,5.0,1.25,1.88,6.88,5.62,7.5,0.0,4.38,1
2,9.84,0.26,5.96,5.44,5.18,5.7,0.78,5.18,6.99,10.1,2.85,4.4,3.63,3.63,2.07,7.77,10.62,4.92,0.52,4.15,1
3,3.91,1.95,0.33,1.63,3.26,2.28,1.3,5.21,0.65,8.14,0.65,1.95,15.64,34.2,1.95,5.21,2.28,5.21,0.33,3.91,1
4,6.99,6.99,1.4,6.99,3.5,4.9,0.7,3.5,2.1,9.79,4.2,5.59,9.79,6.29,4.2,5.59,6.29,5.59,1.4,4.2,1


In [0]:
df.shape

(16120, 21)

## Uploading the Test set (on local machine)

In [0]:
from google.colab import files
uploaded2 = files.upload()


Saving test_AComp1.csv to test_AComp1.csv


In [0]:
df2 = pd.read_csv(io.BytesIO(uploaded2['test_AComp1.csv']))

In [0]:
df2.head()

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,Label
0,5.77,4.62,3.85,5.77,2.31,8.85,3.46,3.46,6.92,10.38,3.46,3.46,6.92,3.46,3.46,5.77,5.77,7.31,3.08,1.92,1
1,5.99,2.3,8.29,5.99,6.91,7.83,0.92,7.83,5.99,7.83,0.92,4.61,4.61,2.3,5.07,7.37,5.07,7.37,0.92,1.84,1
2,11.49,6.76,3.38,4.05,2.7,6.76,2.03,4.05,2.03,10.14,2.7,2.7,6.76,5.41,6.08,6.08,5.41,6.08,0.68,4.73,1
3,10.45,5.97,3.73,2.24,0.75,6.72,2.24,4.48,3.73,10.45,2.99,7.46,5.22,4.48,5.97,8.96,3.73,8.96,0.0,1.49,1
4,5.04,6.2,3.1,9.3,1.94,6.59,0.78,5.43,2.71,5.81,1.94,2.71,2.71,17.05,9.69,9.3,3.49,4.65,0.0,1.55,1


In [0]:
df2.shape

(4030, 21)

# Defining the Model

In [0]:

class Length(layers.Layer):
    """
    Compute the length of vectors. This is used to compute a Tensor that has the same shape with y_true in margin_loss
    inputs: shape=[dim_1, ..., dim_{n-1}, dim_n]
    output: shape=[dim_1, ..., dim_{n-1}]
    """
    def call(self, inputs, **kwargs):
        return K.sqrt(K.sum(K.square(inputs), -1))

    def compute_output_shape(self, input_shape):
        return input_shape[:-1]

class Mask(layers.Layer):
    """
    Mask a Tensor with shape=[None, d1, d2] by the max value in axis=1.
    Output shape: [None, d2]
    """
    def call(self, inputs, **kwargs):
        # use true label to select target capsule, shape=[batch_size, num_capsule]
        if type(inputs) is list:  # true label is provided with shape = [batch_size, n_classes], i.e. one-hot code.
            assert len(inputs) == 2
            inputs, mask = inputs
        else:  # if no true label, mask by the max length of vectors of capsules
            x = inputs
            # Enlarge the range of values in x to make max(new_x)=1 and others < 0
            x = (x - K.max(x, 1, True)) / K.epsilon() + 1
            mask = K.clip(x, 0, 1)  # the max value in x clipped to 1 and other to 0

        # masked inputs, shape = [batch_size, dim_vector]
        inputs_masked = K.batch_dot(inputs, mask, [1, 1])
        return inputs_masked

    def compute_output_shape(self, input_shape):
        if type(input_shape[0]) is tuple:  # true label provided
            return tuple([None, input_shape[0][-1]])
        else:
            return tuple([None, input_shape[-1]])


def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm)
    return scale * vectors


class CapsuleLayer(layers.Layer):
    """
    The capsule layer. It is similar to Dense layer. Dense layer has `in_num` inputs, each is a scalar, the output of the 
    neuron from the former layer, and it has `out_num` output neurons. CapsuleLayer just expand the output of the neuron
    from scalar to vector. So its input shape = [None, input_num_capsule, input_dim_vector] and output shape = \
    [None, num_capsule, dim_vector]. For Dense Layer, input_dim_vector = dim_vector = 1.
    
    :param num_capsule: number of capsules in this layer
    :param dim_vector: dimension of the output vectors of the capsules in this layer
    :param num_routings: number of iterations for the routing algorithm
    """
    def __init__(self, num_capsule, dim_vector, num_routing=3,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 **kwargs):
        super(CapsuleLayer, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_vector = dim_vector
        self.num_routing = num_routing
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

    def build(self, input_shape):
        assert len(input_shape) >= 3, "The input Tensor should have shape=[None, input_num_capsule, input_dim_vector]"
        self.input_num_capsule = input_shape[1]
        self.input_dim_vector = input_shape[2]

        # Transform matrix
        self.W = self.add_weight(shape=[self.input_num_capsule, self.num_capsule, self.input_dim_vector, self.dim_vector],
                                 initializer=self.kernel_initializer,
                                 name='W')

        # Coupling coefficient. The redundant dimensions are just to facilitate subsequent matrix calculation.
        self.bias = self.add_weight(shape=[1, self.input_num_capsule, self.num_capsule, 1, 1],
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    trainable=False)
        self.built = True

    def call(self, inputs, training=None):
        # inputs.shape=[None, input_num_capsule, input_dim_vector]
        # Expand dims to [None, input_num_capsule, 1, 1, input_dim_vector]
        inputs_expand = K.expand_dims(K.expand_dims(inputs, 2), 2)

        # Replicate num_capsule dimension to prepare being multiplied by W
        # Now it has shape = [None, input_num_capsule, num_capsule, 1, input_dim_vector]
        inputs_tiled = K.tile(inputs_expand, [1, 1, self.num_capsule, 1, 1])

        """  
        # Compute `inputs * W` by expanding the first dim of W. More time-consuming and need batch_size.
        # Now W has shape  = [batch_size, input_num_capsule, num_capsule, input_dim_vector, dim_vector]
        w_tiled = K.tile(K.expand_dims(self.W, 0), [self.batch_size, 1, 1, 1, 1])
        
        # Transformed vectors, inputs_hat.shape = [None, input_num_capsule, num_capsule, 1, dim_vector]
        inputs_hat = K.batch_dot(inputs_tiled, w_tiled, [4, 3])
        """
        # Compute `inputs * W` by scanning inputs_tiled on dimension 0. This is faster but requires Tensorflow.
        # inputs_hat.shape = [None, input_num_capsule, num_capsule, 1, dim_vector]
        inputs_hat = tf.scan(lambda ac, x: K.batch_dot(x, self.W, [3, 2]),
                             elems=inputs_tiled,
                             initializer=K.zeros([self.input_num_capsule, self.num_capsule, 1, self.dim_vector]))
        """
        # Routing algorithm V1. Use tf.while_loop in a dynamic way.
        def body(i, b, outputs):
            c = tf.nn.softmax(self.bias, dim=2)  # dim=2 is the num_capsule dimension
            outputs = squash(K.sum(c * inputs_hat, 1, keepdims=True))
            b = b + K.sum(inputs_hat * outputs, -1, keepdims=True)
            return [i-1, b, outputs]

        cond = lambda i, b, inputs_hat: i > 0
        loop_vars = [K.constant(self.num_routing), self.bias, K.sum(inputs_hat, 1, keepdims=True)]
        _, _, outputs = tf.while_loop(cond, body, loop_vars)
        """
        # Routing algorithm V2. Use iteration. V2 and V1 both work without much difference on performance
        assert self.num_routing > 0, 'The num_routing should be > 0.'
        for i in range(self.num_routing):
            c = tf.nn.softmax(self.bias, dim=2)  # dim=2 is the num_capsule dimension
            # outputs.shape=[None, 1, num_capsule, 1, dim_vector]
            outputs = squash(K.sum(c * inputs_hat, 1, keepdims=True))

            # last iteration needs not compute bias which will not be passed to the graph any more anyway.
            if i != self.num_routing - 1:
                # self.bias = K.update_add(self.bias, K.sum(inputs_hat * outputs, [0, -1], keepdims=True))
                self.bias += K.sum(inputs_hat * outputs, -1, keepdims=True)
            # tf.summary.histogram('BigBee', self.bias)  # for debugging
        return K.reshape(outputs, [-1, self.num_capsule, self.dim_vector])

    def compute_output_shape(self, input_shape):
        return tuple([None, self.num_capsule, self.dim_vector])


def PrimaryCap(inputs, dim_vector, n_channels, kernel_size, strides, padding):
    """
    Apply Conv1D `n_channels` times and concatenate all capsules
    :param inputs: 4D tensor, shape=[None, width, height, channels]
    :param dim_vector: the dim of the output vector of capsule
    :param n_channels: the number of types of capsules
    :return: output tensor, shape=[None, num_capsule, dim_vector]
    """
    output = layers.Conv1D(filters=dim_vector*n_channels, kernel_size=kernel_size, strides=strides, padding=padding)(inputs)
    outputs = layers.Reshape(target_shape=[-1, dim_vector])(output)
    return layers.Lambda(squash)(outputs)


In [0]:
# BUILDING THE MODEL

from keras import layers, models
from keras import backend as K
from keras.utils import to_categorical
from keras import callbacks


def CapsNet(input_shape, n_class, num_routing):
    """
    A Capsule Network on MNIST.
    :param input_shape: data shape, 4d, [None, width, height, channels]
    :param n_class: number of classes
    :param num_routing: number of routing iterations
    :return: A Keras Model with 2 inputs and 2 outputs
    """
    x = layers.Input(shape=input_shape)
    print(input_shape, x.shape)

    # Layer 1: Just a conventional Conv1D layer
    #Add Batch Norm before Activation
    
    bn = layers.normalization.BatchNormalization()(x)
    conv1 = layers.Conv1D(filters=32, kernel_size=4, strides=1, padding='valid', activation='relu', name='conv1')(bn)

    # Layer 2: Conv1D layer with `squash` activation, then reshape to [None, num_capsule, dim_vector]
    primarycaps = PrimaryCap(conv1, dim_vector=8, n_channels=32, kernel_size=9, strides=2, padding='valid')

    # Layer 3: Capsule layer. Routing algorithm works here.
    digitcaps = CapsuleLayer(num_capsule=n_class, dim_vector=16, num_routing=num_routing, name='digitcaps')(primarycaps)

    # Layer 4: This is an auxiliary layer to replace each capsule with its length. Just to match the true label's shape.
    # If using tensorflow, this will not be necessary. :)
    out_caps = Length(name='out_caps')(digitcaps)

    # Decoder network.
    y = layers.Input(shape=(n_class,))
    masked = Mask()([digitcaps, y])  # The true label is used to mask the output of capsule layer.
    bn2 = layers.BatchNormalization()(masked)
    x_recon = layers.Dense(64, activation='relu')(bn2)
    x_recon = layers.Dense(128, activation='relu')(x_recon)
    #x_recon = layers.Dropout(0.5)(x_recon)
    bn3 = layers.BatchNormalization()(x_recon)
    x_recon = layers.Dense(20, activation='sigmoid')(bn3)
    #x_recon = layers.Flatten()(x_recon)
    x_recon = layers.Reshape(target_shape=[20, 1], name='out_recon')(x_recon)
    

    # two-input-two-output keras Model
    return models.Model([x, y], [out_caps, x_recon])


## Defining the Loss Function

def margin_loss(y_true, y_pred):
    """
    Margin loss for Eq.(4). When y_true[i, :] contains not just one `1`, this loss should work too. Not test it.
    :param y_true: [None, n_classes]
    :param y_pred: [None, num_capsule]
    :return: a scalar loss value.
    """
    L = y_true * K.square(K.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))

    return K.mean(K.sum(L, 1))


In [0]:
def train(model, data, epoch_size_frac=1.0):
    """
    Training a CapsuleNet
    :param model: the CapsuleNet model
    :param data: a tuple containing training and testing data, like `((x_train, y_train), (x_test, y_test))`
    :param args: arguments
    :return: The trained model
    """
    # unpacking the data
    #(x_train, y_train), (x_test, y_test) = data
    (x_train, y_train) = data
    
    global y_pred

    # callbacks
    log = callbacks.CSVLogger('log.csv')
    checkpoint = callbacks.ModelCheckpoint('weights-{epoch:02d}.h5',
                                           save_best_only=True, save_weights_only=True, verbose=10)
    lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001 * np.exp(-epoch / 10.))
    
    logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tb = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
    
    cb = callbacks.EarlyStopping(monitor = 'loss', mode = 'min', patience = 15, restore_best_weights=True)

    # compile the model
    model.compile(optimizer='adam',
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., 0.0005],
                  metrics={'out_caps': 'accuracy'})
    
    
    ''''model.fit([x_train, y_train], [y_train, x_train], batch_size=32, epochs=2,
              validation_data=[[x_test, y_test], [y_test, x_test]])'''
    
    model.fit([x_train, y_train], [y_train, x_train], batch_size=32, epochs=150,validation_split = 0.2, callbacks = [cb])
              
    
    
    model.save_weights('trained_model.h5')
    print('Trained model saved to \'trained_model.h5\'')
    
    return model

In [0]:
  model = CapsNet(input_shape=[20, 1],
                n_class=2,
                num_routing=3)

[20, 1] (?, 20, 1)


In [0]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_15 (InputLayer)           (None, 20, 1)        0                                            
__________________________________________________________________________________________________
conv1 (Conv1D)                  (None, 17, 32)       160         input_15[0][0]                   
__________________________________________________________________________________________________
conv1d_8 (Conv1D)               (None, 5, 256)       73984       conv1[0][0]                      
__________________________________________________________________________________________________
reshape_8 (Reshape)             (None, 160, 8)       0           conv1d_8[0][0]                   
__________________________________________________________________________________________________
lambda_8 (

## Working with Data

In [0]:
x_train = df.iloc[:,0:20].values
y_train = df.iloc[:,20].values

In [0]:
import numpy as np
unique, counts = np.unique(y_train, return_counts=True)
dict(zip(unique, counts))

{0: 8060, 1: 8060}

In [0]:
x_test = df2.iloc[:, 0:20].values
y_test = df2.iloc[:, 20].values

In [0]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((16120, 20), (4030, 20), (16120,), (4030,))

In [0]:
#Reshaping the feature samples
x_train_reshape = x_train.reshape(16120, 20, 1)
y_train_reshape = y_train.reshape(16120, 1)
#x_test_reshape = x_test.reshape(4030, 20, 1)
#y_test_reshape = y_test.reshape(4030, 1)


In [0]:
#Reshaping the labels
y_train_ = tf.keras.utils.to_categorical(y_train_reshape,num_classes=2)
#y_test_ = tf.keras.utils.to_categorical(y_test_reshape,num_classes=2)

In [0]:
train(model=model, data=((x_train_reshape, y_train_)), epoch_size_frac = 0.5)

Train on 12896 samples, validate on 3224 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/15

<keras.engine.training.Model at 0x7f3e50abbf28>

# Testing on the data

In [0]:
x_test = df2.iloc[:, 0:20].values
y_test = df2.iloc[:, 20].values

In [0]:
layer_name = 'out_caps'
intermediate_layer_model = models.Model(inputs=model.input[0],
                                 outputs=model.get_layer(layer_name).output)

In [0]:
x_test.shape

(4030, 20)

In [0]:
x_test_reshape = x_test.reshape(4030, 20, 1)


In [0]:
x_test_reshape.shape

(4030, 20, 1)

In [0]:
y_pred = intermediate_layer_model.predict(x_test_reshape) 

In [0]:
cm3 = confusion_matrix(y_test, np.argmax(y_pred, axis=1))
cm3

array([[1931,   84],
       [  86, 1929]])

In [0]:
np.argmax(y_pred, axis = 1)

array([1, 1, 1, ..., 1, 0, 0])

In [0]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test, np.argmax(y_pred, axis = 1))

0.9397022332506204

## Testing Data

## Uploading another file as an independent test set

In [0]:
from google.colab import files
uploaded2 = files.upload()


Saving test_AComp2.csv to test_AComp2.csv


In [0]:
import pandas as pd
import io
df3 = pd.read_csv(io.BytesIO(uploaded2['test_AComp2.csv']))

In [0]:
x_test_data = df3.iloc[:, 0:20].values
y_test_data = df3.iloc[:, 20].values

In [0]:
y_true = y_test_data

In [0]:
layer_name = 'out_caps'
intermediate_layer_model2 = models.Model(inputs=model.input[0],
                                 outputs=model.get_layer(layer_name).output)

In [0]:
x_test_data.shape

(4030, 20)

In [0]:
x_test_reshape2 = x_test_data.reshape(4030, 20, 1)


In [0]:
y_pred = intermediate_layer_model2.predict(x_test_reshape2)

In [0]:
cm = confusion_matrix(np.argmax(y_test_, axis=1), np.argmax(y_pred, axis=1))
cm

array([[1851,  164],
       [  79, 1936]])

In [0]:
y_true.shape, y_pred.shape

((4030,), (4030, 2))

In [0]:
y_pred

array([[0.32652962, 0.9261913 ],
       [0.1351722 , 0.8690385 ],
       [0.02157693, 0.8851954 ],
       ...,
       [0.8924629 , 0.12365953],
       [0.8798937 , 0.58914596],
       [0.9349014 , 0.22312114]], dtype=float32)

In [0]:
final_predNew = (y_pred >= 0.5).astype(np.int)


In [0]:
final_predNew

array([[0, 1],
       [0, 1],
       [0, 1],
       ...,
       [1, 0],
       [1, 1],
       [1, 0]])

# Gradient Boosting Classification

In [0]:
from sklearn.ensemble import GradientBoostingClassifier
gbr = GradientBoostingClassifier(n_estimators=1000, learning_rate=0.01,max_depth=5, random_state=0)

In [0]:
from sklearn.model_selection import cross_val_score, StratifiedKFold
scores_gbr_mse = cross_val_score(gbr,x_train , y_train, cv=5, scoring='accuracy')

In [0]:
scores_gbr_mse

array([0.9351737 , 0.93145161, 0.9280397 , 0.92307692, 0.9233871 ])

In [0]:
from sklearn.ensemble import AdaBoostClassifier
abc = AdaBoostClassifier()

In [0]:
from sklearn.model_selection import GridSearchCV


In [0]:
parameters = {'n_estimators':(2000, 2500, 3000, 3500, 4000)}

In [0]:
clf1 = GridSearchCV(gbr, parameters, cv = 5)
clf2 = GridSearchCV(abc, parameters, cv = 5)

In [0]:
clf1.fit(x_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=GradientBoostingClassifier(criterion='friedman_mse',
                                                  init=None, learning_rate=0.01,
                                                  loss='deviance', max_depth=5,
                                                  max_features=None,
                                                  max_leaf_nodes=None,
                                                  min_impurity_decrease=0.0,
                                                  min_impurity_split=None,
                                                  min_samples_leaf=1,
                                                  min_samples_split=2,
                                                  min_weight_fraction_leaf=0.0,
                                                  n_estimators=1000,
                                                  n_iter_no_change=None,
                                                  presor

In [0]:
y_pred1 = clf1.predict(x_test)
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred1)

0.9506203473945409

In [0]:
clf2.fit(x_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=AdaBoostClassifier(algorithm='SAMME.R',
                                          base_estimator=None,
                                          learning_rate=1.0, n_estimators=50,
                                          random_state=None),
             iid='warn', n_jobs=None,
             param_grid={'n_estimators': (2000, 2500, 3000, 3500, 4000)},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [0]:
y_pred2 = clf2.predict(x_test)
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred2)

0.9022332506203474

# Training and Testing on Set 2

In [0]:
from google.colab import files
uploaded = files.upload()


Saving AComp2.csv to AComp2.csv


In [0]:
import pandas as pd
import io
df3 = pd.read_csv(io.BytesIO(uploaded['AComp2.csv']))

In [0]:
from google.colab import files
uploaded2 = files.upload()


Saving test_AComp2.csv to test_AComp2.csv


In [0]:
df4 = pd.read_csv(io.BytesIO(uploaded2['test_AComp2.csv']))

In [0]:
x_train2 = df3.iloc[:,0:20].values
y_train2 = df3.iloc[:,20].values

In [0]:
x_test2 = df4.iloc[:,0:20].values
y_test2 = df4.iloc[:,20].values

In [0]:
#Reshaping the feature samples
x_train_reshape2 = x_train2.reshape(16120, 20, 1)
y_train_reshape2 = y_train2.reshape(16120, 1)
#x_test_reshape = x_test.reshape(4030, 20, 1)
#y_test_reshape = y_test.reshape(4030, 1)


In [0]:
#Reshaping the labels
y_train2_ = tf.keras.utils.to_categorical(y_train_reshape2,num_classes=2)
#y_test_ = tf.keras.utils.to_categorical(y_test_reshape,num_classes=2)

In [0]:
  model2 = CapsNet(input_shape=[20, 1],
                n_class=2,
                num_routing=3)

[20, 1] (?, 20, 1)


In [0]:
train(model=model2, data=((x_train_reshape2, y_train2_)), epoch_size_frac = 0.5)

In [0]:
layer_name = 'out_caps'
intermediate_layer_model2 = models.Model(inputs=model2.input[0],
                                 outputs=model2.get_layer(layer_name).output)

In [0]:
x_test_reshape2 = x_test2.reshape(4030, 20, 1)

In [0]:
y_pred2 = intermediate_layer_model2.predict(x_test_reshape2)

In [0]:
cm2 = confusion_matrix(y_test2, np.argmax(y_pred2, axis=1))
cm2

array([[1898,  117],
       [ 105, 1910]])

# Training and Testing on Set 3

In [0]:
model3 = CapsNet(input_shape=[20, 1],
                n_class=2,
                num_routing=3)

[20, 1] (?, 20, 1)


## Loading the training and testing sets

In [0]:
from google.colab import files
uploaded = files.upload()


Saving AComp3.csv to AComp3.csv


In [0]:
import pandas as pd
import io
df4 = pd.read_csv(io.BytesIO(uploaded['AComp3.csv']))

In [0]:
df4.head()

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,Label
0,5.41,0.0,8.11,11.49,6.08,3.38,2.03,9.46,13.51,10.14,5.41,2.7,2.03,4.05,4.05,2.03,2.7,5.41,0.0,2.03,1
1,6.88,0.0,5.62,8.75,5.0,8.75,2.5,8.12,9.38,5.62,1.25,5.62,5.0,1.25,1.88,6.88,5.62,7.5,0.0,4.38,1
2,9.84,0.26,5.96,5.44,5.18,5.7,0.78,5.18,6.99,10.1,2.85,4.4,3.63,3.63,2.07,7.77,10.62,4.92,0.52,4.15,1
3,3.91,1.95,0.33,1.63,3.26,2.28,1.3,5.21,0.65,8.14,0.65,1.95,15.64,34.2,1.95,5.21,2.28,5.21,0.33,3.91,1
4,6.99,6.99,1.4,6.99,3.5,4.9,0.7,3.5,2.1,9.79,4.2,5.59,9.79,6.29,4.2,5.59,6.29,5.59,1.4,4.2,1


In [0]:
uploaded = files.upload()


Saving test_AComp3.csv to test_AComp3.csv


In [0]:
df5 = pd.read_csv(io.BytesIO(uploaded['test_AComp3.csv']))

In [0]:
x_train3 = df4.iloc[:, 0:20].values
y_train3 = df4.iloc[:, 20].values

In [0]:
x_test3 = df5.iloc[:, 0:20].values
y_test3 = df5.iloc[:, 20].values

In [0]:
#Reshaping the feature samples
x_train_reshape3 = x_train3.reshape(16120, 20, 1)
y_train_reshape3 = y_train3.reshape(16120, 1)
#x_test_reshape = x_test.reshape(4030, 20, 1)
#y_test_reshape = y_test.reshape(4030, 1)


In [0]:
#Reshaping the Labels
y_train3_ = tf.keras.utils.to_categorical(y_train_reshape3,num_classes=2)


In [0]:
train(model=model3, data=((x_train_reshape3, y_train3_)), epoch_size_frac = 0.5)

Instructions for updating:
Use tf.cast instead.
Train on 12896 samples, validate on 3224 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Ep

<keras.engine.training.Model at 0x7f3e68003518>

In [0]:
# Testing
x_test_reshape3 = x_test3.reshape(4030, 20, 1)

In [0]:
layer_name = 'out_caps'
intermediate_layer_model3 = models.Model(inputs=model3.input[0],
                                 outputs=model3.get_layer(layer_name).output)

In [0]:
y_pred3 = intermediate_layer_model3.predict(x_test_reshape3)

In [0]:
cm3 = confusion_matrix(y_test3, np.argmax(y_pred3, axis=1))
cm3

array([[1938,   77],
       [ 103, 1912]])

# Training and Testing on Set 4

In [0]:
uploaded = files.upload()


Saving AComp4.csv to AComp4.csv


In [0]:
df6 = pd.read_csv(io.BytesIO(uploaded['AComp4.csv']))

In [0]:
uploaded = files.upload()


Saving test_AComp4.csv to test_AComp4.csv


In [0]:
df7 = pd.read_csv(io.BytesIO(uploaded['test_AComp4.csv']))

In [0]:
x_train4 = df6.iloc[:, 0:20].values
y_train4 = df6.iloc[:, 20].values
x_test4 = df7.iloc[:, 0:20].values

In [0]:
x_train4.shape, y_train4.shape, x_test4.shape

((16120, 20), (16120,), (4030, 20))

In [0]:
#Reshaping the train and test set
x_train_reshape4 = x_train4.reshape(16120, 20, 1)
y_train_reshape4 = y_train4.reshape(16120, 1)
x_test_reshape4 = x_test4.reshape(4030, 20, 1)

In [0]:
#Reshaping the Labels
y_train4_ = tf.keras.utils.to_categorical(y_train_reshape4,num_classes=2)


In [0]:
model4 = CapsNet(input_shape=[20, 1],
                n_class=2,
                num_routing=3)

[20, 1] (?, 20, 1)


In [0]:
train(model=model4, data=((x_train_reshape4, y_train4_)), epoch_size_frac = 0.5)

Train on 12896 samples, validate on 3224 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/10

<keras.engine.training.Model at 0x7f3e504fde10>

In [0]:
layer_name = 'out_caps'
intermediate_layer_model4 = models.Model(inputs=model4.input[0],
                                 outputs=model4.get_layer(layer_name).output)

In [0]:
y_pred4 = intermediate_layer_model4.predict(x_test_reshape4)

In [0]:
cm4 = confusion_matrix(y_test4, np.argmax(y_pred4, axis=1))
cm4

array([[1842,  173],
       [  64, 1951]])

# Training and Testing Set 5

In [0]:
model5 = CapsNet(input_shape=[20, 1],
                n_class=2,
                num_routing=3)

[20, 1] (?, 20, 1)


In [0]:
uploaded = files.upload()

Saving AComp5.csv to AComp5.csv


In [0]:
df8 = pd.read_csv(io.BytesIO(uploaded['AComp5.csv']))

In [0]:
uploaded = files.upload()

Saving test_AComp5.csv to test_AComp5.csv


In [0]:
df9 = pd.read_csv(io.BytesIO(uploaded['test_AComp5.csv']))

In [0]:
x_train5 = df8.iloc[:, 0:20].values
y_train5 = df8.iloc[:, 20].values
x_test5 = df9.iloc[:, 0:20].values
y_test5 = df9.iloc[:, 20].values

In [0]:
#Reshaping the train and test set
x_train_reshape5 = x_train5.reshape(16120, 20, 1)
y_train_reshape5 = y_train5.reshape(16120, 1)
x_test_reshape5 = x_test5.reshape(4030, 20, 1)

In [0]:
#Reshaping the Labels
y_train5_ = tf.keras.utils.to_categorical(y_train_reshape5,num_classes=2)


In [0]:
train(model=model5, data=((x_train_reshape5, y_train5_)), epoch_size_frac = 0.5)

Train on 12896 samples, validate on 3224 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/15

<keras.engine.training.Model at 0x7f3e195affd0>

In [0]:
layer_name = 'out_caps'
intermediate_layer_model5 = models.Model(inputs=model5.input[0],
                                 outputs=model5.get_layer(layer_name).output)

In [0]:
y_pred5 = intermediate_layer_model5.predict(x_test_reshape5)

In [0]:
cm5 = confusion_matrix(y_test5, np.argmax(y_pred5, axis=1))
cm5

array([[1920,   95],
       [ 128, 1887]])

# Using Combined Data

In [0]:
upload = files.upload()

Saving Complete_Data_Allergens.csv to Complete_Data_Allergens.csv


In [0]:
import pandas as pd
import io
df = pd.read_csv(io.BytesIO(upload['Complete_Data_Allergens.csv']))

In [0]:
df.head()

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,Labels
0,9.88,3.7,8.02,9.26,3.09,4.32,0.62,3.09,5.56,6.17,1.23,6.17,9.26,4.32,3.09,12.35,6.17,2.47,0.62,0.62,0
1,6.24,2.72,6.24,4.64,4.64,7.68,2.24,4.32,6.72,11.2,2.24,3.52,5.28,2.72,5.6,7.36,5.92,8.0,1.28,1.44,0
2,9.94,0.0,3.44,3.63,3.06,7.65,3.06,3.06,3.06,3.44,1.53,7.07,10.13,8.03,5.93,14.15,4.21,5.74,1.15,1.72,0
3,6.57,2.35,7.04,5.16,2.35,5.16,3.76,4.69,1.88,11.74,2.82,4.23,1.41,7.51,10.8,5.16,5.16,7.98,1.41,2.82,0
4,5.35,3.38,4.51,5.07,1.41,5.35,2.82,5.35,5.07,8.73,1.41,4.51,5.63,4.79,3.66,8.17,10.42,9.3,2.25,2.82,0


In [0]:
X = df.iloc[:, 0:20].values
Y = df.iloc[:, 20].values

In [0]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3, stratify = Y, random_state = 42)

In [0]:
import numpy as np
unique, counts = np.unique(y_train, return_counts=True)
dict(zip(unique, counts))

{0: 7052, 1: 7053}

In [0]:
x_train.shape, x_test.shape

((14105, 20), (6045, 20))

In [0]:
#Reshaping the train and test set
x_train_reshape = x_train.reshape(14105, 20, 1)
y_train_reshape = y_train.reshape(14105, 1)
x_test_reshape = x_test.reshape(6045, 20, 1)

In [0]:
#Reshaping the Labels
y_train_ = tf.keras.utils.to_categorical(y_train_reshape,num_classes=2)


In [0]:
model = CapsNet(input_shape=[20, 1],
                n_class=2,
                num_routing=4)

[20, 1] (?, 20, 1)


In [0]:
train(model=model, data=((x_train_reshape, y_train_)), epoch_size_frac = 0.5)

Train on 11284 samples, validate on 2821 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/15

<keras.engine.training.Model at 0x7f3e14436780>

In [0]:
layer_name = 'out_caps'
intermediate_layer_model = models.Model(inputs=model.input[0],
                                 outputs=model.get_layer(layer_name).output)

In [0]:
y_pred = intermediate_layer_model.predict(x_test_reshape)

In [0]:
cm = confusion_matrix(y_test, np.argmax(y_pred, axis=1))
cm

array([[2793,  230],
       [ 142, 2880]])

# AutoML

In [0]:
!apt-get install swig -y
!pip install Cython numpy

# sometimes you have to run the next command twice on colab
# I haven't figured out why
!pip install auto-sklearn

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-410
Use 'apt autoremove' to remove it.
The following additional packages will be installed:
  swig3.0
Suggested packages:
  swig-doc swig-examples swig3.0-examples swig3.0-doc
The following NEW packages will be installed:
  swig swig3.0
0 upgraded, 2 newly installed, 0 to remove and 6 not upgraded.
Need to get 1,100 kB of archives.
After this operation, 5,822 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 swig3.0 amd64 3.0.12-1 [1,094 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 swig amd64 3.0.12-1 [6,460 B]
Fetched 1,100 kB in 0s (7,872 kB/s)
Selecting previously unselected package swig3.0.
(Reading database ... 130911 files and directories currently installed.)
Preparing to unpack .../swig3.0_3.0.12-1_amd64.deb ...
Unpac

In [0]:
!pip install --no-cache-dir -v pyrfr



In [0]:
!sudo apt-get install build-essential swig


In [0]:
import autosklearn.classification

  from numpy.core.umath_tests import inner1d


In [0]:
cls =  autosklearn.classification.AutoSklearnClassifier()
     

In [0]:
x_train = df.iloc[:,0:20].values
y_train = df.iloc[:,20].values
x_test = df2.iloc[:,0:20].values
y_test = df2.iloc[:,20].values

In [0]:
cls.fit(x_train, y_train)

In [0]:
y_pred = cls.predict(x_test)

In [0]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.971712158808933

In [0]:
print(cls.sprint_statistics())

auto-sklearn results:
  Dataset name: cd34fe534e3c1f704d699b6d2d4b61e0
  Metric: accuracy
  Best validation score: 0.964286
  Number of target algorithm runs: 106
  Number of successful target algorithm runs: 102
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 4
  Number of target algorithms that exceeded the memory limit: 0



In [0]:
print(cls.show_models())

In [0]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_true, y_pred)

NameError: ignored

## Auto Sklearn with entire data

In [0]:
from google.colab import files
uploaded = files.upload()


Saving Complete_Data_Allergens.csv to Complete_Data_Allergens.csv


In [0]:
import pandas as pd
import io
df = pd.read_csv(io.BytesIO(uploaded['Complete_Data_Allergens.csv']))

In [0]:
x = df.iloc[:, 0:20].values
y = df.iloc[:, 20].values

In [0]:
import numpy as np
unique, counts = np.unique(y, return_counts=True)
dict(zip(unique, counts))

{0: 10075, 1: 10075}

In [0]:
import autosklearn.classification
cls = autosklearn.classification.AutoSklearnClassifier(resampling_strategy='cv',
        resampling_strategy_arguments={'folds': 10})
     

In [0]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = 0.2, stratify = y, random_state = 42)

In [0]:
cls.fit(x_train, y_train)

1
['/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000000.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000001.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000002.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000003.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000004.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000005.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000006.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000007.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000008.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000009.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000010.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ensembles/1.0000000011.ensemble', '/tmp/autosklearn_tmp_143_3897/.auto-sklearn/ense

AutoSklearnClassifier(delete_output_folder_after_terminate=True,
           delete_tmp_folder_after_terminate=True,
           disable_evaluator_output=False, ensemble_memory_limit=1024,
           ensemble_nbest=50, ensemble_size=50, exclude_estimators=None,
           exclude_preprocessors=None, get_smac_object_callback=None,
           include_estimators=None, include_preprocessors=None,
           initial_configurations_via_metalearning=25, logging_config=None,
           metadata_directory=None, ml_memory_limit=3072, n_jobs=None,
           output_folder=None, per_run_time_limit=360,
           resampling_strategy='cv',
           resampling_strategy_arguments={'folds': 10}, seed=1,
           shared_mode=False, smac_scenario_args=None,
           time_left_for_this_task=3600, tmp_folder=None)

In [0]:
y_pred = cls.refit(x.copy(), y.copy())
cls.cv_results_

{'mean_fit_time': array([ 45.39333773, 221.397861  , 108.75488901,  31.94562531,
        215.15770936, 191.84766197, 130.60086608, 360.11775088,
        312.59700274,  19.98884392,  35.35358596, 125.36279559,
         41.36913657,   9.83233643,  17.94522023,   5.56589651,
         46.92602515,  35.48386478,  47.09789705, 360.1178956 ,
         31.96032047,  55.35086775,  52.24155927,  28.08233094,
        360.08591533,  20.13560081,  25.71572185, 104.14301491,
         82.14506149,  21.13012505,  82.76960564, 344.0286665 ]),
 'mean_test_score': array([0.96501241, 0.95626551, 0.97115385, 0.91098015, 0.95465261,
        0.96544665, 0.95403226, 0.        , 0.88058313, 0.89621588,
        0.85887097, 0.92022333, 0.96724566, 0.80384615, 0.73684864,
        0.77884615, 0.96792804, 0.93411911, 0.94050868, 0.        ,
        0.94863524, 0.93200993, 0.95849876, 0.93926799, 0.        ,
        0.9117866 , 0.86271712, 0.91532258, 0.95880893, 0.91643921,
        0.95744417, 0.        ]),
 'param_

In [0]:
y_score = cls.predict_proba(x_test)

In [0]:
from sklearn.metrics import accuracy_score, matthews_corrcoef, roc_auc_score
print("The accuracy is %d", accuracy_score(y_test, y_pred))
#print("The MCC is %d", matthews_corrcoef(y_test, y_pred))
#print("The AUC-ROC Score is %d", roc_auc_score(y_test, y_score))

TypeError: ignored

In [0]:
from sklearn.metrics import accuracy_score, matthews_corrcoef, roc_auc_score
print("The accuracy is %d", accuracy_score(y_test, y_pred))
print("The MCC is %d", matthews_corrcoef(y_test, y_pred))
print("The AUC-ROC Score is %d", roc_auc_score(y_test, y_score))

# Auto Sklearn on Set 2

In [0]:
!apt-get install swig -y
!pip install Cython numpy

# sometimes you have to run the next command twice on colab
# I haven't figured out why
!pip install auto-sklearn

In [0]:
import autosklearn.classification

  from numpy.core.umath_tests import inner1d


In [0]:
cls2 = autosklearn.classification.AutoSklearnClassifier()

In [0]:
from google.colab import files
uploaded = files.upload()


Saving AComp2.csv to AComp2 (1).csv


In [0]:
import pandas as pd
import io
df = pd.read_csv(io.BytesIO(uploaded['AComp2.csv']))

In [0]:
from google.colab import files
uploaded = files.upload()


Saving test_AComp2.csv to test_AComp2 (1).csv


In [0]:
df2 = pd.read_csv(io.BytesIO(uploaded['test_AComp2.csv']))

In [0]:
x_train = df.iloc[:, 0:20].values
y_train = df.iloc[:, 20].values
x_test = df2.iloc[:, 0:20].values
y_test = df2.iloc[:, 20].values

In [0]:
cls2.fit(x_train, y_train)

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)




  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)




  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_

1
['/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000000.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000001.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000002.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000003.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000004.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000005.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000006.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000007.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000008.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000009.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000010.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ensembles/1.0000000011.ensemble', '/tmp/autosklearn_tmp_131_3838/.auto-sklearn/ense

AutoSklearnClassifier(delete_output_folder_after_terminate=True,
           delete_tmp_folder_after_terminate=True,
           disable_evaluator_output=False, ensemble_memory_limit=1024,
           ensemble_nbest=50, ensemble_size=50, exclude_estimators=None,
           exclude_preprocessors=None, get_smac_object_callback=None,
           include_estimators=None, include_preprocessors=None,
           initial_configurations_via_metalearning=25, logging_config=None,
           metadata_directory=None, ml_memory_limit=3072, n_jobs=None,
           output_folder=None, per_run_time_limit=360,
           resampling_strategy='holdout',
           resampling_strategy_arguments=None, seed=1, shared_mode=False,
           smac_scenario_args=None, time_left_for_this_task=3600,
           tmp_folder=None)

In [0]:
y_score = cls2.predict_proba(x_test)
y_score.shape

(4030, 2)

In [0]:
import numpy as np
y_score2 = np.argmax(y_score, axis = 1)
y_score2

array([1, 1, 1, ..., 0, 0, 0])

In [0]:
from sklearn.metrics import accuracy_score, matthews_corrcoef, roc_auc_score
print("The accuracy is", accuracy_score(y_test, y_pred))
print("The MCC is", matthews_corrcoef(y_test, y_pred))
print("The AUC-ROC Score is", roc_auc_score(y_test, y_score2))

The accuracy is 0.9707196029776675
The MCC is 0.9415894928700321
The AUC-ROC Score is 0.9707196029776676


# Auto Sklearn on Set 3

In [0]:
!apt-get install swig -y
!pip install Cython numpy

# sometimes you have to run the next command twice on colab
# I haven't figured out why
!pip install auto-sklearn

Reading package lists... Done
Building dependency tree       
Reading state information... Done
swig is already the newest version (3.0.12-1).
The following package was automatically installed and is no longer required:
  libnvidia-common-410
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.


In [0]:
import autosklearn.classification

  from numpy.core.umath_tests import inner1d


In [0]:
cls3 = autosklearn.classification.AutoSklearnClassifier()

In [0]:
from google.colab import files
uploaded = files.upload()

Saving AComp3.csv to AComp3 (2).csv


In [0]:
import pandas as pd
import io
df = pd.read_csv(io.BytesIO(uploaded['AComp3.csv']))

In [0]:
from google.colab import files
uploaded = files.upload()

Saving test_AComp3.csv to test_AComp3 (1).csv


In [0]:
import pandas as pd
import io
df2 = pd.read_csv(io.BytesIO(uploaded['test_AComp3.csv']))

In [0]:
x_train = df.iloc[:, 0:20].values
y_train = df.iloc[:, 20].values
x_test = df2.iloc[:, 0:20].values
y_test = df2.iloc[:, 20].values

In [0]:
cls3.fit(x_train, y_train)

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)




  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_

1
['/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000000.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000001.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000002.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000003.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000004.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000005.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000006.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000007.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000008.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000009.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000010.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto-sklearn/ensembles/1.0000000011.ensemble', '/tmp/autosklearn_tmp_2821_3933/.auto

AutoSklearnClassifier(delete_output_folder_after_terminate=True,
           delete_tmp_folder_after_terminate=True,
           disable_evaluator_output=False, ensemble_memory_limit=1024,
           ensemble_nbest=50, ensemble_size=50, exclude_estimators=None,
           exclude_preprocessors=None, get_smac_object_callback=None,
           include_estimators=None, include_preprocessors=None,
           initial_configurations_via_metalearning=25, logging_config=None,
           metadata_directory=None, ml_memory_limit=3072, n_jobs=None,
           output_folder=None, per_run_time_limit=360,
           resampling_strategy='holdout',
           resampling_strategy_arguments=None, seed=1, shared_mode=False,
           smac_scenario_args=None, time_left_for_this_task=3600,
           tmp_folder=None)

In [0]:
y_score = cls3.predict_proba(x_test)
y_score.shape

(4030, 2)

In [0]:
import numpy as np
y_score3 = np.argmax(y_score, axis = 1)
y_score3

array([1, 1, 1, ..., 0, 0, 0])

In [0]:
y_pred3 = cls3.predict(x_test)
y_pred3

array([1, 1, 1, ..., 0, 0, 0])

In [0]:
from sklearn.metrics import accuracy_score, matthews_corrcoef, roc_auc_score
print("The accuracy is", accuracy_score(y_test, y_pred3))
print("The MCC is", matthews_corrcoef(y_test, y_pred3))
print("The AUC-ROC Score is", roc_auc_score(y_test, y_score3))

The accuracy is 0.9719602977667494
The MCC is 0.9441998114631105
The AUC-ROC Score is 0.9719602977667494


# Auto Sklearn on Set 4

In [0]:
!apt-get install swig -y
!pip install Cython numpy

# sometimes you have to run the next command twice on colab
# I haven't figured out why
!pip install auto-sklearn

Reading package lists... 0%Reading package lists... 0%Reading package lists... 0%Reading package lists... 7%Reading package lists... 7%Reading package lists... 8%Reading package lists... 8%Reading package lists... 72%Reading package lists... 72%Reading package lists... 73%Reading package lists... 73%Reading package lists... 79%Reading package lists... 79%Reading package lists... 79%Reading package lists... 79%Reading package lists... 85%Reading package lists... 88%Reading package lists... 88%Reading package lists... 88%Reading package lists... 88%Reading package lists... 88%Reading package lists... 88%Reading package lists... 88%Reading package lists... 88%Reading package lists... 91%Reading package lists... 91%Reading package lists... 91%Reading package lists... 91%Reading package lists... 93%Reading package lists... 93%Reading package lists... 93%Reading package lists... 93%Reading package lists... 94%Reading package 

In [0]:
import autosklearn.classification
cls4 = autosklearn.classification.AutoSklearnClassifier()


In [0]:
from google.colab import files
uploaded = files.upload()

Saving AComp4.csv to AComp4.csv


In [0]:
import pandas as pd
import io
df = pd.read_csv(io.BytesIO(uploaded['AComp4.csv']))

In [0]:
from google.colab import files
uploaded = files.upload()

Saving test_AComp4.csv to test_AComp4.csv


In [0]:
import pandas as pd
import io
df2 = pd.read_csv(io.BytesIO(uploaded['test_AComp4.csv']))

In [0]:
x_train = df.iloc[:, 0:20].values
y_train = df.iloc[:, 20].values
x_test = df2.iloc[:, 0:20].values
y_test = df2.iloc[:, 20].values

In [0]:
cls4.fit(x_train, y_train)

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)




  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_

1
['/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000000.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000001.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000002.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000003.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000004.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000005.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000006.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000007.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000008.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000009.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000010.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto-sklearn/ensembles/1.0000000011.ensemble', '/tmp/autosklearn_tmp_3200_6077/.auto

AutoSklearnClassifier(delete_output_folder_after_terminate=True,
           delete_tmp_folder_after_terminate=True,
           disable_evaluator_output=False, ensemble_memory_limit=1024,
           ensemble_nbest=50, ensemble_size=50, exclude_estimators=None,
           exclude_preprocessors=None, get_smac_object_callback=None,
           include_estimators=None, include_preprocessors=None,
           initial_configurations_via_metalearning=25, logging_config=None,
           metadata_directory=None, ml_memory_limit=3072, n_jobs=None,
           output_folder=None, per_run_time_limit=360,
           resampling_strategy='holdout',
           resampling_strategy_arguments=None, seed=1, shared_mode=False,
           smac_scenario_args=None, time_left_for_this_task=3600,
           tmp_folder=None)

In [0]:
y_score = cls4.predict_proba(x_test)
y_score.shape

(4030, 2)

In [0]:
import numpy as np
y_score4 = np.argmax(y_score, axis = 1)
y_score4

array([1, 1, 1, ..., 0, 0, 0])

In [0]:
y_pred4 = cls4.predict(x_test)
y_pred4

array([1, 1, 1, ..., 0, 0, 0])

In [0]:
from sklearn.metrics import accuracy_score, matthews_corrcoef, roc_auc_score
print("The accuracy is", accuracy_score(y_test, y_pred4))
print("The MCC is", matthews_corrcoef(y_test, y_pred4))
print("The AUC-ROC Score is", roc_auc_score(y_test, y_score4))

The accuracy is 0.9660049627791564
The MCC is 0.9321670897959992
The AUC-ROC Score is 0.9660049627791563


 # Auto Sklearn on Set 5
 

In [0]:
!apt-get install swig -y
!pip install Cython numpy

# sometimes you have to run the next command twice on colab
# I haven't figured out why
!pip install auto-sklearn

In [0]:
import autosklearn.classification
cls5 = autosklearn.classification.AutoSklearnClassifier()


In [0]:
from google.colab import files
uploaded = files.upload()

Saving AComp5.csv to AComp5 (2).csv


In [0]:
import pandas as pd
import io
df = pd.read_csv(io.BytesIO(uploaded['AComp5.csv']))

In [0]:
from google.colab import files
uploaded = files.upload()

Saving test_AComp5.csv to test_AComp5 (2).csv


In [0]:
import pandas as pd
import io
df2 = pd.read_csv(io.BytesIO(uploaded['test_AComp5.csv']))

In [0]:
x_train = df.iloc[:, 0:20].values
y_train = df.iloc[:, 20].values
x_test = df2.iloc[:, 0:20].values
y_test = df2.iloc[:, 20].values

In [0]:
cls5.fit(x_train, y_train)

  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)




  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)




  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_pred = np.nanmean(Y_train_pred_full, axis=0)
  Y_train_

1
['/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000000.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000001.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000002.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000003.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000004.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000005.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000006.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000007.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000008.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000009.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000010.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto-sklearn/ensembles/1.0000000011.ensemble', '/tmp/autosklearn_tmp_4103_5627/.auto

AutoSklearnClassifier(delete_output_folder_after_terminate=True,
           delete_tmp_folder_after_terminate=True,
           disable_evaluator_output=False, ensemble_memory_limit=1024,
           ensemble_nbest=50, ensemble_size=50, exclude_estimators=None,
           exclude_preprocessors=None, get_smac_object_callback=None,
           include_estimators=None, include_preprocessors=None,
           initial_configurations_via_metalearning=25, logging_config=None,
           metadata_directory=None, ml_memory_limit=3072, n_jobs=None,
           output_folder=None, per_run_time_limit=360,
           resampling_strategy='holdout',
           resampling_strategy_arguments=None, seed=1, shared_mode=False,
           smac_scenario_args=None, time_left_for_this_task=3600,
           tmp_folder=None)

In [0]:
y_score = cls5.predict_proba(x_test)
y_score.shape

(4030, 2)

In [0]:
import numpy as np
y_score5 = np.argmax(y_score, axis = 1)
y_score5

array([1, 1, 1, ..., 0, 0, 0])

In [0]:
y_pred5 = cls5.predict(x_test)
y_pred5

array([1, 1, 1, ..., 0, 0, 0])

In [0]:
from sklearn.metrics import accuracy_score, matthews_corrcoef, roc_auc_score
print("The accuracy is", accuracy_score(y_test, y_pred5))
print("The MCC is", matthews_corrcoef(y_test, y_pred5))
print("The AUC-ROC Score is", roc_auc_score(y_test, y_score5))

The accuracy is 0.9652605459057072
The MCC is 0.930638454010571
The AUC-ROC Score is 0.9652605459057071
