In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator,TransformerMixin
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer, StandardScaler, OneHotEncoder, normalize,Binarizer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from tensorflow.keras.utils import plot_model
import pydot

Neural architecture search (NAS) is an open problem in the world of artificial neural networks (ANNs). NAS encompasses the process of selecting the standard architectural hyperparameters of a network; neurons per layer, numbers of layer, connections between layers. Often times this is done either intuitively (with some trial & error) from previous experience, or through a brute force grid search type method. Most often, custom architectures are wholly avoided, instead using proven architectures implemented on same or similar problems.

The below solution is a proof of concept for a generalizeable NAS method. It has works by over-paramartizing the model, and then using regularization and weight thresholds to sparsify it. Each layer is fully connected to the next, like a standard feedforward neural network, except each layer is also fully connected to *every* proceeding layer, not just the next. This allows the network to train via normal back propagation, but the increase in connections along with the push for sparsification allows it to learn skip nodes or even layers. 

In the example below, an over-parameterized network of 19,412 trainable connection weight parameters, at 10 hidden layers and 20 neurons per layer, is reduced to 91 connections. That is a 98.9% reduction, with no loss on training OR testing performance.

In [2]:
train = pd.read_csv("/home/john/Documents/julia_data/iris_data/train.csv")
test = pd.read_csv("/home/john/Documents/julia_data/iris_data/test.csv")

In [3]:
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
train_y = train['Survived'].astype(float)
train_x = train.drop(['Survived'],axis=1)

In [5]:
class dropColumnTransformer(BaseEstimator,TransformerMixin):
    def __init__(self,feature_list):
        self.feature_list = feature_list
    
    def fit(self,X,y=None):
        return self
    
    def transform(self,X,y=None):
        X_ = X.copy()
        X_ = X_.drop(self.feature_list,axis=1)
        return X_

In [6]:
class stringSplitter(BaseEstimator,TransformerMixin):
    def __init__(self,feature_name):
        self.feature_name = feature_name
    
    def fit(self,X,y=None):
        return self
    
    def transform(self,X,y=None):
        X_ = X.copy()
        lets = self.feature_name+"_let"
        nums = self.feature_name+"_num"
        X_[lets]=X_[self.feature_name].str.slice(0,1)
        X_[self.feature_name]=X_[self.feature_name].fillna(0)
        X_[nums]=[''.join(i) for i in X_[self.feature_name].str.findall('\d').fillna('0')]
        X_[nums]=X_[nums].replace('','0')
        X_[nums]=X_[nums].astype(int)
        return X_

In [7]:

numeric_features = ['Age', 'Fare']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])


categorical_features = ['Embarked', 'Pclass','Sex']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

drop_features = ['PassengerId', 'Name','Ticket','Cabin']
drop_features_transformer = dropColumnTransformer(drop_features)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
        ('drop', drop_features_transformer, drop_features)])

In [8]:
pipe=Pipeline(steps=[('preprocessor', preprocessor)]).fit(train_x,train_y)
#pca = PCA(7)
#transformed_x=pca.fit_transform(pipe.transform(train_x))

In [9]:
transformed_x = pipe.transform(train_x)
transformed_test = pipe.transform(test)

In [10]:
transformed_x.shape

(891, 11)

In [11]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


2021-07-23 20:45:06.026493: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2021-07-23 20:45:06.132483: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-07-23 20:45:06.133103: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce MX150 computeCapability: 6.1
coreClock: 1.5315GHz coreCount: 3 deviceMemorySize: 1.96GiB deviceMemoryBandwidth: 44.76GiB/s
2021-07-23 20:45:06.139833: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2021-07-23 20:45:06.204149: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-07-23 20:45:06.242955: I tensorflow/stream_executor/platfor

In [12]:
input0 = tf.keras.layers.Input(shape=(11,))

layer1 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(input0)
merge1 = tf.keras.layers.Concatenate(axis=1)([input0,layer1])

layer2 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge1)
merge2 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2])

layer3 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge2)
merge3 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2,layer3])

layer4 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge3)
merge4 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2,layer3,layer4])

layer5 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge4)
merge5 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2,layer3,layer4,layer5])

layer6 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge4)
merge6 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2,layer3,layer4,layer5, layer6])

layer7 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge4)
merge7 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2,layer3,layer4,layer5, layer6, layer7])

layer8 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge7)
merge8 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2,layer3,layer4,layer5, layer6, layer7, layer8])

layer9 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge8)
merge9 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2,layer3,layer4,layer5, layer6, layer7,layer8,layer9])

layer10 = tf.keras.layers.Dense(20,activation='relu',kernel_regularizer='l1')(merge9)
merge10 = tf.keras.layers.Concatenate(axis=1)([input0,layer1,layer2,layer3,layer4,layer5, layer6, layer7,layer8,layer9,layer10])

output0 = tf.keras.layers.Dense(1,activation='sigmoid',kernel_regularizer='l1')(merge10)
model = tf.keras.Model(inputs=input0,outputs=output0)

2021-07-23 20:45:06.511147: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2021-07-23 20:45:06.527266: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 1999965000 Hz
2021-07-23 20:45:06.528194: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55973e476e20 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-07-23 20:45:06.528211: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2021-07-23 20:45:06.530093: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-07-23 20:45:06.530377: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVI

In [13]:
import pydot
plot_model(model=model, show_shapes=True)

Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.


In [14]:
model.compile(optimizer='adam',
              loss='BinaryCrossentropy',
              metrics=['accuracy'])
model.fit(transformed_x, train_y, epochs=50)

Epoch 1/50


2021-07-23 20:45:09.905181: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f004bcb2640>

In [15]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 11)]         0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 20)           240         input_1[0][0]                    
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 31)           0           input_1[0][0]                    
                                                                 dense[0][0]                      
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 20)           640         concatenate[0][0]            

In [16]:
og_weights = model.get_weights()

In [17]:
og_weights

[array([[-1.09989567e-04,  1.57692964e-04, -1.66431753e-04,
          1.88184276e-05,  3.52784809e-06,  4.21484074e-05,
          9.24253254e-05, -4.47788079e-06,  8.18187764e-05,
          9.15573037e-05,  8.98995786e-05,  1.52192486e-04,
         -2.91304488e-04,  6.25989196e-06,  1.24493072e-05,
         -4.06962390e-05,  2.33132305e-04,  1.63034383e-05,
         -9.70215478e-05,  1.58596813e-04],
        [-6.52633826e-05, -4.95688873e-05, -3.24341599e-05,
         -1.06524138e-04,  2.00523398e-04,  8.64105386e-05,
          7.50221661e-05, -6.40631333e-05,  2.10868275e-05,
         -3.94593735e-05,  1.06871048e-05,  3.36336743e-05,
          1.22840225e-04, -3.31765659e-05,  9.78302778e-05,
          5.62195783e-05,  3.47434194e-04, -9.23745392e-05,
         -2.51547724e-04, -9.55923606e-05],
        [ 1.00722296e-04, -1.57410628e-04,  3.59125916e-05,
         -1.24744474e-04,  4.40898220e-05, -2.71361321e-04,
         -1.32482965e-04,  5.14197163e-05,  1.76411297e-04,
          1.

In [18]:
weights=og_weights


In [19]:
type(weights[0]) == np.ndarray

True

In [20]:
model.set_weights(og_weights)

In [21]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(transformed_x, train_y, batch_size=128)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [0.5573949217796326, 0.7867564558982849]


In [22]:
threshold = 0.01
count = 0
for i in range(len(weights)):
    for j in range(len(weights[i])):
        if type(weights[i][j]) == np.ndarray:
            for k in range(len(weights[i][j])):
                if abs(weights[i][j][k]) < threshold:
                    weights[i][j][k] = 0
                    count = count+1
        elif abs(weights[i][j]) < threshold:
            weights[i][j] = 0
            count = count+1
        else:
            print(i,j)

1 0
1 1
1 2
1 3
1 4
1 5
1 6
1 7
1 8
1 9
1 11
1 12
1 13
1 14
1 15
1 16
1 17
1 18
1 19
3 0
3 1
3 2
3 3
3 4
3 6
3 7
3 8
3 9
3 10
3 11
3 12
3 13
3 14
3 15
3 16
3 17
3 18
3 19
5 1
5 2
5 3
5 4
5 5
5 6
5 7
5 8
5 9
5 10
5 11
5 12
5 13
5 14
5 15
5 16
5 17
5 19
7 0
7 2
7 4
7 5
7 6
7 7
7 8
7 9
7 10
7 11
7 13
7 14
7 15
7 16
7 17
7 18
7 19
9 0
9 3
9 4
9 5
9 7
9 8
9 9
9 10
9 12
9 13
9 14
9 15
9 17
9 18
9 19
11 1
11 2
11 3
11 5
11 6
11 7
11 8
11 9
11 10
11 11
11 12
11 13
11 14
11 15
11 16
11 17
11 18
13 0
13 1
13 2
13 3
13 4
13 5
13 6
13 7
13 8
13 9
13 12
13 13
13 14
13 16
13 17
13 18
13 19
15 0
15 1
15 2
15 3
15 4
15 6
15 7
15 8
15 9
15 10
15 11
15 12
15 13
15 14
15 16
15 17
15 18
15 19
17 0
17 2
17 3
17 4
17 5
17 6
17 7
17 8
17 9
17 10
17 11
17 12
17 13
17 14
17 15
17 16
17 17
17 18
17 19
19 0
19 2
19 4
19 5
19 6
19 7
19 8
19 9
19 10
19 12
19 13
19 15
19 16
19 17
19 18
19 19
21 0


In [23]:
len(weights[10])

91

In [24]:
count / 19412

0.9894395219451886

In [25]:
og_weights

[array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.],
        [0

In [26]:
model.set_weights(weights)

In [27]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model.evaluate(transformed_x, train_y, batch_size=128)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [0.5348770618438721, 0.7856341004371643]


In [28]:
test_results=model.predict(transformed_test)

In [29]:
for i in range(len(test_results)):
    if test_results[i] < 0.5:
        test_results[i] = 0
    else:
        test_results[i] = 1

In [30]:
test_results=pd.DataFrame(test_results.astype(int),columns=['Survived'])

In [31]:
test_results['PassengerId'] = test.PassengerId

In [32]:
test_results=test_results[['PassengerId','Survived']]

In [None]:
test_results.to_csv('reallydense_gender_submission.csv',index=False)