# Importing Packages

### 1) Import TensorFlow and other related packages

In [1]:
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from pandas.api.types import CategoricalDtype 
from sklearn import metrics
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix
from sklearn.svm import LinearSVC
from sklearn.linear_model import Perceptron
import keras_tuner as kt
#%load_ext tensorboard => I am running tensorboard from Terminal


### 2) import additional packages

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import math 
import shutil
import os
import time
import json

# Global plotting defaults applied to every figure drawn in this notebook.
plt.rcParams['figure.figsize'] = [9, 6]
sns.set_style("darkgrid")

# Root directory for log output; created up front so later cells can write into it.
base_log_dir = "./logs/"

# exist_ok=True makes the call idempotent and removes the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(base_log_dir, exist_ok=True)


### 3) Simple utilities to handle some implementation details

In [3]:
def CalculateMetricsAndPlot(true_label, predicted_label,color="Blues",text=""):
    """Show a confusion-matrix heatmap whose title reports accuracy and precision.

    Parameters
    ----------
    true_label : array-like
        Ground-truth class labels.
    predicted_label : array-like
        Labels produced by the classifier.
    color : str
        Colormap name forwarded to ``sns.heatmap`` as ``cmap``.
    text : str
        Optional suffix appended to the title; left out when it is empty.

    Accuracy is reported as a percentage and precision is macro-averaged;
    both are rounded to two decimals. The figure is displayed with
    ``plt.show()`` and nothing is returned.
    """
    matrix = confusion_matrix(true_label, predicted_label)
    accuracy_pct = round(accuracy_score(true_label, predicted_label) * 100, 2)
    macro_precision = round(precision_score(true_label, predicted_label, average='macro'), 2)

    # Pick the title template once so the heatmap is drawn in a single place.
    if text == "":
        title = "Confusion Matrix for Test Data | Accuracy={0}% | Precision={1}".format(accuracy_pct, macro_precision)
    else:
        title = "Confusion Matrix for Test Data | Accuracy={0}% | Precision={1} | {2}".format(accuracy_pct, macro_precision, text)

    heatmap_axes = sns.heatmap(matrix, annot=True, cmap=color, fmt='g')
    heatmap_axes.set_title(title)

    plt.show()

# Loading Dataset

In [4]:
# The data file has no header row and marks missing cells with "?".
raw_data = pd.read_csv("./crx.data", na_values="?", header=None)

# Indices of the continuous feature columns; every other column in 0..14 is
# treated as discrete (column 15 is the class label).
continuous_attributes = [1,2,7,10,13,14]
discrete_attributes = [column for column in range(15) if column not in continuous_attributes]
raw_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,b,30.83,0.000,u,g,w,v,1.25,t,t,1,f,g,202.0,0,+
1,a,58.67,4.460,u,g,q,h,3.04,t,t,6,f,g,43.0,560,+
2,a,24.50,0.500,u,g,q,h,1.50,t,f,0,f,g,280.0,824,+
3,b,27.83,1.540,u,g,w,v,3.75,t,t,5,t,g,100.0,3,+
4,b,20.17,5.625,u,g,w,v,1.71,t,f,0,f,s,120.0,0,+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,b,21.08,10.085,y,p,e,h,1.25,f,f,0,f,g,260.0,0,-
686,a,22.67,0.750,u,g,c,v,2.00,f,t,2,t,g,200.0,394,-
687,a,25.25,13.500,y,p,ff,ff,2.00,f,t,1,t,g,200.0,1,-
688,b,17.92,0.205,u,g,aa,v,0.04,f,f,0,f,g,280.0,750,-


# Preprocessing on Raw Dataset

### 1) Filling missing values
- Fill continuous attributes with their mean
- Fill discrete attributes with their mode

In [5]:
# Report the per-column missing counts, impute every feature column, then
# confirm that no missing cell is left.
print("Total count of missing values(cells) before filling:\n",raw_data.isna().sum())
for index in range(15):
    column_values = raw_data[index]
    # Continuous columns take their mean; discrete columns take their most
    # frequent value (the first mode when there are ties).
    if index in continuous_attributes:
        fill_value = column_values.mean()
    else:
        fill_value = column_values.mode()[0]
    raw_data[index] = column_values.fillna(fill_value)

print("Total count of missing values(cells) after filling:",raw_data.isna().sum().sum())

Total count of missing values(cells) before filling:
 0     12
1     12
2      0
3      6
4      6
5      9
6      9
7      0
8      0
9      0
10     0
11     0
12     0
13    13
14     0
15     0
dtype: int64
Total count of missing values(cells) after filling: 0


### 2) Normalize continuous attributes

In [6]:
# Min-max scale the continuous columns and write them back in place.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/validation/test split made later in the notebook.
min_max_scaler = MinMaxScaler()
x = raw_data[continuous_attributes].to_numpy()
x_scaled = min_max_scaler.fit_transform(x)

# Rebuild a frame with the original row index and column labels so the
# assignment below aligns cell by cell.
normalized = pd.DataFrame(data=x_scaled, index=raw_data.index, columns=continuous_attributes)
raw_data[continuous_attributes] = normalized
raw_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,b,0.256842,0.000000,u,g,w,v,0.043860,t,t,0.014925,f,g,0.1010,0.00000,+
1,a,0.675489,0.159286,u,g,q,h,0.106667,t,t,0.089552,f,g,0.0215,0.00560,+
2,a,0.161654,0.017857,u,g,q,h,0.052632,t,f,0.000000,f,g,0.1400,0.00824,+
3,b,0.211729,0.055000,u,g,w,v,0.131579,t,t,0.074627,t,g,0.0500,0.00003,+
4,b,0.096541,0.200893,u,g,w,v,0.060000,t,f,0.000000,f,s,0.0600,0.00000,+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,b,0.110226,0.360179,y,p,e,h,0.043860,f,f,0.000000,f,g,0.1300,0.00000,-
686,a,0.134135,0.026786,u,g,c,v,0.070175,f,t,0.029851,t,g,0.1000,0.00394,-
687,a,0.172932,0.482143,y,p,ff,ff,0.070175,f,t,0.014925,t,g,0.1000,0.00001,-
688,b,0.062707,0.007321,u,g,aa,v,0.001404,f,f,0.000000,f,g,0.1400,0.00750,-


### 3) Convert categorical attributes to numeric with integer encoding

In [7]:
# label => integer encoding
# OrdinalEncoder sorts the categories, so '+' is encoded as 0 and '-' as 1.
# fit_transform returns an (n, 1) array; flatten it before storing as a column.
ord_enc = OrdinalEncoder(dtype=np.int32)
raw_data[15] = ord_enc.fit_transform(raw_data[[15]]).ravel()

# Snapshot taken before one-hot encoding so the original attribute columns
# remain available for plotting.
raw_data_to_plot = raw_data.copy()

# other discrete attributes => one hot encoding
# The dtype is pinned because pandas >= 2.0 emits boolean dummies by default;
# mixing bool and float columns would make the later conversion to a NumPy
# array produce an object array instead of a numeric one.
one_hot = pd.get_dummies(raw_data[discrete_attributes], dtype=np.uint8)
raw_data = raw_data.drop(columns=discrete_attributes).join(one_hot)
raw_data



Unnamed: 0,1,2,7,10,13,14,15,0_a,0_b,3_l,...,6_z,8_f,8_t,9_f,9_t,11_f,11_t,12_g,12_p,12_s
0,0.256842,0.000000,0.043860,0.014925,0.1010,0.00000,0,0,1,0,...,0,0,1,0,1,1,0,1,0,0
1,0.675489,0.159286,0.106667,0.089552,0.0215,0.00560,0,1,0,0,...,0,0,1,0,1,1,0,1,0,0
2,0.161654,0.017857,0.052632,0.000000,0.1400,0.00824,0,1,0,0,...,0,0,1,1,0,1,0,1,0,0
3,0.211729,0.055000,0.131579,0.074627,0.0500,0.00003,0,0,1,0,...,0,0,1,0,1,0,1,1,0,0
4,0.096541,0.200893,0.060000,0.000000,0.0600,0.00000,0,0,1,0,...,0,0,1,1,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,0.110226,0.360179,0.043860,0.000000,0.1300,0.00000,1,0,1,0,...,0,1,0,1,0,1,0,1,0,0
686,0.134135,0.026786,0.070175,0.029851,0.1000,0.00394,1,1,0,0,...,0,1,0,0,1,0,1,1,0,0
687,0.172932,0.482143,0.070175,0.014925,0.1000,0.00001,1,1,0,0,...,0,1,0,0,1,0,1,1,0,0
688,0.062707,0.007321,0.001404,0.000000,0.1400,0.00750,1,0,1,0,...,0,1,0,1,0,1,0,1,0,0


### 4) Outlier (noisy data) elimination based on a clustering method (K-Means)

In [8]:
# Cluster on a plain float array: recent scikit-learn versions refuse
# DataFrames whose column labels mix ints and strings (as they do after the
# one-hot encoding), and the explicit dtype also copes with boolean dummies.
# NOTE(review): raw_data still contains the label column (15) at this point, so
# the label takes part in the clustering - confirm this is intended.
features = raw_data.to_numpy(dtype=np.float64)

# K-Means starts from random centroids; pinning random_state (and n_init) makes
# the set of detected outliers reproducible from run to run.
kmean = KMeans(n_clusters=2, n_init=10, random_state=42).fit(features)

# Squared Euclidean distance of every row to each of the two centres
# (shape: rows x clusters); keep the distance to the nearest centre.
offsets = features[:, np.newaxis, :] - kmean.cluster_centers_[np.newaxis, :, :]
cluster_distances = (offsets ** 2).sum(axis=2).min(axis=1).tolist()

# Disabled histogram of the distances (kept as a string so it does not run).
"""  
sns.histplot(cluster_distances,bins=30)
plt.plot([8.5,8.5],[0,80],color="red")
plt.xlabel("Min distance to one of Clusters`s center")
plt.text(6.5,60,"Threshold to outlier =>",color="red",)
plt.title("Histogram of minimum distance to Clusters`s center")
plt.show()"""

# Rows whose squared distance to the nearest centre exceeds this are dropped.
outlier_threshold = 8.5

print("Dataset shape before outlier elimination:",raw_data.shape)
index_to_drop = np.where(np.array(cluster_distances) > outlier_threshold)[0]
# np.where yields positions; translate them to index labels before dropping so
# the result does not depend on raw_data having a default RangeIndex.
raw_data = raw_data.drop(raw_data.index[index_to_drop]).reset_index(drop=True)
print("Dataset shape after outlier elimination:",raw_data.shape)
print("Count of outlier that was eliminated:",len(index_to_drop))
raw_data




Dataset shape before outlier elimination: (690, 47)
Dataset shape after outlier elimination: (688, 47)
Count of outlier that was eliminated: 2


Unnamed: 0,1,2,7,10,13,14,15,0_a,0_b,3_l,...,6_z,8_f,8_t,9_f,9_t,11_f,11_t,12_g,12_p,12_s
0,0.256842,0.000000,0.043860,0.014925,0.1010,0.00000,0,0,1,0,...,0,0,1,0,1,1,0,1,0,0
1,0.675489,0.159286,0.106667,0.089552,0.0215,0.00560,0,1,0,0,...,0,0,1,0,1,1,0,1,0,0
2,0.161654,0.017857,0.052632,0.000000,0.1400,0.00824,0,1,0,0,...,0,0,1,1,0,1,0,1,0,0
3,0.211729,0.055000,0.131579,0.074627,0.0500,0.00003,0,0,1,0,...,0,0,1,0,1,0,1,1,0,0
4,0.096541,0.200893,0.060000,0.000000,0.0600,0.00000,0,0,1,0,...,0,0,1,1,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
683,0.110226,0.360179,0.043860,0.000000,0.1300,0.00000,1,0,1,0,...,0,1,0,1,0,1,0,1,0,0
684,0.134135,0.026786,0.070175,0.029851,0.1000,0.00394,1,1,0,0,...,0,1,0,0,1,0,1,1,0,0
685,0.172932,0.482143,0.070175,0.014925,0.1000,0.00001,1,1,0,0,...,0,1,0,0,1,0,1,1,0,0
686,0.062707,0.007321,0.001404,0.000000,0.1400,0.00750,1,0,1,0,...,0,1,0,1,0,1,0,1,0,0


# Dataset splitting

In [9]:
# Separate label and features without mutating raw_data, so the cell can be
# re-run safely (a pop() would raise KeyError on the second execution).
# The explicit float dtype guarantees a numeric matrix even if the dummy
# columns are boolean.
Y_data = raw_data[15].to_numpy()
X_data = raw_data.drop(columns=15).to_numpy(dtype=np.float64)

# Fixed seed so the train/validation/test partition is reproducible.
split_seed = 42

# 80/20 train+validation/test split, then 12.5% of the remainder for
# validation (about 70/10/20 overall); both splits are stratified on the label.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_data, Y_data, test_size=0.2, stratify=Y_data, random_state=split_seed)
X_train, X_valid, Y_train, Y_valid = train_test_split(
    X_train, Y_train, test_size=0.125, stratify=Y_train, random_state=split_seed)

# The three subsets must partition the dataset exactly.
assert X_train.shape[0] + X_test.shape[0] + X_valid.shape[0] == X_data.shape[0]

for split_name, split_features in (("Train", X_train), ("Test", X_test), ("Validation", X_valid)):
    print("Count of {0} data and its percentage out of all: {1}, {2}%".format(
        split_name, split_features.shape[0], round(split_features.shape[0]/X_data.shape[0]*100,2)))
print("Shape Train:",X_train.shape)


Count of Train data and its percentage out of all: 481, 69.91%
Count of Test data and its percentage out of all: 138, 20.06%
Count of Validation data and its percentage out of all: 69, 10.03%
Shape Train: (481, 46)


# Check Linearity

### 1) Plotting dataset to check linearity

In [10]:
"""fig, axes = plt.subplots(4, 2, figsize=(20,12))
raw_data_to_plot.rename(columns= {15:"Label"},inplace=True)
axes = np.reshape(axes,(8,))
plot_index = 0
raw_data_to_plot.rename({15:"Label"})
for index in range(0,15,2):
    if index == 14:
        index =13
    axes[plot_index].set_title('Attributes / X: A{0} -  Y: A{1}'.format(index+1,index+2))
    sns.scatterplot(ax=axes[plot_index],data=raw_data_to_plot,x=index,y=index+1,hue="Label",style="Label",alpha=0.25)
    axes[plot_index].set(xlabel=None,ylabel=None)
    plot_index += 1
plt.show()




fig, axes = plt.subplots(11,11, figsize=(40,40))
axes = np.reshape(axes,(11*11,))
plot_index = 0
for i in range(0,15):
    for j in range(i,15):        
        axes[plot_index].set_title('Attributes / X: A{0} -  Y: A{1}'.format(i+1,j+1))
        sns.scatterplot(ax=axes[plot_index],data=raw_data_to_plot,x=i,y=j,hue="Label",style="Label",alpha=0.25)
        axes[plot_index].set(xlabel=None,ylabel=None)
        plot_index += 1
        
        
plt.show()"""

'fig, axes = plt.subplots(4, 2, figsize=(20,12))\nraw_data_to_plot.rename(columns= {15:"Label"},inplace=True)\naxes = np.reshape(axes,(8,))\nplot_index = 0\nraw_data_to_plot.rename({15:"Label"})\nfor index in range(0,15,2):\n    if index == 14:\n        index =13\n    axes[plot_index].set_title(\'Attributes / X: A{0} -  Y: A{1}\'.format(index+1,index+2))\n    sns.scatterplot(ax=axes[plot_index],data=raw_data_to_plot,x=index,y=index+1,hue="Label",style="Label",alpha=0.25)\n    axes[plot_index].set(xlabel=None,ylabel=None)\n    plot_index += 1\nplt.show()\n\n\n\n\nfig, axes = plt.subplots(11,11, figsize=(40,40))\naxes = np.reshape(axes,(11*11,))\nplot_index = 0\nfor i in range(0,15):\n    for j in range(i,15):        \n        axes[plot_index].set_title(\'Attributes / X: A{0} -  Y: A{1}\'.format(i+1,j+1))\n        sns.scatterplot(ax=axes[plot_index],data=raw_data_to_plot,x=i,y=j,hue="Label",style="Label",alpha=0.25)\n        axes[plot_index].set(xlabel=None,ylabel=None)\n        plot_ind

### 2) Apply practical methods (linear classifiers) to check linear separability

In [11]:
"""svc_classifier = LinearSVC(C=10000,max_iter=100000)
svc_classifier.fit(X_train,Y_train)
Y_pre = svc_classifier.predict(X_test)
CalculateMetricsAndPlot(Y_test,Y_pre,"Greens"," SVM | C={0}".format(10000))

perceptron_classifier = Perceptron(penalty="l1",max_iter=100000)
perceptron_classifier.fit(X_train,Y_train)
Y_pre = perceptron_classifier.predict(X_test)
CalculateMetricsAndPlot(Y_test,Y_pre,"Oranges"," Perceptron | L1")"""


'svc_classifier = LinearSVC(C=10000,max_iter=100000)\nsvc_classifier.fit(X_train,Y_train)\nY_pre = svc_classifier.predict(X_test)\nCalculateMetricsAndPlot(Y_test,Y_pre,"Greens"," SVM | C={0}".format(10000))\n\nperceptron_classifier = Perceptron(penalty="l1",max_iter=100000)\nperceptron_classifier.fit(X_train,Y_train)\nY_pre = perceptron_classifier.predict(X_test)\nCalculateMetricsAndPlot(Y_test,Y_pre,"Oranges"," Perceptron | L1")'

# Custom class to build ANN

In [12]:
class SequentialANN :
    """Keras ``Sequential`` binary classifier with per-experiment logging.

    The network is a stack of ReLU ``Dense`` layers followed by a single
    sigmoid output unit. Everything an experiment produces (TensorBoard logs,
    a JSON parameter report, the saved model and an architecture diagram) is
    written under ``base_log_dir + str(id) + "/"``.

    Note: ``fit`` evaluates the model on the notebook-level arrays ``X_train``,
    ``Y_train``, ``X_valid``, ``Y_valid``, ``X_test`` and ``Y_test``; they must
    exist before ``fit`` is called.
    """

    def __init__(self,_shape_input:tuple,_neurons : list,id=None,req=False):
        """Build the (uncompiled) network.

        Parameters
        ----------
        _shape_input : tuple
            Shape of one input sample, passed to ``keras.layers.Input``.
        _neurons : list
            Number of units of each hidden layer, in order.
        id : any, optional
            Experiment identifier; its ``str()`` names the log sub-directory.
        req : bool
            When true, every layer gets L1/L2 kernel, L2 bias and L2 activity
            regularisers (with larger coefficients on the output layer).
        """
        # Reset Keras' global state *before* creating the model, so the new
        # model is not built on state that is about to be discarded.
        keras.backend.clear_session()
        self.model = keras.models.Sequential()
        self.model.add(keras.layers.Input(shape=_shape_input))

        for units in _neurons:
            self.model.add(self._dense(units, "relu", req, weak=1e-4, strong=1e-3))

        # The output layer uses regularisation coefficients ten times larger.
        self.model.add(self._dense(1, "sigmoid", req, weak=1e-3, strong=1e-2))

        self.id = id
        self.log_dir = base_log_dir+str(id)+"/"

    @staticmethod
    def _dense(units, activation, regularize, weak, strong):
        """Return a Dense layer, optionally regularised with the given coefficients."""
        if not regularize:
            return keras.layers.Dense(units, activation=activation)
        return keras.layers.Dense(
            units,
            activation=activation,
            kernel_regularizer=keras.regularizers.l1_l2(l1=weak, l2=strong),
            bias_regularizer=keras.regularizers.l2(strong),
            activity_regularizer=keras.regularizers.l2(weak),
        )

    @staticmethod
    def _reset_dir(path):
        """Make ``path`` an empty directory, discarding any previous content."""
        shutil.rmtree(path, ignore_errors=True)
        # exist_ok covers the case where the best-effort rmtree left the directory behind.
        os.makedirs(path, exist_ok=True)

    def _confusion_matrix(self, features, labels):
        """Confusion matrix (nested list) of the 0.5-thresholded predictions."""
        predictions = (self.model.predict(features) > 0.5).astype("int32")
        return metrics.confusion_matrix(labels, predictions).tolist()

    def compile(self,_optimizer='adam',_loss='binary_crossentropy', _metrics=["accuracy"]):
        """Compile the model and prepare a fresh log directory.

        Any existing content of ``self.log_dir`` is deleted. A TensorBoard
        callback writing to that directory is stored in
        ``self.tensorboard_callback`` for the caller to pass to ``fit``.
        """
        # Copy list-like metrics so the shared default list is never handed out.
        if isinstance(_metrics, (list, tuple)):
            _metrics = list(_metrics)

        self._reset_dir(self.log_dir)
        self.model.compile(optimizer=_optimizer, loss=_loss,metrics=_metrics)
        self.tensorboard_callback = keras.callbacks.TensorBoard(log_dir=self.log_dir, histogram_freq=1)

        # Remembered so that fit() can include them in its report.
        self.optimizer = _optimizer
        self.loss = _loss
        self.metrics = _metrics

    def fit(self,**argu):
        """Train the model, then write a parameter/result report.

        ``argu`` is forwarded unchanged to ``keras.Model.fit``. After training,
        the remaining scalar arguments, the compile settings, the test
        loss/accuracy and the confusion matrices on the test, train and
        validation splits are written as TensorBoard text summaries and as
        ``json_report.json``; the model is then saved and plotted.

        The test evaluation unpacks exactly two values, so the model must have
        been compiled with a single metric.

        Returns the ``History`` object produced by ``keras.Model.fit``.
        """
        history = self.model.fit(**argu)

        _text_log_dir = self.log_dir+"Parameter Logs"
        self._reset_dir(_text_log_dir)
        file_writer = tf.summary.create_file_writer(_text_log_dir)

        # Data arrays and callback objects do not belong in the report; filtering
        # (rather than `del`) also tolerates calls that omit optional arguments.
        excluded = ("x", "y", "validation_data", "callbacks")
        report = {key: value for key, value in argu.items() if key not in excluded}

        report["optimizer"] = self.optimizer
        report["loss"] = self.loss
        report["metrics"] = self.metrics

        report["Test loss"],report["Test accuracy"] = self.model.evaluate(X_test,Y_test)

        for split_name, features, labels in (("test", X_test, Y_test),
                                             ("train", X_train, Y_train),
                                             ("validation", X_valid, Y_valid)):
            report["Confusion Matrix on {0} data".format(split_name)] = self._confusion_matrix(features, labels)

        # One text summary per report entry; the step is the entry's position.
        with file_writer.as_default():
            for step, (key, value) in enumerate(report.items()):
                tf.summary.text("Parameter report: "+str(key), str(key) + " : " +str(value) , step=step)
        file_writer.flush()

        with open(self.log_dir + "json_report.json", 'w') as outfile:
            # default=str keeps the dump from failing on values that are not
            # JSON-serialisable (e.g. an optimizer instance).
            json.dump(report, outfile, default=str)
        self.save()
        self.plot()
        return history

    def save(self):
        """Save the model to ``model.h5`` inside the log directory."""
        self.model.save(self.log_dir+"model.h5")

    def plot(self):
        """Write an architecture diagram to ``graph.png`` inside the log directory."""
        keras.utils.plot_model(self.model,to_file=self.log_dir+"graph.png",show_shapes=True,expand_nested=True)
        
        

# Train a primary MLP model (first try) - Answer to question (3)

In [13]:
"""model = SequentialANN((X_train.shape[1],),[11],"Main")
model.compile()
model.fit(x=X_train, 
                y=Y_train, 
                batch_size=48,
                epochs=50, 
                verbose=0,
                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])
                

CalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))"""


'model = SequentialANN((X_train.shape[1],),[11],"Main")\nmodel.compile()\nmodel.fit(x=X_train, \n                y=Y_train, \n                batch_size=48,\n                epochs=50, \n                verbose=0,\n                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])\n                \n\nCalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))'

# Try to find optimal parameters (manual) - Answer to question (4)


In [14]:
"""model = SequentialANN((X_train.shape[1],),[2,2],21)
model.compile()

model.fit(x=X_train, 
                y=Y_train, 
                batch_size=48,
                epochs=200, 
                verbose=0,
                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])
                

CalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))"""



'model = SequentialANN((X_train.shape[1],),[2,2],21)\nmodel.compile()\n\nmodel.fit(x=X_train, \n                y=Y_train, \n                batch_size=48,\n                epochs=200, \n                verbose=0,\n                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])\n                \n\nCalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))'

# Try to find optimal parameters (auto) - Answer to question (4)


In [15]:
"""def ModelBuilder(hyperparameter):
    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(46,)))
    for i in range(hyperparameter.Int("num_layers", 1, 4)):
        model.add(
            keras.layers.Dense(
                units=hyperparameter.Int(f"units_{i}", min_value=1, max_value=8, step=1),
                activation= "relu",
            )
        )

    model.add(keras.layers.Dense(1, activation="sigmoid"))    
    model.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model


tuner = kt.RandomSearch(ModelBuilder,objective='val_accuracy',max_trials=20)
tuner.search(X_train, Y_train, epochs=25, validation_data=(X_valid, Y_valid))
tuner.results_summary()"""

'def ModelBuilder(hyperparameter):\n    model = keras.Sequential()\n    model.add(keras.layers.Input(shape=(46,)))\n    for i in range(hyperparameter.Int("num_layers", 1, 4)):\n        model.add(\n            keras.layers.Dense(\n                units=hyperparameter.Int(f"units_{i}", min_value=1, max_value=8, step=1),\n                activation= "relu",\n            )\n        )\n\n    model.add(keras.layers.Dense(1, activation="sigmoid"))    \n    model.compile(\n        optimizer="adam",\n        loss="binary_crossentropy",\n        metrics=["accuracy"],\n    )\n    return model\n\n\ntuner = kt.RandomSearch(ModelBuilder,objective=\'val_accuracy\',max_trials=20)\ntuner.search(X_train, Y_train, epochs=25, validation_data=(X_valid, Y_valid))\ntuner.results_summary()'

In [16]:
"""model = SequentialANN((X_train.shape[1],),[19,17],22)
model.compile()
model.fit(x=X_train, 
                y=Y_train, 
                batch_size=48,
                epochs=200, 
                verbose=0,
                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])
                

CalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))"""




'model = SequentialANN((X_train.shape[1],),[19,17],22)\nmodel.compile()\nmodel.fit(x=X_train, \n                y=Y_train, \n                batch_size=48,\n                epochs=200, \n                verbose=0,\n                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])\n                \n\nCalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))'

# Try to find parameters that cause overfitting - Answer to question (5)


In [17]:
"""model = SequentialANN((X_train.shape[1],),[128,64,32,16],"Over-Fitting-2")
model.compile()
model.fit(x=X_train, 
                y=Y_train, 
                batch_size=24,
                epochs=200, 
                verbose=0,
                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])
                

CalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"),"Reds")"""


'model = SequentialANN((X_train.shape[1],),[128,64,32,16],"Over-Fitting-2")\nmodel.compile()\nmodel.fit(x=X_train, \n                y=Y_train, \n                batch_size=24,\n                epochs=200, \n                verbose=0,\n                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])\n                \n\nCalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"),"Reds")'

# Try to generalize the ANN - Answer to question (6)

In [20]:
"""model = SequentialANN((X_train.shape[1],),[64,32,8],"Gen",False)
model.compile()

model.fit(x=X_train, 
                y=Y_train, 
                epochs=300, 
                batch_size= 48,
                verbose=0,
                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])
                

CalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))
model.model.summary()"""


'model = SequentialANN((X_train.shape[1],),[64,32,8],"Gen",False)\nmodel.compile()\n\nmodel.fit(x=X_train, \n                y=Y_train, \n                epochs=300, \n                batch_size= 48,\n                verbose=0,\n                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])\n                \n\nCalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))\nmodel.model.summary()'

In [21]:
"""model = SequentialANN((X_train.shape[1],),[64,32,8],"Gen-2",True)
model.compile()

model.fit(x=X_train, 
                y=Y_train, 
                epochs=300, 
                batch_size= 48,
                verbose=0,
                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])
                

CalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))
model.model.summary()"""

'model = SequentialANN((X_train.shape[1],),[64,32,8],"Gen-2",True)\nmodel.compile()\n\nmodel.fit(x=X_train, \n                y=Y_train, \n                epochs=300, \n                batch_size= 48,\n                verbose=0,\n                validation_data=(X_valid, Y_valid),callbacks=[model.tensorboard_callback])\n                \n\nCalculateMetricsAndPlot(Y_test,(model.model.predict(X_test) > 0.5).astype("int32"))\nmodel.model.summary()'