<a href="https://colab.research.google.com/github/HarryMacFarlane/AI-Course-Work/blob/main/Mini_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Models

##Neural Network Decision Tree (Code from Paper)
+ Including a class to easily run it

In [None]:
import tensorflow as tf
from functools import reduce
import numpy as np

def tf_kron_prod(a, b):
    """Row-wise Kronecker product of two 2-D tensors.

    For every row i the outer product of a[i] and b[i] is formed via einsum
    and then flattened, so all trailing dimensions collapse into one axis.
    """
    outer = tf.einsum('ij,ik->ijk', a, b)
    flat_width = tf.reduce_prod(outer.shape[1:])
    return tf.reshape(outer, [-1, flat_width])


def tf_bin(x, cut_points, temperature=0.1):
    """Soft-bin one feature column against a set of learnable cut points.

    x is multiplied (matmul) with a 1-by-(D+1) weight row, so it is expected
    to be an N-by-1 column; cut_points is a D-dim vector. The result has D+1
    columns and each row is a softmax, i.e. a soft bin membership that gets
    sharper as `temperature` gets smaller.
    """
    num_cuts = cut_points.get_shape().as_list()[0]
    weights = tf.reshape(tf.linspace(1.0, num_cuts + 1.0, num_cuts + 1), [1, -1])
    # Sorting keeps the cut points monotonically increasing.
    ordered_cuts = tf.sort(cut_points)
    zero = tf.constant(0.0, shape=[1])
    offsets = tf.cumsum(tf.concat([zero, -ordered_cuts], 0))
    logits = tf.matmul(x, weights) + offsets
    return tf.nn.softmax(logits / temperature)


def nn_decision_tree(x, cut_points_list, leaf_score, temperature=0.1):
    """Evaluate the soft decision tree on x and return per-class scores.

    cut_points_list holds one cut-point tensor per feature column of x.
    Each column is soft-binned with tf_bin, the per-feature bin matrices are
    combined with tf_kron_prod into leaf memberships, and those are
    multiplied by leaf_score.
    """
    binned_columns = (
        tf_bin(x[:, col:col + 1], cuts, temperature)
        for col, cuts in enumerate(cut_points_list)
    )
    leaf = reduce(tf_kron_prod, binned_columns)
    return tf.matmul(leaf, leaf_score)



class DNDT():
    """Deep neural decision tree classifier trained with a TF1-style session.

    Every feature gets exactly one learnable cut point, so the number of
    leaves is the product of (cuts + 1) over all features; each leaf holds
    one score per class.
    """

    def __init__(self, num_classes, num_features, temperature, learning_rate, iters, epsilon):
        """Create the trainable cut points and leaf scores.

        Args:
            num_classes: number of target classes (width of the one-hot labels).
            num_features: number of input features; one cut point per feature.
            temperature: softmax temperature handed to nn_decision_tree.
            learning_rate: learning rate of the Adam optimizer used in fit().
            iters: maximum number of full-batch training steps.
            epsilon: training stops early once the loss drops below this value.
        """
        self.num_classes = num_classes
        self.num_cut = [1]*num_features
        self.cutpoint_list = [tf.Variable(tf.random.uniform([i])) for i in self.num_cut]
        num_leaf = np.prod(np.array(self.num_cut) + 1)
        self.leafscore = tf.Variable(tf.random.uniform([num_leaf, self.num_classes]))
        self.temp = temperature
        self.lr = learning_rate
        self.epoch = iters
        self.epsilon = epsilon
        # Populated by fit(); kept as None so closeSession() is safe beforehand.
        self.tree = None
        self.sess = None

    def fit(self, X, y, X_test, y_test):
        """Train on (X, y) with full-batch Adam and print the test accuracy.

        Args:
            X: training features, one row per sample.
            y: one-hot training labels with num_classes columns.
            X_test: held-out features, only used for the final accuracy print.
            y_test: one-hot labels for X_test.

        Progress (loss and training accuracy) is printed roughly every
        quarter of the run. The session is kept open in self.sess; call
        closeSession() when done.
        """
        sess = tf.compat.v1.InteractiveSession()
        # disable_v2_behavior() returns None, so it is called directly instead
        # of being handed to sess.run() as a fetch.
        tf.compat.v1.disable_v2_behavior()
        x_ph = tf.compat.v1.placeholder(tf.float32, [None, X.shape[1]])
        y_ph = tf.compat.v1.placeholder(tf.float32, [None, self.num_classes])
        # Use the temperature given to the constructor (previously a
        # hard-coded 0.1 silently overrode it).
        y_pred = nn_decision_tree(x_ph, self.cutpoint_list, self.leafscore, temperature=self.temp)
        loss = tf.reduce_mean(tf.compat.v1.losses.softmax_cross_entropy(logits=y_pred, onehot_labels=y_ph))
        opt = tf.compat.v1.train.AdamOptimizer(self.lr)
        train_step = opt.minimize(loss)
        sess.run(tf.compat.v1.global_variables_initializer())

        report_every = max(1, self.epoch // 4)
        for i in range(self.epoch):
            # The first fetch is the training op (no useful value); the second
            # is the loss for this step.
            _, loss_e = sess.run(fetches=[train_step, loss], feed_dict={x_ph: X, y_ph: y})
            if loss_e < self.epsilon:
                print(i, " epochs: ", loss_e)
                break
            elif i % report_every == 0:
                print(100 * i // self.epoch, "%")
                print("Loss: ", loss_e)
                pred = y_pred.eval(feed_dict={x_ph: X, y_ph: y})
                print("Accuracy: ", self.accuracy(pred, y))
                print("\n")
        pred = y_pred.eval(feed_dict={x_ph: X_test, y_ph: y_test})
        print("Final Test Accuracy: ", self.accuracy(pred, y_test))
        self.tree = y_pred
        self.sess = sess

    def closeSession(self):
        """Close the session opened by fit(); does nothing if fit() never ran."""
        if self.sess is not None:
            self.sess.close()

    def accuracy(self, y_pred, y_true):
        """Return the fraction of rows whose highest-scoring class is the true one.

        Args:
            y_pred: per-class scores, one row per sample. Scores may be
                negative (they are logits), so a real argmax is used.
            y_true: one-hot labels aligned with y_pred.

        Returns:
            Accuracy as a float in [0, 1]; 0.0 for empty input.
        """
        scores = np.asarray(y_pred)
        targets = np.asarray(y_true)
        count = len(scores)
        if count == 0:
            return 0.0
        # Only the first num_classes columns are considered; ties resolve to
        # the lowest class index.
        predicted = np.argmax(scores[:, :self.num_classes], axis=1)
        hits = targets[np.arange(count), predicted] == 1
        return int(np.count_nonzero(hits)) / count









##Neural Network

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt


class NeuralNetwork():
    """Fully connected Keras classifier with a configurable stack of hidden layers."""

    def __init__(self, num_layers, units_layer, input_shapeX, dropout, num_classes, flatten = False):
        """Build and compile the model.

        Args:
            num_layers: int, number of hidden Dense layers (at least one is
                always built).
            units_layer: sequence of int, units of each hidden layer; needs
                one entry per hidden layer (IndexError otherwise).
            input_shapeX: int or tuple of int, shape of a single sample. A
                multi-dimensional shape requires flatten=True.
            dropout: float between 0 and 1, fraction of units zeroed before
                the output layer during fitting (reduces overfitting).
            num_classes: int, number of output units (softmax).
            flatten: bool, insert a Flatten layer right after the input.
        """
        if isinstance(input_shapeX, int):
            sample_shape = (input_shapeX,)
        else:
            sample_shape = tuple(input_shapeX)

        # The Input layer must come first; Flatten (if requested) follows it.
        self.model = tf.keras.Sequential([tf.keras.layers.Input(shape=sample_shape)])
        if flatten:
            self.model.add(tf.keras.layers.Flatten())

        # One Dense layer per requested hidden layer. The previous loop
        # skipped index 0 and stopped one short, so num_layers=2 produced the
        # same single-hidden-layer network as num_layers=1.
        for i in range(max(1, num_layers)):
            self.model.add(tf.keras.layers.Dense(units_layer[i], activation='relu'))
        self.model.add(tf.keras.layers.Dropout(dropout))
        self.model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

        # Adam plus a built-in Keras loss matching the number of classes.
        if num_classes == 2:
            loss_fn = 'binary_crossentropy'
        else:
            loss_fn = 'categorical_crossentropy'
        self.model.compile(optimizer='adam',
                           loss=loss_fn,
                           metrics=['accuracy'])

    def fit(self, x_train, y_train, iter, print):
        """Fit the model for `iter` epochs; `print` toggles Keras progress output."""
        # Leave Keras' default verbosity untouched when printing is requested.
        extra = {} if print else {"verbose": 0}
        self.model.fit(x_train, y_train, epochs=iter, **extra)

    def validationfitting(self, x_train, y_train, X_val, y_val, print, iter = 30):
        """Fit for `iter` epochs while evaluating on (X_val, y_val) after each epoch.

        `print` toggles Keras progress output.
        """
        self.model.fit(x_train, y_train, epochs=iter,
                       validation_data=(X_val, y_val),
                       verbose=1 if print else 0)

    def test(self, x_test, y_test):
        """Return the result of model.evaluate on the given data (loss, then accuracy)."""
        return self.model.evaluate(x_test, y_test, verbose=0)



## Regular Decision Tree

In [None]:
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.metrics import accuracy_score,precision_score, recall_score, classification_report




# Data Processing

### Read me

Before running the following code cells in order, please upload the data files to the session storage.
This can be done in two steps:
  1. Click on the folder icon on the left sidebar.
  2. Click on the upload icon (an arrow pointing up) and select the CSV files.
  
Files required will be provided in a zip file, including:
  1. titanic.csv
  2. haberman.data
  3. diabetes.csv
  4. credit_training.csv
  5. credit_test.csv
  6. poker-hand-training-true.data
  7. poker-hand-testing.data

The datasets are loaded as either pandas DataFrames or Python lists. Please use whichever form suits your needs.

### Set-up

In [None]:
pip install ucimlrepo



In [None]:
import pandas as pd

In [None]:
def uciData(path, column_names, y_index=-1):
    """Load a header-less CSV and split each row into features and label.

    Args:
        path: file path (or buffer) handed to pandas.read_csv.
        column_names: names assigned to the columns.
        y_index: position of the label column; -1 (default) means the last one.

    Returns:
        (features, labels): a list of feature rows (lists) and a list of labels.
    """
    rows = pd.read_csv(path, header=None, names=column_names).values.tolist()

    if y_index == -1:
        # Handled separately: y_index + 1 would be 0 and slice the whole row.
        features = [row[:-1] for row in rows]
        labels = [row[-1] for row in rows]
    else:
        features = [row[:y_index] + row[y_index + 1:] for row in rows]
        labels = [row[y_index] for row in rows]

    return features, labels

In [None]:
def kagData(path, y_index=-1):
    """Load a CSV that has a header row and split each row into features and label.

    Args:
        path: file path (or buffer) handed to pandas.read_csv.
        y_index: position of the label column; -1 (default) means the last one.

    Returns:
        (features, labels): a list of feature rows (lists) and a list of labels.
    """
    rows = pd.read_csv(path).values.tolist()

    if y_index == -1:
        # Handled separately: y_index + 1 would be 0 and slice the whole row.
        features = [row[:-1] for row in rows]
        labels = [row[-1] for row in rows]
    else:
        features = [row[:y_index] + row[y_index + 1:] for row in rows]
        labels = [row[y_index] for row in rows]

    return features, labels

## Datasets

### Iris

In [None]:
from ucimlrepo import fetch_ucirepo
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Fetch the Iris dataset from the UCI repository
iris = fetch_ucirepo(id=53)

# Features and targets (pandas DataFrames)
X_iris = iris.data.features
Y_iris = iris.data.targets

# Convert to numpy arrays
X_iris = X_iris.to_numpy()
Y_iris = Y_iris.to_numpy()

# One-hot encode Y. The targets arrive as a single-column 2-D array, while
# LabelEncoder expects a 1-D array, so flatten first (avoids sklearn's
# DataConversionWarning).
y_enc = LabelEncoder().fit_transform(Y_iris.ravel())
y_label = tf.keras.utils.to_categorical(y_enc)  # integer labels -> one-hot matrix

# metadata
print(iris.metadata)

# variable information
print(iris.variables)

# Split into train and test (70/30; no random_state, so the split differs per run)
Iris_X_train, Iris_X_test, Iris_y_train, Iris_y_test = train_test_split(X_iris, y_label, test_size=0.3)

{'uci_id': 53, 'name': 'Iris', 'repository_url': 'https://archive.ics.uci.edu/dataset/53/iris', 'data_url': 'https://archive.ics.uci.edu/static/public/53/data.csv', 'abstract': 'A small classic dataset from Fisher, 1936. One of the earliest known datasets used for evaluating classification methods.\n', 'area': 'Biology', 'tasks': ['Classification'], 'characteristics': ['Tabular'], 'num_instances': 150, 'num_features': 4, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1936, 'last_updated': 'Tue Sep 12 2023', 'dataset_doi': '10.24432/C56C76', 'creators': ['R. A. Fisher'], 'intro_paper': {'title': 'The Iris data set: In search of the source of virginica', 'authors': 'A. Unwin, K. Kleinman', 'published_in': 'Significance, 2021', 'year': 2021, 'url': 'https://www.semanticscholar.org/paper/4599862ea877863669a6a8e63a3c707a787d5d7e', 'doi': '1740-9713.01589'}, 'add

  y = column_or_1d(y, warn=True)


### Haberman's Survival

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load Haberman's Survival data from the uploaded, header-less file; the last
# column (Survival_Status) is the label.
path = "/content/haberman.data"
names = ["Age", "Year", "Nodes", "Survival_Status"]
X_haberman, y_haberman = uciData(path, names)
# uciData returns plain lists; convert both to numpy arrays.
X_haberman = np.array(X_haberman)
y_haberman = np.array(y_haberman)

# One-hot encode Y: map labels to integers, then to a one-hot matrix
y_enc = LabelEncoder().fit_transform(y_haberman)
y_label = tf.keras.utils.to_categorical(y_enc) #Converting the label into a matrix form

# Rename and train/test split (70/30; no random_state, so the split differs per run)
Haber_X_train, Haber_X_test, Haber_y_train, Haber_y_test = train_test_split(X_haberman, y_label, test_size=0.3)




### Car Evaluation

In [None]:
from ucimlrepo import fetch_ucirepo

# Fetch the Car Evaluation dataset from the UCI repository
car_evaluation = fetch_ucirepo(id=19)

# Features and targets (pandas DataFrames)
X_car = car_evaluation.data.features
y_car = car_evaluation.data.targets

# Convert to numpy arrays
X_car = np.array(X_car)
y_car = np.array(y_car)

# metadata
print(car_evaluation.metadata)

# variable information
print(car_evaluation.variables)

# One-hot encode the labels. The targets arrive as a single-column 2-D array,
# while LabelEncoder expects a 1-D array, so flatten first (avoids sklearn's
# DataConversionWarning).
y_enc = LabelEncoder().fit_transform(y_car.ravel())
# Converting the label into a matrix form
y_label = tf.keras.utils.to_categorical(y_enc)

# Rename and split into train and test (70/30)
Car_X_train, Car_X_test, Car_y_train, Car_y_test = train_test_split(X_car, y_label, test_size=0.3)

# TODO: the features are categorical (see the printed metadata) and still need
# one-hot encoding before any model can use them; this split is not used yet.
print(Car_X_train[0])

{'uci_id': 19, 'name': 'Car Evaluation', 'repository_url': 'https://archive.ics.uci.edu/dataset/19/car+evaluation', 'data_url': 'https://archive.ics.uci.edu/static/public/19/data.csv', 'abstract': 'Derived from simple hierarchical decision model, this database may be useful for testing constructive induction and structure discovery methods.', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1728, 'num_features': 6, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1988, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5JP48', 'creators': ['Marko Bohanec'], 'intro_paper': {'title': 'Knowledge acquisition and explanation for multi-attribute decision making', 'authors': 'M. Bohanec, V. Rajkovič', 'published_in': '8th Intl Workshop on Expert Systems and their Applications, Avignon, France', 'yea

  y = column_or_1d(y, warn=True)


### Titanic *

In [None]:
# Load the uploaded Titanic CSV (with header row); the last column is the label.
path = "/content/titanic.csv"
X_titanic, y_titanic = kagData(path)


# One-hot encode the labels: map labels to integers first
y_enc = LabelEncoder().fit_transform(y_titanic)
# Converting the label into a matrix form
y_label = tf.keras.utils.to_categorical(y_enc)

# Make numpy arrays (kagData returns plain lists)
# NOTE(review): the recorded training runs on this split report "loss: nan";
# check the features for missing values / scaling.
X_titanic = np.array(X_titanic)
y_label = np.array(y_label)
# Rename and split into train and test (70/30; no random_state, so the split differs per run)
Tit_X_train, Tit_X_test, Tit_y_train, Tit_y_test = train_test_split(X_titanic, y_label, test_size=0.3)
print(len(Tit_X_train[0])) #input 27
print(len(Tit_y_train[0])) #output 2
print(Tit_X_train)

27
2
[[350.      42.       8.6625 ...   2.       0.       0.    ]
 [339.      45.       8.05   ...   2.       0.       0.    ]
 [748.      30.      13.     ...   2.       0.       0.    ]
 ...
 [728.      28.       7.7375 ...   1.       0.       0.    ]
 [959.      47.      42.4    ...   2.       0.       0.    ]
 [596.      36.      24.15   ...   2.       0.       0.    ]]


### Breast Cancer Wisconsin

In [None]:
from ucimlrepo import fetch_ucirepo

# Fetch the Breast Cancer Wisconsin (Original) dataset from the UCI repository
breast_cancer_wisconsin_original = fetch_ucirepo(id=15)

# Features and targets (pandas DataFrames)
X_cancer = breast_cancer_wisconsin_original.data.features
y_cancer = breast_cancer_wisconsin_original.data.targets

# metadata
print(breast_cancer_wisconsin_original.metadata)

# variable information
print(breast_cancer_wisconsin_original.variables)

# One-hot encode the labels. The targets are a single-column DataFrame, while
# LabelEncoder expects a 1-D array, so flatten first (avoids sklearn's
# DataConversionWarning).
y_enc = LabelEncoder().fit_transform(y_cancer.to_numpy().ravel())
# Converting the label into a matrix form
y_label = tf.keras.utils.to_categorical(y_enc)

# Rename and split into train and test (70/30)
# NOTE(review): the printed metadata reports missing values (NaN) for this
# dataset; they are not handled here and should be imputed or dropped before
# training.
Cancer_X_train, Cancer_X_test, Cancer_y_train, Cancer_y_test = train_test_split(X_cancer, y_label, test_size=0.3)
print(Cancer_X_train)

{'uci_id': 15, 'name': 'Breast Cancer Wisconsin (Original)', 'repository_url': 'https://archive.ics.uci.edu/dataset/15/breast+cancer+wisconsin+original', 'data_url': 'https://archive.ics.uci.edu/static/public/15/data.csv', 'abstract': 'Original Wisconsin Breast Cancer Database', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 699, 'num_features': 9, 'feature_types': ['Integer'], 'demographics': [], 'target_col': ['Class'], 'index_col': ['Sample_code_number'], 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1990, 'last_updated': 'Fri Nov 03 2023', 'dataset_doi': '10.24432/C5HP4Z', 'creators': ['WIlliam Wolberg'], 'intro_paper': None, 'additional_info': {'summary': "Samples arrive periodically as Dr. Wolberg reports his clinical cases. The database therefore reflects this chronological grouping of the data. This grouping information appears immediately below, having been removed fro

  y = column_or_1d(y, warn=True)


### Pima Indian Diabetes

In [None]:
# Load the uploaded Pima Indian Diabetes CSV (with header row); the last
# column is the label.
path = "/content/diabetes.csv"
X_diabetes, y_diabetes = kagData(path)


# One-hot encode the labels: map labels to integers first
y_enc = LabelEncoder().fit_transform(y_diabetes)
# Converting the label into a matrix form
y_label = tf.keras.utils.to_categorical(y_enc)

# Rename and split into train and test (70/30).
# NOTE(review): X_diabetes is still a plain list of lists here (unlike the
# Haberman/Titanic cells, which convert to numpy arrays first).
Dia_X_train, Dia_X_test, Dia_y_train, Dia_y_test = train_test_split(X_diabetes, y_label, test_size=0.3)
print(Dia_X_train) # 8 inputs
print(Dia_y_train) # 2 outputs

[[3.0, 83.0, 58.0, 31.0, 18.0, 34.3, 0.336, 25.0], [2.0, 157.0, 74.0, 35.0, 440.0, 39.4, 0.134, 30.0], [1.0, 143.0, 74.0, 22.0, 61.0, 26.2, 0.256, 21.0], [7.0, 103.0, 66.0, 32.0, 0.0, 39.1, 0.344, 31.0], [4.0, 144.0, 58.0, 28.0, 140.0, 29.5, 0.287, 37.0], [5.0, 189.0, 64.0, 33.0, 325.0, 31.2, 0.583, 29.0], [3.0, 128.0, 72.0, 25.0, 190.0, 32.4, 0.549, 27.0], [2.0, 90.0, 60.0, 0.0, 0.0, 23.5, 0.191, 25.0], [2.0, 120.0, 54.0, 0.0, 0.0, 26.8, 0.455, 27.0], [0.0, 129.0, 80.0, 0.0, 0.0, 31.2, 0.703, 29.0], [5.0, 77.0, 82.0, 41.0, 42.0, 35.8, 0.156, 35.0], [3.0, 169.0, 74.0, 19.0, 125.0, 29.9, 0.268, 31.0], [10.0, 162.0, 84.0, 0.0, 0.0, 27.7, 0.182, 54.0], [8.0, 124.0, 76.0, 24.0, 600.0, 28.7, 0.687, 52.0], [13.0, 104.0, 72.0, 0.0, 0.0, 31.2, 0.465, 38.0], [1.0, 157.0, 72.0, 21.0, 168.0, 25.6, 0.123, 24.0], [0.0, 84.0, 82.0, 31.0, 125.0, 38.2, 0.233, 23.0], [2.0, 125.0, 60.0, 20.0, 140.0, 33.8, 0.088, 31.0], [0.0, 152.0, 82.0, 39.0, 272.0, 41.5, 0.27, 27.0], [13.0, 145.0, 82.0, 19.0, 110.0, 2

### Give-Me-Some-Credit *

In [None]:
# The training variables must be loaded from the training file (this cell
# previously read credit_test.csv).
path = "/content/credit_training.csv"
X_credit_train, y_credit_train = kagData(path)
# One-hot encode the labels: map labels to integers first
y_enc = LabelEncoder().fit_transform(y_credit_train)
# Converting the label into a matrix form
y_label = tf.keras.utils.to_categorical(y_enc)

# Rename (the features stay a plain list of lists as returned by kagData)
Credit_X_train = X_credit_train
Credit_y_train = y_label

In [None]:
# The test variables must be loaded from the test file (this cell previously
# read credit_training.csv).
path = "/content/credit_test.csv"
X_credit_test, y_credit_test = kagData(path)


### Poker Hand

In [None]:
# Load the uploaded, header-less Poker Hand training file. Columns are five
# (S, C) pairs followed by the label column "Class".
path = "/content/poker-hand-training-true.data"
names = ["S1", "C1", "S2", "C2","S3", "C3", "S4", "C4","S5", "C5", "Class"]
X_poker_train, y_poker_train = uciData(path, names)

In [None]:
# Load the uploaded, header-less Poker Hand test file. Columns are five
# (S, C) pairs followed by the label column "Class".
path = "/content/poker-hand-testing.data"
names = ["S1", "C1", "S2", "C2","S3", "C3", "S4", "C4","S5", "C5", "Class"]
X_poker_test, y_poker_test = uciData(path, names)
# Backward-compatible alias for the earlier misspelled name (double underscore).
y__poker_test = y_poker_test

### German Credit Data

In [None]:
from ucimlrepo import fetch_ucirepo

# Fetch the Statlog (German Credit Data) dataset from the UCI repository
statlog_german_credit_data = fetch_ucirepo(id=144)

# Features and targets (pandas DataFrames)
X_german = statlog_german_credit_data.data.features
y_german = statlog_german_credit_data.data.targets

# metadata
print(statlog_german_credit_data.metadata)

# variable information
print(statlog_german_credit_data.variables)

# One-hot encode the labels. The targets are a single-column DataFrame, while
# LabelEncoder expects a 1-D array, so flatten first (avoids sklearn's
# DataConversionWarning).
y_enc = LabelEncoder().fit_transform(y_german.to_numpy().ravel())
# Converting the label into a matrix form
y_label = tf.keras.utils.to_categorical(y_enc)

# Rename and split into train and test (70/30)
# NOTE(review): the printed metadata lists categorical feature types; those
# columns are not encoded here.
German_X_train, German_X_test, German_y_train, German_y_test = train_test_split(X_german, y_label, test_size=0.3)


{'uci_id': 144, 'name': 'Statlog (German Credit Data)', 'repository_url': 'https://archive.ics.uci.edu/dataset/144/statlog+german+credit+data', 'data_url': 'https://archive.ics.uci.edu/static/public/144/data.csv', 'abstract': 'This dataset classifies people described by a set of attributes as good or bad credit risks. Comes in two formats (one all numeric). Also comes with a cost matrix', 'area': 'Social Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1000, 'num_features': 20, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Other', 'Marital Status', 'Age', 'Occupation'], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1994, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5NC77', 'creators': ['Hans Hofmann'], 'intro_paper': None, 'additional_info': {'summary': 'Two datasets are provided.  the original dataset, in the form provided by

  y = column_or_1d(y, warn=True)


### Image Segmentation

In [None]:
from ucimlrepo import fetch_ucirepo

# Fetch the Image Segmentation dataset from the UCI repository
image_segmentation = fetch_ucirepo(id=50)

# Features and targets (pandas DataFrames)
X_image = image_segmentation.data.features
y_image = image_segmentation.data.targets

# metadata
print(image_segmentation.metadata)

# variable information
print(image_segmentation.variables)

# One-hot encode the labels. The targets are a single-column DataFrame, while
# LabelEncoder expects a 1-D array, so flatten first (avoids sklearn's
# DataConversionWarning).
y_enc = LabelEncoder().fit_transform(y_image.to_numpy().ravel())
# Converting the label into a matrix form
y_label = tf.keras.utils.to_categorical(y_enc)

# Rename and split into train and test (70/30)
Image_X_train, Image_X_test, Image_y_train, Image_y_test = train_test_split(X_image, y_label, test_size=0.3)


{'uci_id': 50, 'name': 'Image Segmentation', 'repository_url': 'https://archive.ics.uci.edu/dataset/50/image+segmentation', 'data_url': 'https://archive.ics.uci.edu/static/public/50/data.csv', 'abstract': 'Image data described by high-level numeric-valued attributes, 7 classes', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 2310, 'num_features': 19, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1990, 'last_updated': 'Fri Oct 27 2023', 'dataset_doi': '10.24432/C5GP4N', 'creators': [], 'intro_paper': None, 'additional_info': {'summary': 'The instances were drawn randomly from a database of 7 outdoor images.  The images were handsegmented to create a classification for every pixel.  \r\n\r\n   Each instance is a 3x3 region.', 'purpose': None, 'funded_by': None, 'instances_represent': None, 'recommended_dat

  y = column_or_1d(y, warn=True)


### Covertype

In [None]:
from ucimlrepo import fetch_ucirepo

# Fetch the Covertype dataset from the UCI repository
covertype = fetch_ucirepo(id=31)

# Features and targets (as pandas dataframes)
X_covertype = covertype.data.features
y_covertype = covertype.data.targets
# NOTE(review): in the recorded run the targets print as None and the feature
# frame's index looks shifted into the first columns; the cell then ends in a
# ValueError. The loading step needs to be fixed before this split is usable.
print(y_covertype)
print(X_covertype)
# metadata
print(covertype.metadata)

# variable information
print(covertype.variables)

# One-hot encode the labels: map labels to integers first
y_enc = LabelEncoder().fit_transform(y_covertype)
# Converting the label into a matrix form
y_label = tf.keras.utils.to_categorical(y_enc)

# Rename and split into train and test (70/30)
Cover_X_train, Cover_X_test, Cover_y_train, Cover_y_test = train_test_split(X_covertype, y_label, test_size=0.3)


None
             Elevation  Aspect  Slope  Horizontal_Distance_To_Hydrology  \
2596 51  3         258       0    510                               221   
2590 56  2         212      -6    390                               220   
2804 139 9         268      65   3180                               234   
2785 155 18        242     118   3090                               238   
2595 45  2         153      -1    391                               220   
...                ...     ...    ...                               ...   
2396 153 20         85      17    108                               240   
2391 152 19         67      12     95                               240   
2386 159 17         60       7     90                               236   
2384 170 15         60       5     90                               230   
2383 165 13         60       4     67                               231   

             Vertical_Distance_To_Hydrology  Horizontal_Distance_To_Roadways  \
2596 51  3    

ValueError: ignored

#Parameter Tuning
We need to find the optimal hyper-parameters for each model (neural network, decision tree and neural decision tree) for each dataset we are looking at.

##Deep Neural Decision Tree

###Iris Dataset

In [None]:
def iris_hyper_tune():
    """Train a DNDT on the Iris split with fixed settings, then close its session."""
    settings = dict(
        num_classes=3,
        num_features=4,
        temperature=0.2,
        learning_rate=0.05,
        iters=1200,
        epsilon=0.1,
    )
    dndt = DNDT(**settings)
    dndt.fit(Iris_X_train, Iris_y_train, Iris_X_test, Iris_y_test)
    dndt.closeSession()

iris_hyper_tune()

0 %
Loss:  1.1283684
Accuracy:  0.34285714285714286


25 %
Loss:  0.17426483
Accuracy:  0.9619047619047619


50 %
Loss:  0.118160404
Accuracy:  0.9619047619047619


75 %
Loss:  0.11770253
Accuracy:  0.9619047619047619


Final Test Accuracy:  0.9333333333333333


##Neural Network

###Iris Dataset

In [None]:
import tensorflow as tf

def iris_hyper_tune():
    """Sweep the Keras network's hyper-parameters on Iris, one at a time.

    The learning rate is left to Adam. Four independent sweeps are run
    (epochs, number of hidden layers, units per layer, dropout rate) and the
    best value of each is printed with its accuracy.

    NOTE(review): the test split doubles as the validation set, so the
    reported accuracies are optimistic.
    """
    def sweep(candidates, build_model, epochs_for=lambda _value: 30):
        # Train one model per candidate and return (best candidate, accuracy).
        # Higher accuracy wins; ties go to the lower loss.
        best_value, best_acc, best_loss = 0, 0, 0
        for value in candidates:
            model = build_model(value)
            model.validationfitting(Iris_X_train, Iris_y_train, Iris_X_test, Iris_y_test,
                                    False, epochs_for(value))
            stats = model.test(Iris_X_test, Iris_y_test)
            loss, acc = stats[0], stats[1]
            if acc > best_acc or (acc == best_acc and loss < best_loss):
                best_value, best_acc, best_loss = value, acc, loss
        return best_value, best_acc

    # Epochs
    best, acc = sweep([15, 20, 25, 30],
                      lambda _epochs: NeuralNetwork(2, [500, 500], 4, 0.1, 3),
                      epochs_for=lambda epochs: epochs)
    print("The best number of epochs to run is:", best, "\n Accuracy: ", acc)

    # Number of hidden layers
    best, acc = sweep([1, 2, 3, 4],
                      lambda layers: NeuralNetwork(layers, [500] * layers, 4, 0.1, 3))
    print("The best number of hidden layers to run is:", best, "\n Accuracy: ", acc)

    # Units per layer
    best, acc = sweep([100, 300, 500, 1000],
                      lambda units: NeuralNetwork(3, [units] * 3, 4, 0.1, 3))
    print("The best number of units per layer to run is:", best, "\n Accuracy: ", acc)

    # Dropout rate
    best, acc = sweep([0, 0.1, 0.2, 0.3],
                      lambda rate: NeuralNetwork(3, [500, 500, 500], 4, rate, 3))
    print("The best dropout rate to run is: ", best, "\n Accuracy: ", acc)



iris_hyper_tune()

2/2 - 0s - loss: 0.4201 - accuracy: 0.9333 - 26ms/epoch - 13ms/step
2/2 - 0s - loss: 0.3491 - accuracy: 0.9778 - 44ms/epoch - 22ms/step
2/2 - 0s - loss: 0.3133 - accuracy: 1.0000 - 32ms/epoch - 16ms/step
2/2 - 0s - loss: 0.2853 - accuracy: 0.9556 - 40ms/epoch - 20ms/step
The best number of epochs to run is: 25 
 Accuracy:  1.0
2/2 - 0s - loss: 0.2554 - accuracy: 1.0000 - 27ms/epoch - 14ms/step
2/2 - 0s - loss: 0.2641 - accuracy: 0.9778 - 31ms/epoch - 15ms/step
2/2 - 0s - loss: 0.0653 - accuracy: 0.9778 - 28ms/epoch - 14ms/step
2/2 - 0s - loss: 0.0399 - accuracy: 1.0000 - 29ms/epoch - 15ms/step
The best number of hidden layers to run is: 4 
 Accuracy:  1.0
2/2 - 0s - loss: 0.1954 - accuracy: 0.9778 - 34ms/epoch - 17ms/step
2/2 - 0s - loss: 0.0682 - accuracy: 1.0000 - 27ms/epoch - 14ms/step
2/2 - 0s - loss: 0.0618 - accuracy: 0.9778 - 51ms/epoch - 26ms/step
2/2 - 0s - loss: 0.0733 - accuracy: 0.9556 - 30ms/epoch - 15ms/step
The best number of units per layer to run is: 300 
 Accuracy:  1

###Haberman's Survival

In [None]:
def haber_hypertuning():
    """Sweep the Keras network's hyper-parameters on Haberman's Survival, one at a time.

    The learning rate is left to Adam. Four independent sweeps are run
    (epochs, number of hidden layers, units per layer, dropout rate) and the
    best value of each is printed with its accuracy.

    NOTE(review): the test split doubles as the validation set, so the
    reported accuracies are optimistic.
    """
    def sweep(candidates, build_model, epochs_for=lambda _value: 30):
        # Train one model per candidate and return (best candidate, accuracy).
        # Higher accuracy wins; ties go to the lower loss.
        best_value, best_acc, best_loss = 0, 0, 0
        for value in candidates:
            model = build_model(value)
            model.validationfitting(Haber_X_train, Haber_y_train, Haber_X_test, Haber_y_test,
                                    False, epochs_for(value))
            stats = model.test(Haber_X_test, Haber_y_test)
            loss, acc = stats[0], stats[1]
            if acc > best_acc or (acc == best_acc and loss < best_loss):
                best_value, best_acc, best_loss = value, acc, loss
        return best_value, best_acc

    print("Hyperparameter for Haberman's Survival")

    # Epochs
    best, acc = sweep([15, 20, 25, 30],
                      lambda _epochs: NeuralNetwork(2, [500, 500], 3, 0.1, 2),
                      epochs_for=lambda epochs: epochs)
    print("The best number of epochs to run is:", best, "\n Accuracy: ", acc)

    # Number of hidden layers
    best, acc = sweep([1, 2, 3, 4],
                      lambda layers: NeuralNetwork(layers, [500] * layers, 3, 0.1, 2))
    print("The best number of hidden layers to run is:", best, "\n Accuracy: ", acc)

    # Units per layer
    best, acc = sweep([100, 300, 500, 1000],
                      lambda units: NeuralNetwork(3, [units] * 3, 3, 0.1, 2))
    print("The best number of units per layer to run is:", best, "\n Accuracy: ", acc)

    # Dropout rate
    best, acc = sweep([0, 0.1, 0.2, 0.3],
                      lambda rate: NeuralNetwork(3, [500, 500, 500], 3, rate, 2))
    print("The best dropout rate to run is: ", best, "\n Accuracy: ", acc)



haber_hypertuning()

Hyperparameter for Haberman's Survival
3/3 - 0s - loss: 0.5375 - accuracy: 0.7500 - 28ms/epoch - 9ms/step
3/3 - 0s - loss: 0.5402 - accuracy: 0.7500 - 29ms/epoch - 10ms/step
3/3 - 0s - loss: 0.5253 - accuracy: 0.7609 - 33ms/epoch - 11ms/step
3/3 - 0s - loss: 0.5503 - accuracy: 0.7500 - 31ms/epoch - 10ms/step
The best number of epochs to run is: 25 
 Accuracy:  0.760869562625885
3/3 - 0s - loss: 0.6102 - accuracy: 0.7391 - 31ms/epoch - 10ms/step
3/3 - 0s - loss: 0.5990 - accuracy: 0.7500 - 32ms/epoch - 11ms/step
3/3 - 0s - loss: 0.5904 - accuracy: 0.7500 - 32ms/epoch - 11ms/step
3/3 - 0s - loss: 0.5935 - accuracy: 0.7500 - 32ms/epoch - 11ms/step
The best number of hidden layers to run is: 3 
 Accuracy:  0.75
3/3 - 0s - loss: 0.5960 - accuracy: 0.7391 - 44ms/epoch - 15ms/step
3/3 - 0s - loss: 0.6028 - accuracy: 0.7500 - 38ms/epoch - 13ms/step
3/3 - 0s - loss: 0.5922 - accuracy: 0.7174 - 32ms/epoch - 11ms/step
3/3 - 0s - loss: 0.5755 - accuracy: 0.7174 - 35ms/epoch - 12ms/step
The best nu

###Titanic

In [None]:
def tit_hypertuning():
    """Sweep the Keras network's hyper-parameters on Titanic, one at a time.

    The learning rate is left to Adam. Four independent sweeps are run
    (epochs, number of hidden layers, units per layer, dropout rate) and the
    best value of each is printed with its accuracy.

    NOTE(review): the test split doubles as the validation set, so the
    reported accuracies are optimistic. The recorded runs also show
    "loss: nan" for every model, in which case the loss tie-break never
    fires and the first candidate wins.
    """
    def sweep(candidates, build_model, epochs_for=lambda _value: 30):
        # Train one model per candidate and return (best candidate, accuracy).
        # Higher accuracy wins; ties go to the lower loss.
        best_value, best_acc, best_loss = 0, 0, 0
        for value in candidates:
            model = build_model(value)
            model.validationfitting(Tit_X_train, Tit_y_train, Tit_X_test, Tit_y_test,
                                    False, epochs_for(value))
            stats = model.test(Tit_X_test, Tit_y_test)
            loss, acc = stats[0], stats[1]
            if acc > best_acc or (acc == best_acc and loss < best_loss):
                best_value, best_acc, best_loss = value, acc, loss
        return best_value, best_acc

    # The banner previously named the wrong dataset ("Car Evaluation").
    print("Hyperparameters for Titanic")

    # Epochs
    best, acc = sweep([15, 20, 25, 30],
                      lambda _epochs: NeuralNetwork(2, [500, 500], 27, 0.1, 2),
                      epochs_for=lambda epochs: epochs)
    print("The best number of epochs to run is:", best, "\n Accuracy: ", acc)

    # Number of hidden layers
    best, acc = sweep([1, 2, 3, 4],
                      lambda layers: NeuralNetwork(layers, [500] * layers, 27, 0.1, 2))
    print("The best number of hidden layers to run is:", best, "\n Accuracy: ", acc)

    # Units per layer
    best, acc = sweep([100, 300, 500, 1000],
                      lambda units: NeuralNetwork(3, [units] * 3, 27, 0.1, 2))
    print("The best number of units per layer to run is:", best, "\n Accuracy: ", acc)

    # Dropout rate
    best, acc = sweep([0, 0.1, 0.2, 0.3],
                      lambda rate: NeuralNetwork(3, [500, 500, 500], 27, rate, 2))
    print("The best dropout rate to run is: ", best, "\n Accuracy: ", acc)



tit_hypertuning()

Hyperparameters for Car Evaluation
13/13 - 0s - loss: nan - accuracy: 0.7481 - 49ms/epoch - 4ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 47ms/epoch - 4ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 44ms/epoch - 3ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 41ms/epoch - 3ms/step
The best number of epochs to run is: 15 
 Accuracy:  0.7480915784835815
13/13 - 0s - loss: nan - accuracy: 0.7481 - 61ms/epoch - 5ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 42ms/epoch - 3ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 51ms/epoch - 4ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 90ms/epoch - 7ms/step
The best number of hidden layers to run is: 1 
 Accuracy:  0.7480915784835815
13/13 - 0s - loss: nan - accuracy: 0.7481 - 41ms/epoch - 3ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 45ms/epoch - 3ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 74ms/epoch - 6ms/step
13/13 - 0s - loss: nan - accuracy: 0.7481 - 81ms/epoch - 6ms/step
The best number of unit