# Installation of Library

In [1]:
# Only necessary to run once (to install the software on your computer)

In [None]:
!pip install --upgrade pip
!pip install git+https://github.com/keras-team/keras-tuner.git
!pip install autokeras

# Importing of Relevant Libraries

In [None]:
# Imports necessary for Utilization of Dataframe/Visualization
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
import pandas as pd
from pandasql import sqldf
import plotly.express as px

# Keras (Neural Network) Necessary Imports
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Results Necessary Imports
import operator
import os

# AutoKeras Necessary Imports
import tensorflow as tf
import autokeras as ak
from tensorflow.keras.models import load_model

# Helper Function Definitions

In [2]:
# Helper functions that will help assess effectiveness of the models that I used

In [290]:
def calculatePercentAccuracyModelNew(predictionArray, y_test):
    """Return the fraction of predictions that equal the matching true label.

    Parameters
    ----------
    predictionArray : sequence
        Predicted labels; its length determines how many comparisons are made.
    y_test : sequence
        True labels, read positionally as ``y_test[i]``. Must provide at least
        ``len(predictionArray)`` entries.

    Returns
    -------
    float
        ``correct / total``. Returns ``0.0`` when ``predictionArray`` is empty
        (this case previously raised ``ZeroDivisionError``).
    """
    totalComparisons = len(predictionArray)
    if totalComparisons == 0:
        # Nothing to compare: report 0.0 instead of dividing by zero.
        return 0.0

    correctComparisons = 0
    for i in range(totalComparisons):
        # Positional indexing (not zip) so that a y_test shorter than the
        # predictions still fails loudly with IndexError instead of being
        # silently truncated.
        if predictionArray[i] == y_test[i]:
            correctComparisons += 1

    return correctComparisons / totalComparisons

In [111]:
def roundArrayNew(rawPredictionArray):
    """Threshold per-row scores at 0.5 and return them as a list.

    Only the first element of each row is read. A score below 0.5 is floored
    and a score of 0.5 or more is ceiled, so scores in [0, 1] become 0 or 1.
    """
    return [
        math.floor(score) if score < 0.5 else math.ceil(score)
        for score in (row[0] for row in rawPredictionArray)
    ]

# Data Preparation

In [None]:
# NOTE: this cell fails until the `XValues` and `YValues` dataframes have been
# initialized; neither is created anywhere in the visible notebook.
# NOTE(review): `train_test_split` is not imported in the imports cell above.
# Presumably it is sklearn.model_selection.train_test_split; confirm and add the import.
# test_size=0.2 requests a 20% test split. No random_state is passed, so (assuming
# the scikit-learn function) the split will differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(XValues, YValues, test_size=0.2)

# AutoKeras Code

## Create and Export the Model

In [None]:
# Initialize the AutoKeras structured data classifier.
classifierModel = ak.StructuredDataClassifier(
    overwrite=True, max_trials=15
)  # max_trials=15: the search tries up to 15 different models.
# Run the search on the training data, training each candidate for 10 epochs.
classifierModel.fit(X_train, y_train, epochs=10)

# Predict with the best model.
# NOTE(review): `predictedYValues` is not used again in the visible notebook.
predictedYValues = classifierModel.predict(X_test)
# Evaluate the best model on the held-out test data and print the result.
print(classifierModel.evaluate(X_test, y_test))

In [None]:
# Export the model from the AutoKeras classifier and print its summary.
model = classifierModel.export_model()
model.summary()

In [None]:
# Predict with the exported model. The test features are cast to str first:
predictedValues = model.predict(X_test.astype(str))
# Note: TensorFlow only allows a single datatype for the input, either numerical (ideally float32) or categorical (str).

In [None]:
# Round the raw predictions to 0/1 and print the fraction that match y_test.
print(calculatePercentAccuracyModelNew(roundArrayNew(predictedValues), np.array(y_test)))
# This measures the actual effectiveness of the model on this dataset's test split.

In [413]:
# To study why the model reacts a certain way to the same input group, export the
# training and testing data so that specific model decisions can be analyzed.
# index=False keeps the dataframe index out of the CSV files.
X_train.to_csv('X_train.csv',index=False)
X_test.to_csv('X_test.csv',index=False)
y_train.to_csv('y_train.csv',index=False)
y_test.to_csv('y_test.csv',index=False)
# NOTE(review): leftover names, not defined in the visible notebook: X_trainR1, X_testR1, y_trainR1, y_testR1

In [None]:
# Save the exported model. Prefer the TensorFlow SavedModel format (a directory
# named "autokeras_model"); if that fails, fall back to a single HDF5 file.
try:
    model.save("autokeras_model", save_format="tf")
except Exception as saveError:
    # Report the fallback instead of hiding it: the model now lives at a
    # different path, so it must be loaded from "autokeras_model.h5".
    print(
        f"SavedModel export failed ({saveError!r}); "
        "saving to 'autokeras_model.h5' instead of 'autokeras_model'."
    )
    model.save("autokeras_model.h5")

## Import Created Model

In [417]:
# Reload the train/test splits from the CSV files written earlier in the notebook.
# This overwrites any in-memory X_train/X_test/y_train/y_test with the CSV versions;
# all four, including the label sets, are loaded as DataFrames.
# header=0 uses the first row of each file as the column names.
X_train = pd.read_csv("X_train.csv", header = 0)
X_test = pd.read_csv("X_test.csv", header = 0)
y_train = pd.read_csv("y_train.csv", header = 0)
y_test = pd.read_csv("y_test.csv", header = 0)

In [425]:
# The save cell writes either a SavedModel directory ("autokeras_model") or, if
# that export failed, an HDF5 file ("autokeras_model.h5"). Load whichever exists
# so this cell works after either outcome.
savedModelPath = "autokeras_model" if os.path.exists("autokeras_model") else "autokeras_model.h5"
# custom_objects lets Keras resolve the AutoKeras-specific layers in the saved model.
loaded_model = load_model(savedModelPath, custom_objects=ak.CUSTOM_OBJECTS)

In [426]:
# Tensors can only hold one datatype, so all feature columns are cast to str
# before being passed to the loaded model.
predicted_y = loaded_model.predict(X_test.astype(str))



In [428]:
# Round the loaded model's raw predictions to 0/1 and print the fraction that match y_test.
print(calculatePercentAccuracyModelNew(roundArrayNew(predicted_y), np.array(y_test)))
# NOTE(review): an earlier note recorded 0.8153846153846154 here, but the output saved
# below shows 0.6960352422907489; the value depends on the dataset and the run.

0.6960352422907489
