In [4]:
import numpy as np # arrays & loading data

from sklearn.model_selection import train_test_split # we will split the data in 3 (training + cross validation + testing data)
from sklearn.preprocessing import StandardScaler # z-score normalization & polynomials classes 

import tensorflow as tf # for building and training neural networks

from utils import build_models # here we have a bunch of models to try

# reduce display precision on numpy arrays
np.set_printoptions(precision=2)

# suppress warnings
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and batch shuffling repeat on every Restart & Run All.
# Without this, the per-model errors change from run to run.
SEED = 1  # same value the train/cv/test splits pass as random_state
tf.keras.utils.set_random_seed(SEED)

In [5]:
# Read the numeric table from disk: skiprows=1 drops the first line
# (presumably the header row) and usecols keeps columns 1..6, leaving out column 0.
data = np.loadtxt('./food_data.csv', delimiter=',', skiprows=1, usecols=range(1, 7))

# Every column except the last is an input feature; the last column is the target.
X = data[:, :-1]
# Keep y as a single-column 2D array - the later cells compare it against
# 2D model predictions.
y = data[:, -1].reshape(-1, 1)

print(f"shape of input X is: {X.shape}")
print(f"shape of output y is: {y.shape}")

shape of input X is: (47, 5)
shape of output y is: (47, 1)


In [6]:
# Three-way split: training / cross validation / test.
# Both calls pass a fixed random_state so the partition is the same on every run.

# Step 1 - keep 60% for training, hold out the remaining 40%.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.40, random_state=1
)

# Step 2 - cut the held-out 40% in half: CV set (~20%) and test set (~20%).
X_cv, X_test, y_cv, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.50, random_state=1
)

# The intermediate arrays are no longer needed once they have been divided.
del X_holdout, y_holdout

print(f"training input shape:{X_train.shape}")
print(f"training output shape:{y_train.shape}")
print(f"cv input shape:{X_cv.shape}")
print(f"cv output shape:{y_cv.shape}")
print(f"test input shape:{X_test.shape}")
print(f"test output shape:{y_test.shape}")

training input shape:(28, 5)
training output shape:(28, 1)
cv input shape:(9, 5)
cv output shape:(9, 1)
test input shape:(10, 5)
test output shape:(10, 1)


In [7]:
# TRAINING DATA, CV DATA & TEST DATA OPERATIONS
# z-score normalization helps the optimizer converge faster.
# The scaler is fitted on the training set only; the CV and test sets are then
# transformed with those same training statistics.
standard_scaler = StandardScaler().fit(X_train)
X_train_scaled, X_cv_scaled, X_test_scaled = (
    standard_scaler.transform(split) for split in (X_train, X_cv, X_test)
)


In [8]:
# Train every candidate network and record its misclassification rate on the
# training and cross-validation sets, so the best architecture can be chosen.

def _classification_error(model, X, y):
    """Return the fraction of rows in X that `model` labels differently from y.

    The models are compiled with ``from_logits=True``, so the output of
    ``predict`` is treated as raw logits: it is passed through a sigmoid and
    cut at the 0.5 threshold to obtain 0/1 labels.

    Args:
        model: trained Keras model (assumed to emit one logit per row -
            confirm against utils.build_models).
        X: 2D array of already-scaled input features.
        y: single-column 2D array of targets (assumed to hold 0/1 labels).

    Returns:
        The mean of the element-wise "prediction != target" comparison.
    """
    logits = model.predict(X, verbose=0)  # verbose=0: no progress bar per call
    probabilities = tf.math.sigmoid(logits)
    labels = np.where(probabilities >= 0.5, 1, 0)  # 0.5 threshold: binary classification
    return np.mean(labels != y)


# one error value per model, in the order returned by build_models()
train_errors = []
cv_errors = []

# this is the list of all the models we will check
models = build_models()

# loop through all the models
for model in models:
    # compile - set the loss and optimizer
    model.compile(
        # binary cross-entropy for a two-class problem; from_logits=True tells the
        # loss that the model outputs raw logits, which is numerically more accurate
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        # Adam is similar to gradient descent, but it is a much improved version
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.03),
    )

    # train the model for "epochs" passes over the training set
    model.fit(
        X_train_scaled, y_train,
        epochs=200,
        verbose=0
    )

    # fraction of misclassified outputs in the TRAINING SET and in the CV SET
    train_errors.append(_classification_error(model, X_train_scaled, y_train))
    cv_errors.append(_classification_error(model, X_cv_scaled, y_cv))

# Print the result
for model_num, (train_error, cv_error) in enumerate(zip(train_errors, cv_errors), start=1):
    print(
        f"Model {model_num}: Training Set Classification Error: {train_error:.5f}, " +
        f"CV Set Classification Error: {cv_error:.5f}"
        )

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
Model 1: Training Set Classification Error: 0.00000, CV Set Classification Error: 0.11111
Model 2: Training Set Classification Error: 0.00000, CV Set Classification Error: 0.11111
Model 3: Training Set Classification Error: 0.00000, CV Set Classification Error: 0.22222
Model 4: T

In [9]:
# choose the model that performed the best on the cross-validation set.
# The index is derived from cv_errors instead of being typed in by hand, because
# the errors can change between training runs. It is 1-based to match the
# "Model N" numbering printed by the training cell; ties go to the earliest model.
index = int(np.argmin(cv_errors)) + 1

# test error, computed the same way as the CV error:
# logits -> sigmoid -> 0.5 threshold -> fraction of mismatches
y_predicted = models[index-1].predict(X_test_scaled, verbose=0)  # verbose=0: no progress bar
y_predicted = tf.math.sigmoid(y_predicted)
y_predicted = np.where(y_predicted>=0.5,1,0)
error_test = np.mean(y_predicted != y_test)

print(f"Test Set Classification Error for model {index}: {error_test}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Test Set Classification Error for model 5: 0.2


In [10]:
# user interaction: ask for the five nutrient values of one food item
# NOTE(review): the order below is assumed to match the feature columns the
# scaler and models were fitted on - confirm against food_data.csv.
total_fat = float(input("Total fat :"))
sat_fat = float(input("Saturated fat :"))
carbo = float(input("Carbohydrates :"))
sugars = float(input("Total sugars :"))
protein = float(input("Protein :"))

# the scaler and the model both expect a 2D array, so build a single row
x_user = np.array([total_fat, sat_fat, carbo, sugars, protein]).reshape(1, -1)

# same pipeline as for the other sets: scale, predict logits, apply the sigmoid
x_user_scaled = standard_scaler.transform(x_user)
y_predicted = tf.math.sigmoid(models[index-1].predict(x_user_scaled))
print(y_predicted)

# classify below or above the threshold 0.5
print("This food is healthy." if y_predicted[0] >= 0.5 else "This food is unhealthy.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
tf.Tensor([[0.]], shape=(1, 1), dtype=float32)
This food is unhealthy.
