In [1]:
import numpy as np # arrays & loading data

from sklearn.model_selection import train_test_split # we will split the data in 3 (training + counter variation + testing data)
from sklearn.preprocessing import StandardScaler # z-score normalization & polynomianls classes 

import tensorflow as tf # for building and training neural networks

from utils import build_models 

# reduce display precision on numpy arrays
np.set_printoptions(precision=2)

# suppress warnings
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

In [2]:
# loading all the data
data = np.loadtxt('./food_data.csv', delimiter=',', skiprows=1, usecols=range(1, 7))

X = data[:,:-1] # forming the input and output 
y= data[:,-1]

y= np.expand_dims(y, axis=1) # make y 2D - the commands later will require it

print(f"shape of input X is: {X.shape}")
print(f"shape of output y is: {y.shape}")

shape of input X is: (25, 5)
shape of output y is: (25, 1)


In [3]:
# split into training , cross validation and test sets

# TRAINING SET - 60%
X_train, X_temporary, y_train, y_temporary = train_test_split(X, y, test_size=0.40, random_state=1)

# the rest of 40% - CV SET(20%) and TEST SET(20%)
X_cv, X_test, y_cv, y_test = train_test_split(X_temporary, y_temporary, test_size=0.50, random_state=1)
del X_temporary, y_temporary

print(f"training input shape:{X_train.shape}")
print(f"training output shape:{y_train.shape}")
print(f"cv input shape:{X_cv.shape}")
print(f"cv output shape:{y_cv.shape}")
print(f"test input shape:{X_test.shape}")
print(f"test output shape:{y_test.shape}")

training input shape:(15, 5)
training output shape:(15, 1)
cv input shape:(5, 5)
cv output shape:(5, 1)
test input shape:(5, 5)
test output shape:(5, 1)


In [4]:
# TRAINING DATA, CV DATA & TEST DATA OPERATIONS
# applying z-score to all the training data - make it compact for the algorithms to work better
standard_scaler = StandardScaler()
X_train_scaled = standard_scaler.fit_transform(X_train)
X_cv_scaled = standard_scaler.transform(X_cv) 
X_test_scaled = standard_scaler.transform(X_test)
print(X_train_scaled)

[[ 8.40e-02  1.98e-01 -6.69e-01 -5.47e-01  3.05e+00]
 [-6.28e-01 -5.45e-01 -4.66e-01 -2.22e-01 -5.85e-01]
 [-6.08e-01 -5.28e-01 -3.60e-01 -2.27e-01 -5.10e-01]
 [ 9.10e-01 -1.57e-01  6.95e-01 -5.60e-01 -1.48e-01]
 [-6.08e-01 -5.45e-01 -5.06e-01 -3.65e-01 -5.55e-01]
 [-2.26e-01 -5.11e-01 -6.15e-01 -5.34e-01  2.61e-03]
 [-5.87e-01 -5.28e-01 -6.19e-01 -3.61e-01 -5.85e-01]
 [ 1.63e+00  2.05e+00 -7.71e-01 -5.43e-01  1.60e+00]
 [-5.35e-01 -4.44e-01 -6.15e-01 -3.56e-01 -1.48e-01]
 [-1.22e-01  2.47e-01  2.84e+00  3.19e+00 -6.30e-01]
 [ 2.89e+00  2.83e+00  1.37e+00  1.66e+00 -7.27e-02]
 [-2.99e-01 -4.27e-01  1.04e+00 -3.86e-01  4.91e-01]
 [-6.39e-01 -5.45e-01 -6.51e-01 -4.17e-01 -6.61e-01]
 [-6.39e-01 -5.45e-01 -4.33e-01 -1.83e-01 -6.45e-01]
 [-6.28e-01 -5.45e-01 -2.51e-01 -1.49e-01 -6.00e-01]]


In [9]:
# calculate errors for some models and chose the one with the best neural network
train_errors=[]
cv_errors=[]    # initialize lists that will contain a history of error numbers at each iteration

# this is the list of all the models we will check
models = build_models()

#loop through all the models
for model in models:
    # compile - set the loss and optimizer
    model.compile(
        # this is similar to gradient descent, but it is a much improved version
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), # BC - binary class & from_logits helps our calculations be more accurate
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.03), # preimplemented optimizer
    )
    
    # train the model "epochs" times
    model.fit(
        X_train_scaled, y_train,
        epochs=200,
        verbose=0
    )
    # threshold 0.5 because we are in a binary class example

    # fraction of misclassified outputs in the TRAINING SET
    y_predicted = model.predict(X_train_scaled)
    y_predicted = tf.math.sigmoid(y_predicted) # classify above or below the threshold
    # classic way of calculating the error in a classification case
    y_predicted = np.where(y_predicted>=0.5,1,0)
    error = np.mean(y_predicted != y_train) # arithmetic mean, where the numerator is the count of wrong predictions
    train_errors.append(error)
    
    # fraction of misclassified outputs in the CV SET
    y_predicted = model.predict(X_cv_scaled)
    y_predicted = tf.math.sigmoid(y_predicted) 
    y_predicted = np.where(y_predicted>=0.5,1,0)
    error2 = np.mean(y_predicted != y_cv) # arithmetic mean, where the numerator is the count of wrong predictions
    cv_errors.append(error2)
    
# Print the result
for model_num in range(len(train_errors)):
    print(
        f"Model {model_num+1}: Training Set Classification Error: {train_errors[model_num]:.5f}, " +
        f"CV Set Classification Error: {cv_errors[model_num]:.5f}"
        )

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Model 1: Training Set Classification Error: 0.00000, CV Set Classification Error: 0.20000
Model 2: Training Set Classification Error: 0.00000, CV Set Classification Error: 0.20000
Model 3: Training Set Classification Error: 0.00000, CV Set Classification Error: 0.20000
Model 4: T

In [10]:
# choose model that performed the best
index = 2

# test error calculated as cv error from above
y_predicted = models[index-1].predict(X_test_scaled)
y_predicted = tf.math.sigmoid(y_predicted) 
y_predicted = np.where(y_predicted>=0.5,1,0)
error_test = np.mean(y_predicted != y_cv) 

print(f"Test Set Classification Error for model {index}: {error_test}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
Test Set Classification Error for model 2: 0.6


In [13]:
# user interaction
total_fat = float(input("Total fat :"))
sat_fat = float(input("Saturated fat :"))
carbo = float(input("Carbohydrates :"))
sugars = float(input("Total sugars :"))
protein = float(input("Protein :"))

x_user = np.array([[total_fat, sat_fat, carbo, sugars, protein]]) # we need a 2D array 
x_user_scaled = standard_scaler.transform(x_user)
y_predicted = models[index-1].predict(x_user_scaled)
y_predicted = tf.math.sigmoid(y_predicted) 
print(y_predicted)
if y_predicted[0] >= 0.5:
    print("This food is healthy.")
else:
     print("This food is unhealthy.")

Total fat : 1
Saturated fat : 0.6
Carbohydrates : 5
Total sugars : 5
Protein : 3.4


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
tf.Tensor([[1.]], shape=(1, 1), dtype=float32)
This food is healthy.
