In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, GlobalMaxPooling2D, concatenate, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from sklearn.metrics import mean_squared_error

# read the csv file into a dataframe
df = pd.read_csv("BMI/Data/data.csv")

In [2]:
df

Unnamed: 0.1,Unnamed: 0,bmi,gender,is_training,name
0,0,34.207396,Male,1,img_0.bmp
1,1,26.453720,Male,1,img_1.bmp
2,2,34.967561,Female,1,img_2.bmp
3,3,22.044766,Female,1,img_3.bmp
4,4,37.758789,Female,1,img_4.bmp
...,...,...,...,...,...
4201,4201,34.078947,Male,0,img_4201.bmp
4202,4202,34.564776,Female,0,img_4202.bmp
4203,4203,27.432362,Female,0,img_4203.bmp
4204,4204,40.492800,Male,0,img_4204.bmp


In [3]:
# create a new column called "Image_exists" to store whether the corresponding image file exists
df["Image_exists"] = df["name"].apply(lambda x: os.path.exists("BMI/Data/Images/" + x))

In [4]:
df.shape

(4206, 6)

In [5]:
# filter the dataframe to keep only the rows where "Image_exists" is True
df = df[df["Image_exists"] == True]

In [6]:
df.shape

(3962, 6)

In [7]:
# drop the "Image_exists" column since it's no longer needed
df.drop("Image_exists", axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop("Image_exists", axis=1, inplace=True)


In [8]:
df

Unnamed: 0.1,Unnamed: 0,bmi,gender,is_training,name
0,0,34.207396,Male,1,img_0.bmp
1,1,26.453720,Male,1,img_1.bmp
2,2,34.967561,Female,1,img_2.bmp
3,3,22.044766,Female,1,img_3.bmp
6,6,25.845588,Female,1,img_6.bmp
...,...,...,...,...,...
4201,4201,34.078947,Male,0,img_4201.bmp
4202,4202,34.564776,Female,0,img_4202.bmp
4203,4203,27.432362,Female,0,img_4203.bmp
4204,4204,40.492800,Male,0,img_4204.bmp


In [9]:
# Define the input shape of the images
img_width, img_height = 224, 224
input_shape = (img_width, img_height, 3)

# Define the batch size for training and testing
batch_size = 32

# Define the number of epochs for training
epochs = 20

In [10]:
from tensorflow.keras.applications import ResNet50

# Create an instance of the VGG16 model pre-trained on ImageNet
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

for layer in base_model.layers[:-4]:  # Freeze all layers except the last four
    layer.trainable = False

In [11]:
from tensorflow.keras import regularizers

# Regularization strength
l2_lambda = 0.01

from tensorflow import keras
from tensorflow.keras import layers, models

# Additional layers
layer_2d_1 = layers.Conv2D(filters=32, kernel_size=(2, 2), activation='relu')
layer_2d_2 = layers.MaxPooling2D(pool_size=(2, 2))
layer_2d_3 = layers.Conv2D(filters=64, kernel_size=(2, 2), activation='relu')
layer_2d_4 = layers.MaxPooling2D(pool_size=(2, 2))
flatten_layer = layers.Flatten()
dense_layer_1 = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2_lambda))
dropout_layer_1 = layers.Dropout(0.3)
dense_layer_2 = layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(l2_lambda))
dropout_layer_2 = layers.Dropout(0.3)
prediction_layer = layers.Dense(1, activation='linear')

# Construct the model
model = models.Sequential([
    base_model,
    layer_2d_1,
    layer_2d_2,
    layer_2d_3,
    layer_2d_4,
    flatten_layer,
    dense_layer_1,
    dropout_layer_1,
    dense_layer_2,
    dropout_layer_2,
    prediction_layer
])

In [12]:
# Compile the model with a mean squared error loss function and an Adam optimizer
model.compile(loss='mean_squared_error', optimizer="adam")

In [13]:
# Create an instance of the ImageDataGenerator for data augmentation
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

In [14]:
# Generate the training and testing data from the images
train_generator = datagen.flow_from_dataframe(
    dataframe=df[df['is_training']==1],
    directory="BMI/Data/Images/",
    x_col="name",
    y_col="bmi",
    #subset="training",
    batch_size=batch_size,
    #seed=42,
    #shuffle=True,
    class_mode="raw",
    target_size=(img_width, img_height))

Found 3210 validated image filenames.


In [15]:
test_generator = datagen.flow_from_dataframe(
    dataframe=df[df['is_training']==0],
    directory="BMI/Data/Images/",
    x_col="name",
    y_col="bmi",
    #subset="validation",
    batch_size=batch_size,
    #seed=42,
    #shuffle=True,
    class_mode="raw",
    target_size=(img_width, img_height))

Found 752 validated image filenames.


In [16]:
# Create an instance of the EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=5, restore_best_weights=True)

# Train the model on the training data
history = model.fit(train_generator,
                    epochs=epochs,
                    validation_data=test_generator,
                    callbacks=[early_stopping])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20


In [17]:
# Create a new dataframe for the predictions
pred_df = pd.DataFrame()

# Add the true labels to the dataframe
pred_df['True Label'] = test_generator.labels

# Make predictions on the testing data and add them to the dataframe
pred_df['Predicted Label'] = model.predict(test_generator).flatten()

# Display the dataframe
pred_df.head(30)



Unnamed: 0,True Label,Predicted Label
0,29.698495,29.36586
1,30.845918,33.598465
2,24.389796,28.579241
3,36.258679,26.872124
4,27.891291,26.818781
5,36.743467,29.340244
6,23.370844,32.530769
7,60.096828,31.250244
8,34.256669,29.017654
9,28.884838,33.687099


In [18]:
# Evaluate the model on the testing data
y_true = test_generator.labels
y_pred = model.predict(test_generator)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
print("RMSE:", rmse)

RMSE: 10.303861236824362


In [20]:
model.save('vgg2_model.h5')

In [22]:
#code for making real-time predictions on webcam footage within the notebook 

import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained model
model = load_model('vgg2_model.h5')

# Load the Haar cascade for face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Function to preprocess the input image
def preprocess_image(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB format
    img = cv2.resize(img, (224, 224))  # Resize to match input shape of the model
    img = img / 255.0  # Normalize pixel values
    return img

# Function to predict BMI from an image
def predict_bmi(image):
    image = preprocess_image(image)
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    prediction = model.predict(image)[0][0]
    return prediction

# Open the webcam
cap = cv2.VideoCapture(0)

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Convert the frame to grayscale for face detection
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces in the frame
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    # Process each detected face
    for (x, y, w, h) in faces:
        # Extract the face region from the frame
        face_img = frame[y:y + h, x:x + w]

        # Make prediction
        bmi = predict_bmi(face_img)

        # Display the bounding box around the face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Display the BMI on the frame
        cv2.putText(frame, f'BMI: {bmi:.2f}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow('Webcam', frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close the windows
cap.release()
cv2.destroyAllWindows()


