## BMI

### Model - PCA + Linear Regression on dlib facial landmarks

### Input - Images

In [None]:
import dlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Uncomment the two commands below on the first run to download and unpack the dlib 68-point landmark model
# ! wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
# ! bzip2 -d shape_predictor_68_face_landmarks.dat.bz2

In [None]:
# Load the pickled DataFrame that holds the image data and BMI labels.
# NOTE: pickle.load can execute arbitrary code while unpickling, so only
# open dataset files that come from a trusted source.
with open('custom_dataset.pkl', mode='rb') as dataset_file:
    loaded_df = pickle.load(dataset_file)

In [None]:
# Turn each flat pixel list into a 512 x 512 NumPy array, for both the
# front-view and the side-view image columns.
for column in ('image_front_data', 'image_side_data'):
    loaded_df[column] = loaded_df[column].apply(
        lambda pixels: np.array(pixels).reshape(512, 512)
    )

In [None]:
# Preview the first rows of the loaded dataset
loaded_df.head()

In [None]:
# Summarise the columns: dtypes and non-null counts
loaded_df.info()

In [None]:
# Spot check: is the BMI value at positional row 5858 missing?
pd.isnull(loaded_df['bmi'].iloc[5858])


In [None]:
# Display every row whose BMI value is missing
loaded_df[loaded_df['bmi'].isnull()]

In [None]:
# Load dlib's frontal face detector and the 68-point landmark predictor.
# The predictor model file is read from the current working directory.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

In [None]:
# Accumulator for landmark feature rows (one row per usable image).
# Re-run this cell before re-running the extraction loop, otherwise the
# loop appends a second copy of every row.
temp = pd.DataFrame()

In [None]:
# Accumulator for the BMI value of each row added to `temp`.
# Re-run this cell before re-running the extraction loop, otherwise the
# loop appends a second copy of every label.
labels = []

In [None]:
import cv2

In [None]:
# Debug visualisation: run the face detector on one side-view image
# (positional row 80), mark every landmark that is found and show the
# result in an OpenCV window.
img = loaded_df['image_side_data'].iloc[80]

# cv2.circle draws in place. Draw on a copy so the dots are not burned into
# the image stored in loaded_df, and so every face is predicted on the
# unmarked original.
canvas = img.copy()

faces = detector(img)
for face in faces:
    print('face')
    landmarks = predictor(img, face)
    for n in range(68):
        print('landmarks')
        point = landmarks.part(n)
        # draw the landmark as a filled circle of radius 4
        cv2.circle(canvas, (point.x, point.y), 4, (255, 0, 0), -1)

# display the annotated image using cv2; blocks until a key is pressed
cv2.imshow('image', canvas)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# Build the landmark feature table from the front-view images.
# Each usable image contributes one row of 136 values (x, y for each of the
# 68 landmarks) to `temp` and its BMI value to `labels`. Images without a
# BMI value, without a detected face, or without a full landmark set are
# skipped and reported.
new_rows = []
new_labels = []

for sample_id, bmi, image in zip(
    loaded_df['id'], loaded_df['bmi'], loaded_df['image_front_data']
):
    if pd.isnull(bmi):
        print('No BMI :', sample_id)
        continue

    # Detect faces
    faces = detector(image)
    if not faces:
        print('No face detected :', sample_id)
        continue

    # Only the first detected face is used.
    landmarks = predictor(image, faces[0])

    # A dlib detection object is always truthy, so `not landmarks` can never
    # fire; check the number of landmark parts instead.
    if landmarks.num_parts != 68:
        print('No landmarks detected :', sample_id)
        continue

    feature = []
    for coord in range(68):
        point = landmarks.part(coord)
        feature.extend((point.x, point.y))

    new_rows.append(feature)
    new_labels.append(bmi)

# Concatenate once: calling pd.concat inside the loop copies the whole
# table on every iteration, which is quadratic in the number of rows.
if new_rows:
    temp = pd.concat([temp, pd.DataFrame(new_rows)], ignore_index=True)
    labels.extend(new_labels)

In [None]:
# Number of BMI labels collected (one per extracted feature row)
len(labels)

In [None]:
# Distribution of the collected BMI labels (how often each value occurs).
# Note: `test` here is the label series, not a test split.
test = pd.Series(labels)
test.value_counts()


In [None]:
# Count missing values among the collected labels
test.isna().sum()

In [None]:
# Preview the first rows of the landmark feature table
temp.head()

In [None]:
# (rows, columns) of the landmark feature table
temp.shape

In [None]:
def PCA(dataset, n_components=20, plot=True):
    """Project `dataset` onto its leading principal components.

    The data is centred, the covariance matrix is eigen-decomposed, and the
    centred data is projected onto the `n_components` eigenvectors with the
    largest eigenvalues. The number of components needed to explain more
    than 99% of the variance is printed for information only; it does not
    affect the returned projection.

    Args:
        dataset: 2D array-like of shape (n_samples, n_features).
        n_components: Number of leading principal components to keep.
            If it exceeds n_features, all components are kept.
        plot: When True, show the cumulative explained-variance curve.

    Returns:
        A tuple ``(W, dataset_pca, mean)``:
        W is the (n_features, n_components) projection matrix whose columns
        are the leading eigenvectors, dataset_pca is the
        (n_samples, n_components) projected data, and mean is the
        per-feature mean that was subtracted. New rows can be projected
        with ``(rows - mean).dot(W)``.
    """
    dataset = np.asarray(dataset, dtype=float)
    mean = np.mean(dataset, axis=0)
    z = dataset - mean
    cov_matrix = z.T.dot(z) / len(dataset)

    # The covariance matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues and eigenvectors, returned in ascending eigenvalue order.
    eig_values, eig_vectors = np.linalg.eigh(cov_matrix)
    order = np.argsort(eig_values)[::-1]
    sorted_eigvalues = eig_values[order]
    # Eigenvectors stay as COLUMNS, so slicing columns below really selects
    # the leading components.
    sorted_eigvectors = eig_vectors[:, order]

    cumulative_variance = np.cumsum(sorted_eigvalues) / np.sum(sorted_eigvalues)
    if plot:
        plt.plot(cumulative_variance)
        plt.xlabel('Number of Principal Components')
        plt.ylabel('Cumulative Variance')
        plt.title('Cumulative Variance vs Number of Principal Components')
        plt.show()

    # First position where the cumulative share exceeds 99% (1-based count).
    n_required = np.argmax(cumulative_variance > 0.99) + 1

    print('Number of Principal Components required:', n_required)

    W = sorted_eigvectors[:, :n_components]
    print('Shape of W:', W.shape)

    dataset_pca = z.dot(W)
    print('Shape of Reduced Dataset:', dataset_pca.shape)
    return W, dataset_pca, mean

In [None]:
# Run PCA on the landmark feature table: W is the projection matrix,
# dataset_reduced the projected rows, mean the per-feature mean removed first
W, dataset_reduced, mean = PCA(np.array(temp))

In [None]:
# Step 2: Split the Dataset
# Split the raw landmark features BEFORE the PCA so that the projection is
# learned from training rows only. Fitting PCA on every row first lets
# test-set statistics leak into the features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    np.array(temp), np.array(labels), test_size=0.2, random_state=42
)

# Fit the PCA on the training rows. W and mean are rebound on purpose so
# they always describe the basis the model below was trained in.
W, X_train, mean = PCA(X_train_raw)

# Project the held-out rows with the training mean and basis.
X_test = (X_test_raw - mean).dot(W)

# Keep the full reduced table consistent with the model's basis as well.
dataset_reduced = (np.array(temp) - mean).dot(W)

# Step 3: Train the Model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 4: Make Predictions
y_pred = model.predict(X_test)

# Step 5: Evaluate the Model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100  # Mean Absolute Percentage Error

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R²): {r2:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")