In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# File paths
csv_path = "nutritional_values.csv"
images_folder = "Dates/"

# Load nutritional values
df = pd.read_csv(csv_path)

# Ensure required columns
required_columns = ['image_id', 'calories', 'proteins', 'total_fat', 'glucose', 'cholesterol', 'water', 'Energy (Kcal)']
if not all(col in df.columns for col in required_columns):
    raise ValueError(f"CSV must contain columns: {', '.join(required_columns)}")

# Preprocessing: Load images and normalize nutritional values
X = []
y = []
missing_images = 0

for _, row in df.iterrows():
    image_path = os.path.join(images_folder, row['image_id'] + ".jpg")
    if os.path.exists(image_path):
        # Load image, resize to 64x64, and flatten it
        image = load_img(image_path, target_size=(64, 64))  # Resize to smaller dimensions
        image = img_to_array(image).flatten() / 255.0  # Flatten and normalize pixel values
        X.append(image)

        # Extract nutritional values
        y.append(row[['calories', 'proteins', 'total_fat', 'glucose', 'cholesterol', 'water', 'Energy (Kcal)']].values)
    else:
        missing_images += 1

print(f"Missing images: {missing_images}")
X = np.array(X)
y = np.array(y)

# Ensure that X and y are not empty
if X.size == 0 or y.size == 0:
    raise ValueError("No data available for training. Please check the image paths and CSV file.")

# Normalize the target values using MinMaxScaler
scaler_y = MinMaxScaler()
y = scaler_y.fit_transform(y)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Training samples: {X_train.shape[0]}, Testing samples: {X_test.shape[0]}")

Missing images: 0
Training samples: 2165, Testing samples: 542


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Initialize and train the model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Training complete
print("Random Forest Model trained successfully.")

In [51]:
# Predictions on test data
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f"Test MSE: {mse}")
print(f"Test MAE: {mae}")

Test MSE: 0.1062395442308227
Test MAE: 0.2577000530635831


In [None]:
from sklearn.model_selection import GridSearchCV

# Define hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Grid Search
grid_search = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", grid_search.best_params_)
best_rf_model = grid_search.best_estimator_

In [None]:
# Reverse scale target values
y_test_original = scaler_y.inverse_transform(y_test)
y_pred_original = scaler_y.inverse_transform(y_pred)

# Visualize predictions
import matplotlib.pyplot as plt

def visualize_predictions(X, y_true, y_pred, num_samples=5):
    for i in range(num_samples):
        plt.figure(figsize=(12, 5))

        # Show image (unflatten and reshape to 64x64)
        plt.subplot(1, 2, 1)
        plt.imshow(X[i].reshape(64, 64, 3))
        plt.axis('off')
        plt.title("Input Image")

        # Show true vs predicted values
        plt.subplot(1, 2, 2)
        indices = range(len(y_true[i]))
        plt.bar(indices, y_true[i], alpha=0.6, label='True', color='blue')
        plt.bar(indices, y_pred[i], alpha=0.6, label='Predicted', color='orange')
        plt.xticks(indices, ['Calories', 'Proteins', 'Fat', 'Glucose', 'Cholesterol', 'Water', 'Energy'])
        plt.legend()
        plt.title("True vs Predicted Values")

        plt.show()

# Visualize results
visualize_predictions(X_test, y_test_original, y_pred_original)

In [None]:
# let user input image path
image_path = input("Enter the path of the image: ")
image = load_img(image_path, target_size=(64, 64))
image = img_to_array(image).flatten() / 255.0

# Predict the nutritional values
nutritional_values = best_rf_model.predict([image])
nutritional_values = scaler_y.inverse_transform(nutritional_values)

# Display the nutritional values
print("Predicted Nutritional Values:")
print(f"Calories: {nutritional_values[0][0]:.2f}")
print(f"Proteins: {nutritional_values[0][1]:.2f}")
print(f"Total Fat: {nutritional_values[0][2]:.2f}")
print(f"Glucose: {nutritional_values[0][3]:.2f}")
print(f"Cholesterol: {nutritional_values[0][4]:.2f}")
print(f"Water: {nutritional_values[0][5]:.2f}")
print(f"Energy: {nutritional_values[0][6]:.2f}")

# Display the image
plt.imshow(load_img(image_path))
plt.axis('off')
plt.show()

# plot predicted and actual images side by side
plt.figure(figsize=(12, 5))

# Show image (unflatten and reshape to 64x64)
plt.subplot(1, 2, 1)
plt.imshow(image.reshape(64, 64, 3))
plt.axis('off')
plt.title("Input Image")

# Show true vs predicted values
plt.subplot(1, 2, 2)
indices = range(len(nutritional_values[0]))
plt.bar(indices, nutritional_values[0], alpha=0.6, label='Predicted', color='orange')
plt.xticks(indices, ['Calories', 'Proteins', 'Fat', 'Glucose', 'Cholesterol', 'Water', 'Energy'])
plt.legend()
plt.title("Predicted Values")

plt.show()



In [None]:
# Save the model
import joblib

model_filename = "002_random_forest_model.pkl"
joblib.dump(best_rf_model, model_filename)
print(f"Model saved as {model_filename}")


In [None]:
import numpy as np

# Define accuracy metric for regression
def regression_accuracy(y_true, y_pred, tolerance=0.1):
    relative_error = np.abs((y_true - y_pred) / y_true)  # Relative error
    accurate_predictions = np.sum(relative_error <= tolerance)  # Predictions within tolerance
    accuracy = (accurate_predictions / len(y_true)) * 100  # Percentage accuracy
    return accuracy

train_accuracy_rf = regression_accuracy(y_train, rf_model.predict(X_train))
test_accuracy_rf = regression_accuracy(y_test, rf_model.predict(X_test))
print(f"Random Forest - Train Accuracy: {train_accuracy_rf:.2f}%, Test Accuracy: {test_accuracy_rf:.2f}%")