# Obesity Level Prediction

In [None]:
from pathlib import Path

In [None]:
import logging

# Application logger: records model events and user submissions in a log file
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Output logs to my_log.txt.
# getLogger() returns the same logger object each time this cell is re-run,
# so attach the file handler only once; an unconditional addHandler() would
# make every record appear in the file once per re-run.
if not any(isinstance(existing, logging.FileHandler) for existing in logger.handlers):
    handler = logging.FileHandler('my_log.txt')
    logger.addHandler(handler)

# Do not pass records on to the handlers of ancestor (root) loggers
logger.propagate = False


In [None]:
import warnings
warnings.filterwarnings('ignore')
import ipywidgets as widgets
from ipywidgets import VBox, RadioButtons, Label, Image, HTML, Text, Dropdown
from IPython.display import display, clear_output

In [None]:
import pandas as pd

# Function to clean and transform data if needed
def clean_data(
    source="https://raw.githubusercontent.com/DavidRandolphjr/CsCapstone/main/ObesityDataSet.csv",
    destination="Cleaned_ObesityDataSet.csv",
):
    """Drop incomplete rows, keep the model columns and save the result as CSV.

    Args:
        source: Path or URL of the raw CSV file. Defaults to the project's
            raw dataset URL.
        destination: Path the cleaned CSV is written to (without the index).

    Returns:
        pandas.DataFrame: The cleaned frame that was written to ``destination``.
    """
    columns_to_keep = ["Age", "Height", "Weight", "Gender", "FAVC", "FAF", "NObeyesdad"]

    raw = pd.read_csv(source)

    # Rows with a missing value in ANY raw column are removed first; only then
    # is the column subset taken, so a gap in a discarded column still drops
    # the row.
    cleaned = raw.dropna()[columns_to_keep]

    cleaned.to_csv(destination, index=False)
    return cleaned

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import joblib
import matplotlib.pyplot as plt
import pandas as pd

def create_model():
    """Train a random-forest obesity classifier and save it to disk.

    Reads the cleaned dataset from the project's GitHub URL, encodes the
    ``Gender`` and ``FAVC`` text columns as integers, fits a
    ``RandomForestClassifier`` on an 80/20 train/test split and dumps the
    fitted model to ``ObesityData.joblib``.

    Returns:
        tuple: ``(model, accuracy, conf_matrix, y_test, y_pred)`` - the fitted
        classifier, its accuracy on the held-out split, the confusion matrix
        for that split, and the true and predicted test labels.
    """
    # Read the CSV file into a DataFrame
    data = pd.read_csv("https://raw.githubusercontent.com/DavidRandolphjr/CsCapstone/main/Cleaned_ObesityDataSet.csv")

    # Encode strings to integers
    gender_mapping = {"Male": 0, "Female": 1}
    favc_mapping = {"no": 0, "yes": 1}

    # Map the values to data (a value missing from a mapping becomes NaN)
    data["Gender"] = data["Gender"].map(gender_mapping)
    data["FAVC"] = data["FAVC"].map(favc_mapping)

    # Select features and target variable
    X = data[["Age","Height","Weight", "Gender", "FAVC", "FAF"]]
    y = data["NObeyesdad"]  # Target variable

    # Split data into training/testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Machine learning model
    model = RandomForestClassifier(n_estimators=300, random_state=42)

    # Train the model on the training data
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # Generate accuracy score
    accuracy = accuracy_score(y_test, y_pred)

    # Save the model
    joblib.dump(model, 'ObesityData.joblib')

    # Generate confusion matrix. No `labels` argument is given, so the rows
    # and columns follow scikit-learn's default order (sorted class labels).
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Log model created. Use the notebook's configured `logger`: a bare
    # logging.info() goes to the root logger, whose default WARNING level
    # discards INFO records, so the event never reached my_log.txt.
    logger.info("Model created")

    # Return trained model and accuracy
    return model, accuracy, conf_matrix, y_test, y_pred


In [None]:
def obese_guess(age, gender, height, weight, favc, faf):
    """Predict the weight category for one person with the random-forest model.

    Builds a one-row feature frame (columns Age, Height, Weight, Gender,
    FAVC, FAF), integer-encodes ``gender`` and ``favc`` and asks the
    module-level ``model`` for a prediction.

    Returns:
        str: The predicted category written with spaces instead of
        underscores, e.g. ``"Normal Weight"``.
    """
    # Integer codes for the two text features
    gender_codes = {"Male": 0, "Female": 1}
    favc_codes = {"no": 0, "yes": 1}

    # One-row frame: every column holds a single-element list
    column_names = ("Age", "Height", "Weight", "Gender", "FAVC", "FAF")
    raw_values = (age, height, weight, gender, favc, faf)
    features = pd.DataFrame(
        {name: [value] for name, value in zip(column_names, raw_values)}
    )
    features = features.assign(
        Gender=features["Gender"].map(gender_codes),
        FAVC=features["FAVC"].map(favc_codes),
    )

    # The model returns one label per row; there is exactly one row here
    raw_label = model.predict(features)[0]

    # Raw dataset label -> human-readable category name
    readable_names = {
        raw: raw.replace("_", " ")
        for raw in (
            "Insufficient_Weight",
            "Normal_Weight",
            "Overweight_Level_I",
            "Overweight_Level_II",
            "Obesity_Type_I",
            "Obesity_Type_II",
            "Obesity_Type_III",
        )
    }
    return readable_names[raw_label]



In [None]:

def obese_guess_seq(age, gender, height, weight, favc, faf, label_encoder):
    """Predict the weight category for one person with the sequential model.

    Relies on a module-level ``seq_model``; the line that would load it is
    commented out elsewhere in this notebook, so the name must be defined
    before this function is called.

    Args:
        age, gender, height, weight, favc, faf: Feature values for one person.
            ``gender`` is "Male"/"Female" and ``favc`` is "yes"/"no".
        label_encoder: Object whose ``inverse_transform`` turns predicted
            class indices back into class labels.

    Returns:
        str: A sentence naming the predicted class label.
    """
    input_data = pd.DataFrame({
        "Age": [age],
        "Height": [height],
        "Weight": [weight],
        "Gender": [gender],
        "FAVC": [favc],
        "FAF": [faf]
    })

    # Encode strings to integers
    gender_mapping = {"Male": 0, "Female": 1}
    favc_mapping = {"no": 0, "yes": 1}

    # Encode categorical features
    input_data["Gender"] = input_data["Gender"].map(gender_mapping)
    input_data["FAVC"] = input_data["FAVC"].map(favc_mapping)

    # Make predictions; argmax over axis 1 picks the highest-scoring class
    # index for each input row
    predictions = seq_model.predict(input_data)
    predicted_labels = label_encoder.inverse_transform(predictions.argmax(axis=1))

    # The frame has a single row, so (assuming one prediction per row) take
    # that one label. Formatting the whole array would put its list-style
    # repr, e.g. "['Normal_Weight']", into the user-facing sentence.
    return "According to the sequential model, people who live a similar lifestyle tend to be: %s" % predicted_labels[0]


In [None]:
import numpy as np

# Example vector, assumed to hold the predicted probabilities
predictions = np.array([
    0.40081695, 0.23242158, 0.05647092, 0.00840507,
    0.01385038, 0.21131594, 0.07671913,
])

# The predicted class label is the position of the largest value
predicted_class_index = predictions.argmax()

# Show the winning class index
print("Predicted class index:", predicted_class_index)

Predicted class index: 0


In [None]:
# Gender selector: toggle buttons offering 'Male' and 'Female'.
# These strings are the keys of the gender mapping used in obese_guess().
gender = widgets.ToggleButtons(
            options=['Male', 'Female']
        )

In [None]:
# Age slider: whole numbers from 14 to 61, starting at 21
age = widgets.IntSlider(
            value=21, # default value
            min=14,
            max=61,
            step=1,
            style={'description_width': 'initial', 'handle_color': '#16a085'}
        )

In [None]:
# Whole feet offered for the height: 5, 6 and 7
height_options = list(range(5, 8))

# Dropdown for the feet part of the height (inches are chosen separately)
height = Dropdown(options=height_options, description='Feet:', disabled=False)

In [None]:
# Extra inches on top of the whole feet: 0 through 11
inches_options = list(range(12))

# Dropdown for the inches part of the height
inches = Dropdown(options=inches_options, description='Inches:', disabled=False)

In [None]:
# Convert a height given in feet and inches to meters
def feet_meters(feet, inches):
    """Return the length ``feet`` + ``inches`` expressed in meters."""
    meters_per_foot = .3048
    from_feet = feet * meters_per_foot
    # Inches are first turned into a fraction of a foot
    from_inches = (inches / 12) * meters_per_foot
    return from_feet + from_inches


In [None]:
# Create weight widget.
# The click handler divides this value by 2.205 before calling the model,
# i.e. it treats the entry as pounds, so the label states the unit.
weight = widgets.BoundedFloatText(
    value=90,
    min=90,  # Set minimum value
    max=1000,
    description='Weight (lbs):',
    style={'description_width': 'initial'},  # let the longer label show in full
    disabled=False
)

In [None]:
# Create favc radio buttons: the answer to the "high caloric food" question.
# The 'yes'/'no' strings are the keys of the FAVC mapping used in obese_guess().
favc = RadioButtons(options = ['yes', 'no'], description='Choose one:')

In [None]:
# Create faf widget: activity level slider, whole numbers from 0 to 3
# (shown under the "how active are you?" heading in the UI)
faf = widgets.IntSlider(
            value=0, # default value
            min=0,
            max=3,
            step=1,
            style={'description_width': 'initial', 'handle_color': '#16a085'}
        )

In [None]:
# Create Models: train the random forest and keep its evaluation artifacts
# (accuracy, confusion matrix, true and predicted test labels) as module-level
# names that later cells read.
model_one, accuracy_one, conf_matrix, y_test, y_pred= create_model()
# Sequential model is disabled; create_seq_model is not defined in this notebook view.
#model_two, accuracy_two, label_encoder = create_seq_model()


In [None]:
# Load Models: read back the file written by create_model(). `model` is the
# module-level name used by obese_guess() and feature_importance().
# NOTE(review): joblib files are pickle-based - only load files you trust.
model = joblib.load('ObesityData.joblib')
# Sequential model loading is disabled, so `seq_model` is never defined here.
#seq_model = joblib.load('ObesityDataSeq.joblib')


In [None]:
# Accuracy of RandomForestClassifier Model on the held-out test split
print("Accuracy:", accuracy_one)

Accuracy: 0.9692671394799054


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Plot confusion matrix as a heatmap
def obesity_confusion_matrix():
    """Draw the classifier's confusion matrix as an annotated heatmap.

    Reads the module-level ``conf_matrix`` produced by ``create_model()``.
    """
    # Tick labels must follow the row/column order of `conf_matrix`.
    # confusion_matrix() was called without `labels`, so scikit-learn orders
    # the classes by their sorted raw names: "Obesity_*" sorts before
    # "Overweight_*". Listing the Overweight levels first would put the wrong
    # name on five of the seven rows and columns.
    raw_labels = sorted([
        "Insufficient_Weight", "Normal_Weight", "Overweight_Level_I",
        "Overweight_Level_II", "Obesity_Type_I", "Obesity_Type_II",
        "Obesity_Type_III",
    ])
    obesity_levels = [label.replace("_", " ") for label in raw_labels]

    # Create a heatmap of the confusion matrix
    plt.figure(figsize=(5, 4))
    sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='g', xticklabels=obesity_levels, yticklabels=obesity_levels)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

# Output widget that holds the confusion-matrix figure for the chart tabs
output_confusion = widgets.Output()

# Generate the heatmap and capture the output inside the widget
with output_confusion:
    obesity_confusion_matrix()


In [None]:
import matplotlib.pyplot as plt

def feature_importance():
    """Plot the trained model's feature importances as a horizontal bar chart.

    Reads ``feature_importances_`` from the module-level ``model``.
    """
    # Same names and order as the feature columns selected in create_model().
    # They are listed directly: downloading the whole raw dataset just to read
    # six column labels adds a needless network dependency to the plot.
    feature_names = ["Age", "Height", "Weight", "Gender", "FAVC", "FAF"]

    # Get feature importances from the trained model
    feature_importances = model.feature_importances_

    # Create a bar plot to visualize feature importances
    plt.figure(figsize=(5, 3))
    plt.barh(feature_names, feature_importances)
    plt.xlabel('Feature Importance')
    plt.ylabel('Feature')
    plt.title('Feature Importance Plot')
    plt.show()

# Create an Output widget to capture the feature-importance bar chart
output_feature = widgets.Output()

# Generate the bar chart and capture the output inside the widget
with output_feature:
    feature_importance()

In [None]:
def data_heatmap():
    """Show the pairwise correlations between the model features as a heatmap."""
    frame = pd.read_csv("https://raw.githubusercontent.com/DavidRandolphjr/CsCapstone/main/Cleaned_ObesityDataSet.csv")

    # Turn the text columns into integer codes. fit_transform() refits the
    # encoder on every call, so a fresh encoder per column is equivalent to
    # reusing a single one.
    # NOTE(review): these codes come from the encoder, not from the explicit
    # Male=0/Female=1 mapping used for the model - confirm that is intended.
    for column in ("Gender", "FAVC", "NObeyesdad"):
        frame[column] = LabelEncoder().fit_transform(frame[column])

    # Only the six model features go into the correlation matrix
    feature_columns = ["Age", "Height", "Weight", "Gender", "FAVC", "FAF"]
    correlations = frame[feature_columns].corr()

    sns.heatmap(correlations, annot=True, cmap='coolwarm')
    plt.title('Correlation Matrix')
    plt.show()


# Create an Output widget to capture the correlation heatmap
output_heatmap = widgets.Output()

# Generate the heatmap and capture the output inside the widget
with output_heatmap:
    data_heatmap()

In [None]:
# "Calculate" button that triggers the prediction
button_send = widgets.Button(
                description='Calculate',
                tooltip='Send',
                style={'description_width': 'initial'}
            )

# Output area the click handler renders the prediction and advice into
output = widgets.Output()

def _weight_advice_html(category):
    """Return the HTML advice snippet shown for a predicted weight category."""
    # Every overweight and obesity level gets the weight-loss advice. The
    # earlier version tested "Overweight Level I" twice, so "Overweight
    # Level II" fell through to the "not recommended" message.
    if category in (
        "Overweight Level I", "Overweight Level II",
        "Obesity Type I", "Obesity Type II", "Obesity Type III",
    ):
        return """
          <p>Weight loss is recommended!
          <a href='https://www.medicalnewstoday.com/articles/exercise-for-obese-people'>
                  Learn more about weight loss!
                  </a></p>
          """
    if category == "Normal Weight":
        return """
          <p>Weight loss is not recommended! Consider putting on muscle!</p>
          <a href='https://www.puregym.com/blog/the-best-gym-workout-plan-for-gaining-muscle/'>
                  Learn more about gaining muscle!
                  </a>
          """
    # Remaining category (insufficient weight)
    return """
          <p>Weight loss is not recommended!</p>
          <a href='https://www.betterhealth.vic.gov.au/health/healthyliving/weight-and-muscle-gain'>
                  Learn more about gaining weight and muscle!
                  </a>
          """


def on_button_clicked(event):
    """Handle a click on the Calculate button.

    Reads the input widgets, predicts the weight category with
    ``obese_guess()``, renders the result and the matching advice into
    ``output``, and logs both the entered values and the prediction.

    Args:
        event: The click event passed by the button (not used).
    """
    with output:
        # Replace whatever the previous click displayed
        clear_output()

        # Convert the widget values into the units passed to the model:
        # feet/inches -> meters, and the weight divided by 2.205 (lb -> kg)
        height_m = feet_meters(height.value, inches.value)
        weight_kg = float(weight.value) / 2.205

        obese_prediction = obese_guess(age.value, gender.value, height_m, weight_kg, favc.value, faf.value)

        # Display the prediction (the heading tag is now closed properly)
        obese_html = f"<h1>Based on this lifestyle, your weight category is: {obese_prediction}</h1>"
        display(HTML(value=obese_html))

        # Display the recommendation that matches the predicted category
        display(widgets.HTML(value=_weight_advice_html(obese_prediction)))

        # Store entered information in log
        logger.info(
            "User entered: Age: %s, Gender: %s, Height: %s, Weight: %.2f, FAVC: %s, FAF: %s",
            age.value,
            gender.value,
            height_m,
            weight_kg,
            favc.value,
            faf.value,
        )
        # Log model prediction
        logger.info("Model prediction: %s", obese_prediction)

# Run on_button_clicked whenever the Calculate button is pressed
button_send.on_click(on_button_clicked)

# Container for the prediction output; placed next to the inputs in the page layout
vbox_result = widgets.VBox([output])

In [None]:

# Load the cleaned CSV file into a DataFrame; search_obesity_level() filters
# this module-level `data`
data = pd.read_csv("https://raw.githubusercontent.com/DavidRandolphjr/CsCapstone/main/Cleaned_ObesityDataSet.csv")

# Define obesity levels (raw label strings compared against the NObeyesdad column)
obesity_levels = [
    "Insufficient_Weight", "Normal_Weight", "Overweight_Level_I",
    "Overweight_Level_II", "Obesity_Type_I", "Obesity_Type_II", "Obesity_Type_III"
]

# Create a dropdown widget for obesity levels, starting on "Normal_Weight"
obesity_dropdown = widgets.Dropdown(
    options=obesity_levels,
    value="Normal_Weight",
    description='Obesity Level:',
    disabled=False,
)

# Create an output widget that receives the filtered table
output_box = widgets.Output()

# Search callback for the obesity-level dropdown
def search_obesity_level(change):
    """Show the dataset rows whose ``NObeyesdad`` equals the selected level.

    Args:
        change: Mapping whose ``'new'`` entry is the selected level.
    """
    # Start from an empty output area
    output_box.clear_output()

    # Keep only the rows for the chosen level and render them as an HTML table
    matches = data.loc[data['NObeyesdad'] == change['new']]
    table = widgets.HTML(matches.to_html())

    with output_box:
        display(table)

# Attach the search function to the dropdown widget: it runs whenever the
# dropdown's `value` changes
obesity_dropdown.observe(search_obesity_level, names='value')

# Create a scrollable output area (fixed 300px height, scrolls on overflow)
scrollable_output = widgets.Box([output_box], layout=widgets.Layout(overflow='auto', height='300px', width='100%'))

# Display the widgets
display(obesity_dropdown)
display(scrollable_output)

# Initial search to display default results; the dict supplies the 'new'
# entry that search_obesity_level() reads
search_obesity_level({'new': obesity_dropdown.value})

Dropdown(description='Obesity Level:', index=1, options=('Insufficient_Weight', 'Normal_Weight', 'Overweight_L…

Box(children=(Output(),), layout=Layout(height='300px', overflow='auto', width='100%'))

In [None]:
import plotly.express as px

def men_levels():
    """Scatter-plot height against weight for the men in the dataset.

    Points are coloured by obesity level. Height and weight are multiplied by
    39.3701 and 2.20462 so the axes read in inches and pounds.
    """
    # Read the cleaned dataset
    data = pd.read_csv("https://raw.githubusercontent.com/DavidRandolphjr/CsCapstone/main/Cleaned_ObesityDataSet.csv")

    # Raw label -> display name, ordered from lowest to highest level. This
    # order is reused as the hue order, i.e. the colour and legend order.
    level_names = {
        "Insufficient_Weight": "Insufficient Weight",
        "Normal_Weight": "Normal Weight",
        "Overweight_Level_I": "Overweight Level I",
        "Overweight_Level_II": "Overweight Level II",
        "Obesity_Type_I": "Obesity Type I",
        "Obesity_Type_II": "Obesity Type II",
        "Obesity_Type_III": "Obesity Type III",
    }

    # Filter the dataset to include only men (copy so the edits below do not
    # write into a slice of `data`)
    men_data = data[data["Gender"] == "Male"].copy()
    men_data["NObeyesdad"] = men_data["NObeyesdad"].map(level_names)

    # Convert height and weight to inches and pounds
    men_data["Height"] = men_data["Height"] * 39.3701
    men_data["Weight"] = men_data["Weight"] * 2.20462

    # Create a scatter plot for height vs. weight, colored by obesity levels.
    # The hue column already holds the display names, so seaborn builds the
    # legend with each name attached to its own colour.
    plt.figure(figsize=(12, 8))
    ax = sns.scatterplot(
        x="Height", y="Weight", hue="NObeyesdad", hue_order=list(level_names.values()),
        palette="coolwarm", data=men_data, s=100, alpha=0.7, edgecolor="w"
    )

    # Add labels and title
    plt.title('Obesity Levels for Men by Height and Weight')
    plt.xlabel('Height (in)')
    plt.ylabel('Weight (lbs)')

    # Reposition and retitle seaborn's own legend. Calling
    # plt.legend(labels=[...]) instead pairs the names with the plot artists
    # purely by position, which does not match the plotted levels.
    sns.move_legend(ax, "upper right", title='Obesity Levels')
    plt.show()


# Create an Output widget to capture the men's scatterplot
output_men = widgets.Output()

# Generate the scatterplot and capture the output inside the widget
with output_men:
    men_levels()


In [None]:
def women_levels():
    """Scatter-plot height against weight for the women in the dataset.

    Points are coloured by obesity level. Height and weight are multiplied by
    39.3701 and 2.20462 so the axes read in inches and pounds.
    """
    # Read the cleaned dataset
    data = pd.read_csv("https://raw.githubusercontent.com/DavidRandolphjr/CsCapstone/main/Cleaned_ObesityDataSet.csv")

    # Raw label -> display name, ordered from lowest to highest level. This
    # order is reused as the hue order, i.e. the colour and legend order.
    level_names = {
        "Insufficient_Weight": "Insufficient Weight",
        "Normal_Weight": "Normal Weight",
        "Overweight_Level_I": "Overweight Level I",
        "Overweight_Level_II": "Overweight Level II",
        "Obesity_Type_I": "Obesity Type I",
        "Obesity_Type_II": "Obesity Type II",
        "Obesity_Type_III": "Obesity Type III",
    }

    # Filter the dataset to include only women (copy so the edits below do not
    # write into a slice of `data`)
    women_data = data[data["Gender"] == "Female"].copy()
    women_data["NObeyesdad"] = women_data["NObeyesdad"].map(level_names)

    # Convert height and weight to inches and pounds
    women_data["Height"] = women_data["Height"] * 39.3701
    women_data["Weight"] = women_data["Weight"] * 2.20462

    # Create a scatter plot for height vs. weight, colored by obesity levels.
    # The hue column already holds the display names, so seaborn builds the
    # legend with each name attached to its own colour.
    plt.figure(figsize=(12, 8))
    ax = sns.scatterplot(
        x="Height", y="Weight", hue="NObeyesdad", hue_order=list(level_names.values()),
        palette="coolwarm", data=women_data, s=100, alpha=0.7, edgecolor="w"
    )

    # Add labels and title
    plt.title('Obesity Levels for Women by Height and Weight')
    plt.xlabel('Height (in)')
    plt.ylabel('Weight (lbs)')

    # Reposition and retitle seaborn's own legend. Calling
    # plt.legend(labels=[...]) instead pairs the names with the plot artists
    # purely by position, which does not match the plotted levels.
    sns.move_legend(ax, "upper right", title='Obesity Levels')
    plt.show()


# Create an Output widget to capture the women's scatterplot
output_women = widgets.Output()

# Generate the scatterplot and capture the output inside the widget
with output_women:
    women_levels()

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
# Weighted-average precision of the random forest on the held-out test labels
# returned by create_model()
precision_test = precision_score(y_test, y_pred, average='weighted')
print("Precision_score:", precision_test)


Precision_score: 0.9698076723024862


In [None]:
# Create carousel to hold charts: one tab per captured chart, with the titles
# listed in the same order as the tab contents
carousel_titles = (
    'Heatmap',
    'Feature Importance Plot',
    'Confusion Matrix',
    'Men Obesity Levels',
    'Women Obesity Levels',
)
carousel = widgets.Tab([output_heatmap, output_feature, output_confusion, output_men, output_women])
for tab_index, tab_title in enumerate(carousel_titles):
    carousel.set_title(tab_index, tab_title)


In [None]:
# Output user interface: headings shown between the input widgets
text_0 = widgets.HTML(value="<h1>Predict obesity levels based on lifestyle!</h1>")
text_1 = widgets.HTML(value="<h2>Enter gender and age.</h2>")
text_2= widgets.HTML(value="<h2>Do you eat high caloric food frequently?</h2>")
text_3= widgets.HTML(value="<h2>On a scale of 0-3, how active are you?</h2>")
# Spelling of "alcohol" corrected. This heading is not placed in the layout below.
text_5= widgets.HTML(value="<h2>Do you drink alcohol?</h2>")

# Left-hand column: headings, inputs, the Calculate button and the chart tabs
vbox_text = widgets.VBox([text_0, text_1, gender, age, height, inches, weight, text_2, favc, text_3, faf, button_send, carousel])

In [None]:
# Page layout: input column on the left, prediction output on the right
page = widgets.HBox( [vbox_text, vbox_result])
page.layout = widgets.Layout(margin='10px 50px 15px 50px')
# Display the page
display(page)

# Display obesity_dropdown (it is also displayed by the cell that creates it)
display(obesity_dropdown)

# Display the heading for the dataset query section
query_html= widgets.HTML(value="<h2>Make queries with our dataset!</h2>")
display(query_html)

# Display scrollable_output (the filtered dataset table)
display(scrollable_output)

HBox(children=(VBox(children=(HTML(value='<h1>Predict obesity levels based on lifestyle!</h1>'), HTML(value='<…

Dropdown(description='Obesity Level:', index=1, options=('Insufficient_Weight', 'Normal_Weight', 'Overweight_L…

HTML(value='<h2>Make queries with our dataset!</h2>')

Box(children=(Output(),), layout=Layout(height='300px', overflow='auto', width='100%'))