<b> KNN Regression Model for Water Intake Recommendation </b>

In [2]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer

seed = 0
np.random.seed(seed)

In [3]:
intake_data = pd.read_csv("./water_drinking_data.csv")
intake_data.head()

Unnamed: 0,age,weight,height,activity_level,temperature,water_intake
0,28,68,1.72,1.5,23,2.7
1,35,75,1.78,1.6,22,3.1
2,42,82,1.81,1.8,25,2.9
3,50,90,1.85,1.3,18,2.3
4,29,67,1.69,1.5,21,2.6


Activity_level measures in METs: 
https://en.wikipedia.org/wiki/Metabolic_equivalent_of_task

Create train/test data

In [4]:
intake_X = intake_data.iloc[:, :-1]  # features of intake data
intake_y = intake_data['water_intake']  # labels of intake data

# scale my data
scaler = StandardScaler()
intake_X_scaled = scaler.fit_transform(intake_X)

# create 80%/20% train/test split
intake_X_tr, intake_X_te, intake_y_tr, intake_y_te = train_test_split(intake_X_scaled, intake_y, test_size=0.2, random_state=seed)

Find the best k value

In [5]:
knn = KNeighborsRegressor()
param_grid = {'n_neighbors': [3, 5, 7, 9, 11]}
grid_search = GridSearchCV(knn, param_grid, cv=3)
grid_search.fit(intake_X_tr, intake_y_tr)
best_k = grid_search.best_params_['n_neighbors']
print("The best k value is: {}".format(best_k))

The best k value is: 3


In [6]:
# initialize KNN reg model
model = KNeighborsRegressor(n_neighbors=best_k)
model.fit(intake_X_tr, intake_y_tr)


Evaluate model performance

In [7]:
y_te_pred = model.predict(intake_X_te)
mse = mean_squared_error(intake_y_te, y_te_pred)
print("mse is {}".format(mse))

mse is 0.013950617283950627


In [8]:
def recommend_pred(age, weight, height, activity_level, temperature, model):
    X_df = pd.DataFrame({"age": [age], "weight": [weight], "height": [height], "activity_level": [activity_level], "temperature": [temperature]})
    X_scaled = scaler.transform(X_df)
    intake_level = model.predict(X_scaled)
    return float(intake_level)

Example Data Prediction

In [22]:
# result in liters
print(recommend_pred(25, 60, 1.65, 1.3, 22, model))
print(recommend_pred(35, 75, 1.78, 1.6, 24, model))
print(recommend_pred(21, 10, 1.85, 1.1, 14, model))
print(recommend_pred(21, 10, 1.85, 1.1, 33, model))
print(recommend_pred(21, 10, 1.99, 1.1, 33, model))

2.7000000000000006
3.233333333333333
2.6999999999999997
2.766666666666667
2.8333333333333335
