## Import Libraries

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

## Load Dataset

In [17]:
# Read the dataset from its CSV file into a DataFrame.
csv_path = "/content/sample_data/dummy_data.csv"
data = pd.read_csv(csv_path)

## Select features and target

In [18]:
# The column the model predicts, and the input columns it learns from.
target = 'recommended_sugar_intake'
features = [
    'age',
    'height',
    'weight',
    'diabetes_history',
    'diabetes_heritage',
]

# List every column of the loaded frame as a quick sanity check.
print("Columns in the DataFrame:", list(data.columns))

Columns in the DataFrame: ['user_id', 'age', 'height', 'weight', 'diabetes_history', 'diabetes_heritage', 'preferred_food', 'diet_labels', 'recommended_sugar_intake', 'calorie_intake']


## Handle missing values

In [19]:
# Drop only the rows that are missing a value the model actually uses.
# A bare dropna() would also discard rows whose only gap is in a column
# that is never fed to the model (e.g. 'preferred_food' or 'diet_labels'),
# needlessly shrinking the training data.
data = data.dropna(subset=features + [target])

## Group features by type (categorical vs. numeric)

In [20]:
# Columns handed to the model unchanged by the preprocessing step.
numeric_features = [
    'age',
    'height',
    'weight',
]

# Columns that the preprocessing step one-hot encodes.
categorical_features = [
    'diabetes_history',
    'diabetes_heritage',
]


## Preprocess data

In [21]:
# Numeric columns go through untouched; categorical columns are expanded
# into one indicator column per category.
numeric_step = ('num', 'passthrough', numeric_features)
categorical_step = ('cat', OneHotEncoder(), categorical_features)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

## Linear Regression Model

In [22]:
# Separate the model inputs from the value to be predicted.
X, y = data[features], data[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Chain preprocessing and regression so both are fitted and applied together.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
model = Pipeline(steps=pipeline_steps)

## Fit the model to the data

In [23]:
# Fit the whole pipeline (preprocessing + regression) on the training split only.
model.fit(X_train, y_train)

## Make predictions

In [24]:
# Predict the target for the held-out rows so they can be compared with y_test.
y_pred = model.predict(X_test)

## Evaluate the Model

In [25]:
# Score the held-out predictions: mean squared error, plus its square root
# (RMSE), which is expressed in the same units as the target.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = mse ** 0.5

# Leave the pair as the cell's last expression so the notebook displays it.
(mse, rmse)

(30.137752752574983, 5.489786221026733)

## Prediction on New Data

In [26]:
def predict_sugar_intake(model, age, height, weight, diabetes_history, diabetes_heritage):
    """Predict the recommended sugar intake for a single person.

    Args:
        model: Object exposing ``predict``; it is called with a one-row
            DataFrame whose columns are ``age``, ``height``, ``weight``,
            ``diabetes_history`` and ``diabetes_heritage`` (in that order).
        age: Value placed in the ``age`` column.
        height: Value placed in the ``height`` column.
        weight: Value placed in the ``weight`` column.
        diabetes_history: Value placed in the ``diabetes_history`` column.
        diabetes_heritage: Value placed in the ``diabetes_heritage`` column.

    Returns:
        The first element of whatever ``model.predict`` returns for that row.
    """
    column_names = ('age', 'height', 'weight', 'diabetes_history', 'diabetes_heritage')
    column_values = (age, height, weight, diabetes_history, diabetes_heritage)

    # Wrapping each value in a one-element list makes every column one row long.
    single_row = pd.DataFrame(
        {name: [value] for name, value in zip(column_names, column_values)}
    )

    return model.predict(single_row)[0]

# Try the helper on one hand-written profile and display the result.
example_profile = {
    'age': 44,
    'height': 175,
    'weight': 70,
    'diabetes_history': 'Yes',
    'diabetes_heritage': 'No',
}
example_prediction = predict_sugar_intake(model, **example_profile)
example_prediction

26.713166854259914

In [27]:
import pickle

# Serialize the fitted pipeline to disk. The context manager guarantees the
# file handle is flushed and closed, even if pickling raises part-way through
# (the original bare open() left the handle for the garbage collector).
with open('Recommended_Sugar_Intake_Model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
import h5py

# Load the model from the .pkl file, closing the handle as soon as it is read.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that you created yourself or otherwise trust.
with open('Recommended_Sugar_Intake_Model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

regressor = model.named_steps['regressor']
categories = model.named_steps['preprocessor'].named_transformers_['cat'].categories_

# h5py does not accept NumPy unicode ('U') arrays and needs a tagged dtype for
# object arrays of str, so every text dataset is written with an explicit
# variable-length UTF-8 string dtype.
text_dtype = h5py.string_dtype(encoding='utf-8')

# Create an .h5 file and save the model components
with h5py.File('Recommended_Sugar_Intake_Model.h5', 'w') as h5file:
    # Save the model parameters
    h5file.create_dataset('linear_regression_coefficients', data=regressor.coef_)
    h5file.create_dataset('linear_regression_intercept', data=regressor.intercept_)

    # Save the numeric and categorical feature names
    h5file.create_dataset('numeric_features', data=numeric_features, dtype=text_dtype)
    h5file.create_dataset('categorical_features', data=categorical_features, dtype=text_dtype)

    # Save the OneHotEncoder categories, one dataset per encoded column.
    # Text categories need the string dtype; numeric ones are stored as-is.
    for i, category in enumerate(categories):
        if category.dtype.kind in ('O', 'U'):
            h5file.create_dataset(
                f'category_{i}', data=category.astype(object), dtype=text_dtype
            )
        else:
            h5file.create_dataset(f'category_{i}', data=category)

# Confirm the file creation
print("Model has been saved to Recommended_Sugar_Intake_Model.h5")

Model has been saved to Recommended_Sugar_Intake_Model.h5
