In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

# Sample dataset

In [44]:
data = pd.read_json('/content/drive/MyDrive/Colab Notebooks/nutrition_data.json')


# Select feature and target variable

In [64]:
features = ['Age', 'Weight' , 'Height', 'Activity_Level', 'Goal']
target = 'Estimated_Calories'

# Split data into features and target variable

In [65]:
X = data[features]
y = data[target]


# Split data into training and testing sets

In [66]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Preprocessing for categorical variables

In [67]:
categorical_cols = ['Activity_Level', 'Goal']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')


# Combine preprocessing steps


In [68]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_cols)
    ])

# Create pipelines for both models with preprocessing

In [69]:
linear_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

forest_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Train and evaluate Linear Regression model

In [70]:
linear_pipeline.fit(X_train, y_train)
linear_y_pred = linear_pipeline.predict(X_test)
linear_mse = mean_squared_error(y_test, linear_y_pred)
print(f"Linear Regression Mean Squared Error: {linear_mse}")

Linear Regression Mean Squared Error: 25661.96853585435



# Train and evaluate Random Forest Regressor model

In [71]:
forest_pipeline.fit(X_train, y_train)
forest_y_pred = forest_pipeline.predict(X_test)
forest_mse = mean_squared_error(y_test, forest_y_pred)
print(f"Random Forest Regressor Mean Squared Error: {forest_mse}")

Random Forest Regressor Mean Squared Error: 23362.40246864075
