In [4]:
# --- Task 5: Food Calorie Prediction ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# --- Step 1: Load dataset ---
# Reads food.csv from the current working directory.
df = pd.read_csv("food.csv")  # upload dataset

print("Dataset shape:", df.shape)
print(df.head())

# --- Step 2: Select features (nutrients) and target (calories) ---
# The column names live in one place so that the training matrix and the
# example row built in Step 7 are guaranteed to use the same columns in the
# same order.
FEATURE_COLS = ["Data.Fat.Total Lipid", "Data.Carbohydrate", "Data.Protein"]
TARGET_COL = "Data.Kilocalories"

X = df[FEATURE_COLS]   # input features
y = df[TARGET_COL]     # target

# --- Step 3: Train/test split ---
# 20% of the rows are held out for evaluation; random_state is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Step 4: Train Linear Regression model ---
model = LinearRegression()
model.fit(X_train, y_train)

# --- Step 5: Predictions ---
y_pred = model.predict(X_test)

# --- Step 6: Evaluation ---
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

# --- Step 7: Example Prediction ---
# The model was fitted on a DataFrame, so scikit-learn remembers the feature
# names. Predicting on a bare nested list makes it warn that "X does not have
# valid feature names" and leaves the value-to-column mapping implicit, so the
# example is wrapped in a one-row DataFrame carrying the training column names.
example = [[10, 20, 5]]  # Fat=10g, Carbs=20g, Protein=5g
example_df = pd.DataFrame(example, columns=FEATURE_COLS)
print("Predicted Calories:", model.predict(example_df)[0])

Dataset shape: (7413, 48)
     Category               Description  Nutrient Data Bank Number  \
0      BUTTER          BUTTER,WITH SALT                       1001   
1      BUTTER  BUTTER,WHIPPED,WITH SALT                       1002   
2  BUTTER OIL      BUTTER OIL,ANHYDROUS                       1003   
3      CHEESE               CHEESE,BLUE                       1004   
4      CHEESE              CHEESE,BRICK                       1005   

   Data.Alpha Carotene  Data.Ash  Data.Beta Carotene  Data.Beta Cryptoxanthin  \
0                    0      2.11                 158                        0   
1                    0      2.11                 158                        0   
2                    0      0.00                 193                        0   
3                    0      5.11                  74                        0   
4                    0      3.18                  76                        0   

   Data.Carbohydrate  Data.Cholesterol  Data.Choline  ...  \
0    



In [3]:
# Show the column labels of the DataFrame `df` loaded from food.csv.
column_labels = df.columns
print(column_labels)

Index(['Category', 'Description', 'Nutrient Data Bank Number',
       'Data.Alpha Carotene', 'Data.Ash', 'Data.Beta Carotene',
       'Data.Beta Cryptoxanthin', 'Data.Carbohydrate', 'Data.Cholesterol',
       'Data.Choline', 'Data.Fiber', 'Data.Kilocalories',
       'Data.Lutein and Zeaxanthin', 'Data.Lycopene', 'Data.Manganese',
       'Data.Niacin', 'Data.Pantothenic Acid', 'Data.Protein',
       'Data.Refuse Percentage', 'Data.Retinol', 'Data.Riboflavin',
       'Data.Selenium', 'Data.Sugar Total', 'Data.Thiamin', 'Data.Water',
       'Data.Fat.Monosaturated Fat', 'Data.Fat.Polysaturated Fat',
       'Data.Fat.Saturated Fat', 'Data.Fat.Total Lipid',
       'Data.Household Weights.1st Household Weight',
       'Data.Household Weights.1st Household Weight Description',
       'Data.Household Weights.2nd Household Weight',
       'Data.Household Weights.2nd Household Weight Description',
       'Data.Major Minerals.Calcium', 'Data.Major Minerals.Copper',
       'Data.Major Minerals.Iro