In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Read the test split into a DataFrame.
testdata = pd.read_csv('test.csv')

# Body-mass index = weight / height^2.
# 'Height' is divided by 100 to express it in metres (i.e. it is treated as
# centimetres) and stored in a helper column.
# NOTE(review): assumes 'Weight' is in kilograms — confirm against the data source.
testdata['Height_m'] = testdata['Height'] / 100

# pop() returns the helper column and removes it from the frame in the same
# step, so only the new 'BMI' column is left behind.
testdata['BMI'] = testdata['Weight'] / testdata.pop('Height_m') ** 2

# Note: the Calories_per_min feature needs both 'Calories' and 'Duration'.
# If 'Calories' is your prediction target, it will not be present in test.csv,
# so Calories_per_min cannot be created unless both columns exist in test.csv.
# If 'Duration' exists in test.csv but 'Calories' does not, skip the Calories_per_min feature here.

# If both 'Calories' and 'Duration' are available and you want Calories_per_min, uncomment the line below:
# testdata['Calories_per_min'] = testdata['Calories'] / testdata['Duration']

# Define BMI category function
def bmi_category(bmi):
    """Map a numeric BMI value to its weight-class label.

    Bands (lower bound inclusive, upper bound exclusive):
        below 18.5     -> 'Underweight'
        18.5 to < 25   -> 'Normal'
        25 to < 30     -> 'Overweight'
        30 and above   -> 'Obese'

    Args:
        bmi: A number that supports ``<`` comparison with floats.

    Returns:
        One of 'Underweight', 'Normal', 'Overweight' or 'Obese'.

    Note:
        A NaN input fails every ``<`` comparison and therefore falls
        through to 'Obese'.
    """
    # Upper bounds are tested in ascending order, so the first bound that
    # exceeds ``bmi`` identifies its band.
    for upper_bound, label in (
        (18.5, 'Underweight'),
        (25, 'Normal'),
        (30, 'Overweight'),
    ):
        if bmi < upper_bound:
            return label
    return 'Obese'

# Label every row with its BMI band (temporary text column).
testdata['BMI_Category'] = testdata['BMI'].apply(bmi_category)

# Build the encoder from the fixed list of the four possible labels — it is
# NOT fitted on the test rows. LabelEncoder orders its classes alphabetically,
# which gives: Normal=0, Obese=1, Overweight=2, Underweight=3.
# NOTE(review): the train-side encoder yields the same codes only if it saw
# these same four labels — confirm, or persist the train encoder and load it
# here for transform only.
le = LabelEncoder().fit(['Underweight', 'Normal', 'Overweight', 'Obese'])

# pop() hands the temporary label column to the encoder and removes it from
# the frame in the same step, leaving only the integer-coded column.
testdata['BMI_Category_Encoded'] = le.transform(testdata.pop('BMI_Category'))

# Now testdata has BMI and BMI_Category_Encoded just like traindata


In [2]:
# Bare expression on the last line of the cell: the frame is shown as the cell's output.
testdata

Unnamed: 0,id,Sex,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,BMI,BMI_Category_Encoded
0,750000,male,45,177.0,81.0,7.0,87.0,39.8,25.854639,2
1,750001,male,26,200.0,97.0,20.0,101.0,40.5,24.250000,0
2,750002,female,29,188.0,85.0,16.0,102.0,40.4,24.049344,0
3,750003,female,39,172.0,73.0,20.0,107.0,40.6,24.675500,0
4,750004,female,30,173.0,67.0,16.0,94.0,40.5,22.386314,0
...,...,...,...,...,...,...,...,...,...,...
249995,999995,female,56,159.0,62.0,6.0,85.0,39.4,24.524346,0
249996,999996,male,32,202.0,101.0,3.0,84.0,38.4,24.752475,0
249997,999997,female,31,164.0,64.0,14.0,98.0,40.1,23.795360,0
249998,999998,female,62,158.0,61.0,25.0,106.0,40.7,24.435187,0


In [3]:
# Write the engineered test set to disk; index=False keeps the row index out of the CSV.
testdata.to_csv(path_or_buf='test_updated.csv', index=False)
