<a href="https://colab.research.google.com/github/Mansi-purwar/Infinite_Locus_Hackathon/blob/main/FitnessRecommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, Normalizer
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
import joblib

In [10]:
df= pd.read_csv('/content/dataset.csv')

In [12]:
df.head()

Unnamed: 0,ID,Exercise,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions,Exercise Intensity
0,1,Exercise 2,286.959851,91.892531,96.301115,45,Male,37,170,29.426275,Rainy,5
1,2,Exercise 7,343.453036,64.165097,61.104668,25,Male,43,142,21.286346,Rainy,5
2,3,Exercise 4,261.223465,70.846224,71.766724,20,Male,20,148,27.899592,Cloudy,4
3,4,Exercise 5,127.183858,79.477008,82.984456,33,Male,39,170,33.729552,Sunny,10
4,5,Exercise 10,416.318374,89.960226,85.643174,29,Female,34,118,23.286113,Cloudy,3


In [13]:
df.shape

(3864, 12)

In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3864 entries, 0 to 3863
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  3864 non-null   int64  
 1   Exercise            3864 non-null   object 
 2   Calories Burn       3864 non-null   float64
 3   Dream Weight        3864 non-null   float64
 4   Actual Weight       3864 non-null   float64
 5   Age                 3864 non-null   int64  
 6   Gender              3864 non-null   object 
 7   Duration            3864 non-null   int64  
 8   Heart Rate          3864 non-null   int64  
 9   BMI                 3864 non-null   float64
 10  Weather Conditions  3864 non-null   object 
 11  Exercise Intensity  3864 non-null   int64  
dtypes: float64(4), int64(5), object(3)
memory usage: 362.4+ KB


In [15]:
df.isnull().sum()

ID                    0
Exercise              0
Calories Burn         0
Dream Weight          0
Actual Weight         0
Age                   0
Gender                0
Duration              0
Heart Rate            0
BMI                   0
Weather Conditions    0
Exercise Intensity    0
dtype: int64

In [16]:
df.duplicated().sum()

0

In [17]:
cat_features = ['Gender']
num_features = ['Dream Weight', 'Actual Weight', 'Age', 'BMI']

In [18]:
output_features = ['Exercise', 'Exercise Intensity', 'Duration']

In [19]:
le = LabelEncoder()
df['Exercise'] = le.fit_transform(df['Exercise'])

In [20]:
df.head(1)

Unnamed: 0,ID,Exercise,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions,Exercise Intensity
0,1,2,286.959851,91.892531,96.301115,45,Male,37,170,29.426275,Rainy,5


In [21]:
X_train, X_test, y_train, y_test = train_test_split(
    df[cat_features + num_features],
    df[output_features],
    test_size=0.33,
    random_state=42
)

In [22]:
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('normalizer', Normalizer())
])

In [23]:
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encoder', OrdinalEncoder())
])

In [24]:
preprocessor = ColumnTransformer(
    transformers=[
        ('numeric', numeric_transformer, num_features),
        ('categorical', categorical_transformer, cat_features)
    ]
)

In [25]:
df.head(2)

Unnamed: 0,ID,Exercise,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions,Exercise Intensity
0,1,2,286.959851,91.892531,96.301115,45,Male,37,170,29.426275,Rainy,5
1,2,7,343.453036,64.165097,61.104668,25,Male,43,142,21.286346,Rainy,5


In [26]:
model = MultiOutputRegressor(GradientBoostingRegressor())


In [27]:
pipeline = Pipeline(steps=[
    ('preprocess', preprocessor),
    ('reg', model)
])

In [28]:
# Train the model
pipeline.fit(X_train, y_train)

# Save the model
joblib.dump(pipeline, 'model.pkl')

['model.pkl']

In [29]:
# Save the LabelEncoder
joblib.dump(le, 'label_encoder.pkl')

['label_encoder.pkl']

In [30]:
loaded_model = joblib.load('model.pkl')

In [31]:
loaded_le = joblib.load('label_encoder.pkl')

In [32]:
new_observation = pd.DataFrame([['Male', 25, 70, 45, 29]], columns=cat_features + num_features)

In [33]:
exercise_encoded, intensity, duration = loaded_model.predict(new_observation)[0]

In [34]:
exercise = loaded_le.inverse_transform([int(exercise_encoded)])[0]

print(f"Predicted Exercise: {exercise}, Intensity: {intensity}, Duration: {duration}")

Predicted Exercise: Exercise 10, Intensity: 5.132311046624315, Duration: 27.776047155094417


In [36]:
import pickle

In [38]:
pickle.dump(df.to_dict(),open('model.pkl','wb'))

In [39]:
pickle.dump(df.to_dict(),open('label_encoder.pkl','wb'))