In [1]:
from pydantic import BaseModel
import pandas as pd

import joblib



class InputModel(BaseModel):
    Gender: str
    Age: int
    Height: float
    Weight: float
    family_history_with_overweight:str
    FAVC: str
    FCVC: float
    NCP: float
    CAEC: str
    SMOKE: str
    CH2O: float
    SCC: str
    FAF: float
    TUE: float
    CALC: str
    MTRANS: str

input = InputModel(
    Gender= "Female",
  Age= 21,
  Height= 1.62,
  Weight= 64.0,
  family_history_with_overweight= "yes",
  FAVC= "no",
  FCVC= 2.0,
  NCP= 3.0,
  CAEC= "Sometimes",
  SMOKE= "no",
  CH2O= 2.0,
  SCC= "no",
  FAF= 0.0,
  TUE= 1.0,
  CALC= "no",
  MTRANS= "Public_Transportation"
)

encoder = joblib.load("../data/encoder.pkl")
scaler = joblib.load("../data/scaler.pkl")

def generate_dataframe(data:InputModel):
    return pd.DataFrame([data.model_dump()])
def  preprocess(df:pd.DataFrame)-> pd.DataFrame:
    #Binary encoding for binary data
    binary_features = ["family_history_with_overweight", "FAVC", "SMOKE", "SCC"]
    df[binary_features] = df[binary_features].replace({"yes":1, "no":0})

    #One-Hot encoding for categorical features
    categorical_features = ["Gender", "CAEC", "CALC", "MTRANS"]
    encoded_cats = encoder.transform(df[categorical_features])
    encoded_cat_df = pd.DataFrame(encoded_cats, columns=encoder.get_feature_names_out(categorical_features))

    df = df.drop(columns=df[categorical_features])
    df = pd.concat([df,encoded_cat_df], axis=1)
    
    return df

def predict(prediction:int)-> str:
    prediction_map = {
        0: "Insufficient_Weight",
        1: "Normal_Weight",
        2: "Overweight_Level_I",
        3: "Overweight_Level_II",
        4: "Obesity_Type_I",
        5: "Obesity_Type_II",
        6: "Obesity_Type_III"
    }
    return prediction_map.get(prediction, "Invalid Prediction")

def scaleData(df:pd.DataFrame)->pd.DataFrame:
    continuous_features = ["Age", "Height", "Weight", "NCP", "CH2O", "FAF"]
    df1 = df
    df1[continuous_features] = scaler.transform(df1[continuous_features])

    return df1

df = preprocess(generate_dataframe(input))
scaled_data = scaleData(df)
print(df)
print(scaled_data)



        Age   Height    Weight  family_history_with_overweight  FAVC  FCVC  \
0 -0.521741 -0.87438 -0.862558                               1     0   2.0   

        NCP  SMOKE      CH2O  SCC  ...  CAEC_Frequently  CAEC_Sometimes  \
0  0.404102      0 -0.013141    0  ...              0.0             1.0   

   CAEC_no  CALC_Frequently  CALC_Sometimes  CALC_no  MTRANS_Bike  \
0      0.0              0.0             0.0      1.0          0.0   

   MTRANS_Motorbike  MTRANS_Public_Transportation  MTRANS_Walking  
0               0.0                           1.0             0.0  

[1 rows x 23 columns]
        Age   Height    Weight  family_history_with_overweight  FAVC  FCVC  \
0 -0.521741 -0.87438 -0.862558                               1     0   2.0   

        NCP  SMOKE      CH2O  SCC  ...  CAEC_Frequently  CAEC_Sometimes  \
0  0.404102      0 -0.013141    0  ...              0.0             1.0   

   CAEC_no  CALC_Frequently  CALC_Sometimes  CALC_no  MTRANS_Bike  \
0      0.0       

  df[binary_features] = df[binary_features].replace({"yes":1, "no":0})


In [None]:
import os


/code/model.pkl
