In [28]:
import numpy                       as np
import pandas                      as pd
from sklearn.base                  import clone
from sklearn.compose               import ColumnTransformer
from sklearn.linear_model          import LogisticRegression
from sklearn.model_selection       import train_test_split, cross_val_score, StratifiedKFold
from sklearn.multiclass            import OneVsOneClassifier, OneVsRestClassifier
from sklearn.pipeline              import Pipeline
from sklearn.preprocessing         import StandardScaler, OneHotEncoder

# Remote location of the obesity-level CSV used throughout this notebook.
file_path = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/GkDzb7bWrtvGXdPOfk6CIg/Obesity-level-prediction-dataset.csv"

# Download the CSV into a DataFrame and show it as the cell output.
data = pd.read_csv(filepath_or_buffer=file_path)
data

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,Female,21.000000,1.620000,64.000000,yes,no,2.0,3.0,Sometimes,no,2.000000,no,0.000000,1.000000,no,Public_Transportation,Normal_Weight
1,Female,21.000000,1.520000,56.000000,yes,no,3.0,3.0,Sometimes,yes,3.000000,yes,3.000000,0.000000,Sometimes,Public_Transportation,Normal_Weight
2,Male,23.000000,1.800000,77.000000,yes,no,2.0,3.0,Sometimes,no,2.000000,no,2.000000,1.000000,Frequently,Public_Transportation,Normal_Weight
3,Male,27.000000,1.800000,87.000000,no,no,3.0,3.0,Sometimes,no,2.000000,no,2.000000,0.000000,Frequently,Walking,Overweight_Level_I
4,Male,22.000000,1.780000,89.800000,no,no,2.0,1.0,Sometimes,no,2.000000,no,0.000000,0.000000,Sometimes,Public_Transportation,Overweight_Level_II
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2106,Female,20.976842,1.710730,131.408528,yes,yes,3.0,3.0,Sometimes,no,1.728139,no,1.676269,0.906247,Sometimes,Public_Transportation,Obesity_Type_III
2107,Female,21.982942,1.748584,133.742943,yes,yes,3.0,3.0,Sometimes,no,2.005130,no,1.341390,0.599270,Sometimes,Public_Transportation,Obesity_Type_III
2108,Female,22.524036,1.752206,133.689352,yes,yes,3.0,3.0,Sometimes,no,2.054193,no,1.414209,0.646288,Sometimes,Public_Transportation,Obesity_Type_III
2109,Female,24.361936,1.739450,133.346641,yes,yes,3.0,3.0,Sometimes,no,2.852339,no,1.139107,0.586035,Sometimes,Public_Transportation,Obesity_Type_III


In [29]:
# Split the frame into the label column (y) and every remaining column (X).
# NOTE(review): the rendered frame shows an "Unnamed: 0" header; if that is a
# real column (a saved row index) rather than a display artifact, it should
# not be used as a feature -- confirm against the raw CSV.
target_column = "NObeyesdad"
y = data[target_column]
X = data.drop(columns=target_column)

# Hold out 20% of the rows for testing; stratify on y and fix the seed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [30]:
# Partition the feature columns by dtype so each group gets the right
# preprocessing step.
# Using select_dtypes instead of comparing against the object dtype ('O'):
# text columns held in a dedicated string or categorical dtype are not 'O'
# and would otherwise be mislabelled as numerical and sent to the scaler.
# Column order within each list follows X_train.columns.
numerical_Features = X_train.select_dtypes(include="number").columns.tolist()
categorical_features = X_train.select_dtypes(exclude="number").columns.tolist()

In [31]:
# Column-wise preprocessing: standardise the numerical columns and one-hot
# encode the categorical ones.
_scale_numeric = ('scaler', StandardScaler(), numerical_Features)

# NOTE(review): with drop='first' together with handle_unknown="ignore", an
# unseen category presumably encodes to the same all-zero row as the dropped
# first category -- confirm against the OneHotEncoder documentation.
_encode_categorical = (
    'encoder',
    OneHotEncoder(sparse_output=False, drop='first', handle_unknown="ignore"),
    categorical_features,
)

preprocess = ColumnTransformer(transformers=[_scale_numeric, _encode_categorical])

In [32]:
def _build_logreg_pipeline(multiclass_wrapper):
    """Return a preprocessing + logistic-regression pipeline.

    Parameters
    ----------
    multiclass_wrapper : callable
        Multiclass meta-estimator class (e.g. OneVsRestClassifier or
        OneVsOneClassifier) that wraps the binary LogisticRegression.

    Returns
    -------
    Pipeline
        Steps 'preprocessor' (an independent, unfitted copy of `preprocess`)
        and 'model' (the wrapped logistic regression).
    """
    return Pipeline([
        # clone() gives each pipeline its own transformer; without it both
        # pipelines would hold the same object, and fitting one pipeline
        # directly would refit the other's preprocessor as well.
        ('preprocessor', clone(preprocess)),
        ('model', multiclass_wrapper(LogisticRegression(max_iter=1000))),
    ])


# Pipeline that preprocesses the data and trains the OvR logistic regression model.
pipeline_OvR = _build_logreg_pipeline(OneVsRestClassifier)

# Pipeline that preprocesses the data and trains the OvO logistic regression model.
pipeline_OvO = _build_logreg_pipeline(OneVsOneClassifier)

In [43]:
# Cross-validate both strategies on the training split with the same
# 20-fold, shuffled, seeded StratifiedKFold so their scores are comparable.
kfold = StratifiedKFold(n_splits=20, shuffle=True, random_state=42)

# Per-fold macro-F1 for the one-vs-rest pipeline, averaged over the folds.
fold_scores_OvR = cross_val_score(
    pipeline_OvR, X_train, y_train, scoring='f1_macro', cv=kfold
)
f1_macro_OvR = fold_scores_OvR.mean()

# Per-fold macro-F1 for the one-vs-one pipeline, averaged over the folds.
fold_scores_OvO = cross_val_score(
    pipeline_OvO, X_train, y_train, scoring='f1_macro', cv=kfold
)
f1_macro_OvO = fold_scores_OvO.mean()

# Report both averages as percentages rounded to two decimals.
print(f"f1 macro score for OvR strategy is {np.round(100 * f1_macro_OvR, 2)}% ")
print(f"f1 macro score for OvO strategy is {np.round(100 * f1_macro_OvO, 2)}% ")



f1 macro score for OvR strategy is 75.11% 
f1 macro score for OvO strategy is 93.21% 
