In [14]:
import pandas                           as pd
import numpy                            as np
from sklearn.linear_model               import LogisticRegression
from sklearn.multiclass                 import OneVsOneClassifier, OneVsRestClassifier
from sklearn.model_selection            import train_test_split, StratifiedKFold, cross_val_score
from sklearn.compose                    import ColumnTransformer
from sklearn.pipeline                   import Pipeline
from sklearn.preprocessing              import StandardScaler, OneHotEncoder

file_path = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/GkDzb7bWrtvGXdPOfk6CIg/Obesity-level-prediction-dataset.csv"
data = pd.read_csv(file_path)
data.head()

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,Female,21.0,1.62,64.0,yes,no,2.0,3.0,Sometimes,no,2.0,no,0.0,1.0,no,Public_Transportation,Normal_Weight
1,Female,21.0,1.52,56.0,yes,no,3.0,3.0,Sometimes,yes,3.0,yes,3.0,0.0,Sometimes,Public_Transportation,Normal_Weight
2,Male,23.0,1.8,77.0,yes,no,2.0,3.0,Sometimes,no,2.0,no,2.0,1.0,Frequently,Public_Transportation,Normal_Weight
3,Male,27.0,1.8,87.0,no,no,3.0,3.0,Sometimes,no,2.0,no,2.0,0.0,Frequently,Walking,Overweight_Level_I
4,Male,22.0,1.78,89.8,no,no,2.0,1.0,Sometimes,no,2.0,no,0.0,0.0,Sometimes,Public_Transportation,Overweight_Level_II


In [15]:
X = data.drop(columns=["NObeyesdad"])
y = data["NObeyesdad"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

numerical_features = [feature for feature in X_train.columns if X_train[feature].dtype != 'O']
categorical_features = [feature for feature in X_train.columns if X_train[feature].dtype == 'O']

In [16]:
processor = ColumnTransformer([
        ("Scaler", StandardScaler(), numerical_features),
        ("Encoder", OneHotEncoder(sparse_output=False, drop="first", handle_unknown="ignore"), categorical_features),
    ])

pipeline_OvR = Pipeline([
    ("processor", processor),
    ("Model", OneVsRestClassifier(LogisticRegression(max_iter=1000)))
])

pipeline_OvO = Pipeline([
    ("processor", processor),
    ("Model", OneVsOneClassifier(LogisticRegression(max_iter=1000)))
])

In [18]:
kfold = StratifiedKFold(n_splits=15, shuffle=True, random_state=42)

f1_macro_OvR = cross_val_score(pipeline_OvR, X_train, y_train, cv=kfold, scoring="f1_macro").mean()
f1_macro_OvO = cross_val_score(pipeline_OvO, X_train, y_train, cv=kfold, scoring="f1_macro").mean()

print(f"F1 score for OvR: {np.round(100 * f1_macro_OvR, 2)}%")
print(f"F1 score for OvO: {np.round(100 * f1_macro_OvO, 2)}%")



F1 score for OvR: 75.81%
F1 score for OvO: 93.05%
