# Manual Preprocessing

In [41]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, OneHotEncoder, OrdinalEncoder
from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import CategoricalNB, MultinomialNB, GaussianNB
from sklearn.model_selection import RepeatedKFold, cross_val_score
from sklearn.metrics import classification_report


In [42]:
df_train = pd.read_csv("../our data/no_outliers.csv")
df_test = pd.read_csv("../data/obesity_test.csv")

df_test = df_test.drop(columns=["region", "marrital_status"])

# Scale and KNN-impute data
scalers = {} # Preserve scalers for antitransformation

columns = ['age', 'height', 'weight']
scaler = StandardScaler()
imputer = KNNImputer(n_neighbors=5, weights='uniform')

df_train[columns] = scaler.fit_transform(df_train[columns])
df_test[columns] = scaler.transform(df_test[columns])


# Impute
df_train[columns] = imputer.fit_transform(df_train[columns])
df_test[columns] = imputer.transform(df_test[columns])

# Transform back 
df_train[columns] = scaler.inverse_transform(df_train[columns])
df_test[columns] = scaler.inverse_transform(df_test[columns])
    


In [43]:
def classify_bmi_comprehensive(row):
    """
    Classify BMI based on age and BMI value.

    Input:
    row: A Pandas row with 'weight', 'height', and 'age' columns.

    Output:
    Returns a string that classifies the individual into BMI categories.
    """
    # Check if weight and height are valid
    if row['height'] <= 0 or row['weight'] <= 0:
        return 'Invalid data'

    # Calculate BMI
    bmi = row['weight'] / (row['height'] ** 2)

    # Age group: Children (2-19 years)
    if 2 <= row['age'] < 20:
        if bmi < 14:
            return 0 # Underweight
        elif 14 <= bmi < 18:
            return 1 # Normal weight
        elif 18 <= bmi < 21:
            return 2 # Overweight
        else:
            return 3 # Obesity 1

    # Age group: Adults (20-64 years)
    elif 20 <= row['age'] < 65:
        if bmi < 18.5:
            return 0 # "Underweight"
        elif 18.5 <= bmi < 25:
            return 1 # "Healthy Weight"
        elif 25 <= bmi < 30:
            return 2 #"Overweight"
        elif 30<= bmi < 35:
            return 3 #"Obese Class 1"
        elif 35 <= bmi < 40:
            return 4 #"Obese Class 2"
        else:
            return 5 #"Obese Class 3"

In [44]:
# Add BMI
df_train['bmi_class'] = df_train.apply(lambda row: classify_bmi_comprehensive(row), axis=1)
df_test['bmi_class'] = df_test.apply(lambda row: classify_bmi_comprehensive(row), axis=1)

In [45]:
# Fill missing activity with zero
df_train['physical_activity_perweek'].fillna('No Activity')
df_test['physical_activity_perweek'].fillna('No Activity')


0        5 or more
1      No Activity
2           1 to 2
3           1 to 2
4           3 to 4
          ...     
495    No Activity
496    No Activity
497    No Activity
498    No Activity
499         3 to 4
Name: physical_activity_perweek, Length: 500, dtype: object

In [46]:
hashmap = {
"Never": 0,
"Sometimes": 1,
"Frequently": 2,
"Always": 3,

"No Activity": 0,
"up to 2": 1,
"up to 5": 2,
"more than 5": 3,

"less than 1": 1,
"1 to 2": 2,
"more than 2": 3,
"3 to 4": 4,
"5 or more": 5,

"Bicycle": 1,
"Car": 3,
"Motorbike": 3,
"Public": 2,
"Walk": 0,

"no": 0,
"yes": 1,

"Male": 0,
"Female": 1
}


In [47]:
# Manually encode data

columns = ['alcohol_freq',
 'caloric_freq',
 'devices_perday',
 'eat_between_meals',
 'gender',
 'monitor_calories',
 'parent_overweight',
 'physical_activity_perweek',
 'smoke',
 'transportation',
 'veggies_freq',
 'water_daily',
 'bmi_class',
 'meals_perday',
 "siblings"]

for target in columns:
    df_train[target] = df_train[target].replace(hashmap)
    df_test[target]= df_test[target].replace(hashmap)


  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)


In [48]:
# Fill rest with KNN or smt else

# Scale numerical (again) and KNN-impute data

#columnsx = ['age', 'height', 'weight']
#scaler = StandardScaler()
imputer = DecisionTreeClassifier()
imputer = IterativeImputer(imputer)

#df_train[columnsx] = scaler.fit_transform(df_train[columnsx])
#df_test[columnsx] = scaler.transform(df_test[columnsx])

df_train[columns] = imputer.fit_transform(df_train[columns])
df_test[columns] = imputer.transform(df_test[columns])



In [49]:
# Transform to life score
life_columns = [
 'alcohol_freq',
 'caloric_freq',
 'devices_perday',
 'eat_between_meals',
 'monitor_calories',
 'physical_activity_perweek',
 'smoke',
 'transportation',
 'veggies_freq',
 'water_daily',
]

bad_columns = [
 'eat_between_meals',
 'alcohol_freq',
 'caloric_freq',
 'devices_perday',
 'smoke',
 'transportation',
]

good_columns = [
 'monitor_calories',
 'physical_activity_perweek',
 'veggies_freq',
 'water_daily',
]

df_train["life"] = 0
df_test["life"] = 0

for column in good_columns:
    df_train["life"] += df_train[column]
    df_test["life"] += df_test[column]


for column in bad_columns:
    df_train["life"] -= df_train[column]
    df_test["life"] -= df_test[column]




In [50]:
hash_obesity = {
 'Normal_Weight': 1,
 'Overweight_Level_I': 2,
 'Overweight_Level_II': 3,
 'Obesity_Type_I': 4,
 'Insufficient_Weight': 5,
 'Obesity_Type_II': 6,
 'Obesity_Type_III': 7
 }

X = df_train.iloc[:, 1:].drop(columns='obese_level')
y = df_train['obese_level'].replace(hash_obesity)



  y = df_train['obese_level'].replace(hash_obesity)


In [56]:
# Define general function to preprocess data without information leak
def classify_bmi_comprehensive(row):
    """
    Classify BMI based on age and BMI value.

    Input:
    row: A Pandas row with 'weight', 'height', and 'age' columns.

    Output:
    Returns a string that classifies the individual into BMI categories.
    """
    # Check if weight and height are valid
    if row['height'] <= 0 or row['weight'] <= 0:
        return 'Invalid data'

    # Calculate BMI
    bmi = row['weight'] / (row['height'] ** 2)

    # Age group: Children (2-19 years)
    if 2 <= row['age'] < 20:
        if bmi < 14:
            return 0 # Underweight
        elif 14 <= bmi < 18:
            return 1 # Normal weight
        elif 18 <= bmi < 21:
            return 2 # Overweight
        else:
            return 3 # Obesity 1

    # Age group: Adults (20-64 years)
    elif 20 <= row['age'] < 65:
        if bmi < 18.5:
            return 0 # "Underweight"
        elif 18.5 <= bmi < 25:
            return 1 # "Healthy Weight"
        elif 25 <= bmi < 30:
            return 2 #"Overweight"
        elif 30<= bmi < 35:
            return 3 #"Obese Class 1"
        elif 35 <= bmi < 40:
            return 4 #"Obese Class 2"
        else:
            return 5 #"Obese Class 3"
        
def preprocess_data(X_train, X_test):
    df_train = X_train.copy()
    df_test = X_test.copy()
    # START
    # 1. Scale and KNN-impute data
    columns = ['age', 'height', 'weight']
    scaler = StandardScaler()
    imputer = KNNImputer(n_neighbors=5, weights='uniform')

    df_train[columns] = scaler.fit_transform(df_train[columns])
    df_test[columns] = scaler.transform(df_test[columns])


    # Impute
    df_train[columns] = imputer.fit_transform(df_train[columns])
    df_test[columns] = imputer.transform(df_test[columns])

    # Transform back 
    df_train[columns] = scaler.inverse_transform(df_train[columns])
    df_test[columns] = scaler.inverse_transform(df_test[columns])

    # Add BMI 
    df_train['bmi_class'] = df_train.apply(lambda row: classify_bmi_comprehensive(row), axis=1)
    df_test['bmi_class'] = df_test.apply(lambda row: classify_bmi_comprehensive(row), axis=1)

    # Fill missing activity with 0
    df_train['physical_activity_perweek'].fillna('No Activity')
    df_test['physical_activity_perweek'].fillna('No Activity')

    # Encode categoricals with hashmap (my favourite)
    hashmap = {
        "Never": 0,
        "Sometimes": 1,
        "Frequently": 2,
        "Always": 3,

        "No Activity": 0,
        "up to 2": 1,
        "up to 5": 2,
        "more than 5": 3,

        "less than 1": 1,
        "1 to 2": 2,
        "more than 2": 3,
        "3 to 4": 4,
        "5 or more": 5,

        "Bicycle": 1,
        "Car": 3,
        "Motorbike": 3,
        "Public": 2,
        "Walk": 0,

        "no": 0,
        "yes": 1,

        "Male": 0,
        "Female": 1
        }

    columns = ['alcohol_freq',
    'caloric_freq',
    'devices_perday',
    'eat_between_meals',
    'gender',
    'monitor_calories',
    'parent_overweight',
    'physical_activity_perweek',
    'smoke',
    'transportation',
    'veggies_freq',
    'water_daily',
    'bmi_class',
    'meals_perday',
    "siblings"]

    for target in columns:
        df_train[target] = df_train[target].replace(hashmap)
        df_test[target]= df_test[target].replace(hashmap)


    # Fill the ordinal variables with iterative Random Forests
    imputer = DecisionTreeClassifier()
    imputer = IterativeImputer(imputer)

    df_train[columns] = imputer.fit_transform(df_train[columns])
    df_test[columns] = imputer.transform(df_test[columns])

    # Transform to life score
    life_columns = [
    'alcohol_freq',
    'caloric_freq',
    'devices_perday',
    'eat_between_meals',
    'monitor_calories',
    'physical_activity_perweek',
    'smoke',
    'transportation',
    'veggies_freq',
    'water_daily',
    ]

    df_train["life"] = 0
    df_test["life"] = 0

    for column in life_columns:
        df_train["life"] += df_train[column]
        df_test["life"] += df_test[column]

    return df_train, df_test

# Feature selection with wrapper method

In [57]:
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split

In [58]:
df = pd.read_csv("../our data/no_outliers.csv", index_col=0)

X = df.drop('obese_level', axis=1)
y = df['obese_level']

In [59]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3)
X_train, X_test = preprocess_data(X_train, X_test)

  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)


In [60]:
baseline = RandomForestClassifier()
k = 5 # suppose i want to select 5

rfe = RFE(estimator=baseline, n_features_to_select=k)
rfe.fit_transform(X_train, y_train)

selected_features = pd.Series(rfe.support_, index = X_train.columns)
selected_features


rfe.score(X_test, y_test)

0.9355509355509356

In [61]:
# same as above but iterate k
max_score = -999 
info = {}

for i in range(1, len(X_train.columns.tolist())+1):
    rfe = RFE(estimator=baseline, n_features_to_select=i)
    rfe.fit_transform(X_train, y_train)
    s = rfe.score(X_test, y_test)
    print(f"k={i}: {s}")
    selected_features = pd.Series(rfe.support_, index = X_train.columns)
    S=selected_features[selected_features == True].index.tolist()
    print(f"\tSelected: {S}")

    if s>max_score:
        max_score = s
        info['i'] = i
        info['selected'] = S

print("=========================*2")
print(info)


k=1: 0.5987525987525988
	Selected: ['weight']
k=2: 0.7775467775467776
	Selected: ['weight', 'bmi_class']
k=3: 0.9064449064449065
	Selected: ['height', 'weight', 'bmi_class']
k=4: 0.9272349272349273
	Selected: ['age', 'height', 'weight', 'bmi_class']
k=5: 0.9355509355509356
	Selected: ['age', 'gender', 'height', 'weight', 'bmi_class']
k=6: 0.920997920997921
	Selected: ['age', 'gender', 'height', 'weight', 'bmi_class', 'life']
k=7: 0.9334719334719335
	Selected: ['age', 'eat_between_meals', 'gender', 'height', 'weight', 'bmi_class', 'life']
k=8: 0.920997920997921
	Selected: ['age', 'alcohol_freq', 'eat_between_meals', 'gender', 'height', 'weight', 'bmi_class', 'life']
k=9: 0.920997920997921
	Selected: ['age', 'alcohol_freq', 'gender', 'height', 'meals_perday', 'veggies_freq', 'weight', 'bmi_class', 'life']
k=10: 0.9293139293139293
	Selected: ['age', 'alcohol_freq', 'eat_between_meals', 'gender', 'height', 'meals_perday', 'veggies_freq', 'weight', 'bmi_class', 'life']
k=11: 0.9230769230769

# Statistical tests for correlation

In [65]:
cor = df_train[['age', 'weight', 'height', 'life']].replace({"obese_level": hash_obesity}).iloc[:, 1:].corr()
import seaborn as sns

cor.style.highlight_between(left=-1, right=-.69, color="gold").highlight_between(left=.69, right=1, color="gold")

Unnamed: 0,weight,height,life
weight,1.0,0.467835,-0.029771
height,0.467835,1.0,-0.054706
life,-0.029771,-0.054706,1.0


In [66]:
cor = df_train.replace({"obese_level": hash_obesity}).iloc[:, 1:].corr(method="spearman")
cor.style.highlight_between(left=-1, right=-.69, color="red").highlight_between(left=.69, right=1, color="red").highlight_between(left=-0.01, right=0.01, color="green")

  cor = df_train.replace({"obese_level": hash_obesity}).iloc[:, 1:].corr(method="spearman")


Unnamed: 0,age,alcohol_freq,caloric_freq,devices_perday,eat_between_meals,gender,height,meals_perday,monitor_calories,parent_overweight,physical_activity_perweek,siblings,smoke,transportation,veggies_freq,water_daily,weight,obese_level,bmi_class,life
age,1.0,0.123448,0.118546,-0.265632,-0.125292,-0.074681,0.025875,-0.066738,-0.167786,0.270327,-0.098315,0.002985,0.079838,0.457298,0.051857,0.016021,0.383939,0.21073,0.27601,-0.084931
alcohol_freq,0.123448,1.0,0.097763,-0.045656,-0.085173,0.00549,0.125705,0.073586,-0.021606,-0.027367,-0.097897,-0.00298,0.07208,-0.018461,0.077403,0.087136,0.225436,0.138799,0.177274,-0.194688
caloric_freq,0.118546,0.097763,1.0,0.070505,-0.192583,-0.062181,0.183971,-0.022938,-0.187512,0.220008,-0.079833,-0.019638,-0.032248,0.154092,-0.03393,0.018225,0.283724,0.224562,0.243427,-0.25018
devices_perday,-0.265632,-0.045656,0.070505,1.0,0.031667,0.025505,0.069469,0.046773,-0.026652,0.033014,0.094105,-0.029841,0.017977,-0.14198,-0.063563,-0.041538,-0.039799,-0.008789,-0.052372,-0.283443
eat_between_meals,-0.125292,-0.085173,-0.192583,0.031667,1.0,0.122767,-0.085168,0.122576,0.117339,-0.193936,0.065471,0.009709,0.032299,-0.06508,0.079589,-0.184092,-0.301424,-0.131974,-0.299431,-0.11042
gender,-0.074681,0.00549,-0.062181,0.025505,0.122767,1.0,-0.642498,-0.060489,0.107133,-0.110599,0.012981,-0.01589,-0.062651,-0.129917,0.288824,-0.095782,-0.212308,0.134129,0.025323,0.160294
height,0.025875,0.125705,0.183971,0.069469,-0.085168,-0.642498,1.0,0.208886,-0.131879,0.254538,0.026187,0.028434,0.068275,0.090088,-0.081046,0.182397,0.469418,0.122214,0.117376,-0.070131
meals_perday,-0.066738,0.073586,-0.022938,0.046773,0.122576,-0.060489,0.208886,1.0,-0.007828,0.051392,0.087374,0.047442,0.031882,0.037314,0.037041,0.052312,0.045035,0.129743,0.006545,0.023217
monitor_calories,-0.167786,-0.021606,-0.187512,-0.026652,0.117339,0.107133,-0.131879,-0.007828,1.0,-0.210182,0.038413,-0.017008,0.03451,-0.043101,0.069758,0.004676,-0.215413,-0.14726,-0.140436,0.137504
parent_overweight,0.270327,-0.027367,0.220008,0.033014,-0.193936,-0.110599,0.254538,0.051392,-0.210182,1.0,-0.132913,-0.002452,0.030132,0.138176,-0.008307,0.112757,0.499149,0.259903,0.427733,-0.087879


# Statistical tests ($\chi^2$)

In [67]:
import scipy.stats as stats
from scipy.stats import mannwhitneyu


In [72]:
#note: slightly modify the code
catcol =  ['alcohol_freq',
 'caloric_freq',
 'devices_perday',
 'eat_between_meals',
 'gender',
 'monitor_calories',
 'parent_overweight',
 'physical_activity_perweek',
 'smoke',
 'transportation',
 'veggies_freq',
 'water_daily',
 'bmi_class',
 'meals_perday',
 "siblings"]


import pandas as pd
from scipy.stats import spearmanr

def test_spearman_correlation(X, y, var, alpha=0.05):
    # Calculate Spearman's rank correlation between the predictor and the target
    spearman_corr, p_value = spearmanr(X[var], y)
    
    # Interpret the result based on the p-value
    if p_value < alpha:
        result = f"{var} is IMPORTANT for prediction (Spearman's correlation = {spearman_corr:.2f}, p = {p_value:.4f})"
    else:
        result = f"{var} is NOT an important predictor (Spearman's correlation = {spearman_corr:.2f}, p = {p_value:.4f})"
    
    print(result)

# Example usage
# X is a DataFrame containing predictors, y is a Series containing the target variable, and 'var' is the column name of the predictor to test.
# test_spearman_c

X_p, _ = preprocess_data(X, X_train)
for var in catcol:
    test_spearman_correlation(X_p, y, var)


  df_train[target] = df_train[target].replace(hashmap)


alcohol_freq is IMPORTANT for prediction (Spearman's correlation = 0.13, p = 0.0000)
caloric_freq is IMPORTANT for prediction (Spearman's correlation = 0.05, p = 0.0432)
devices_perday is NOT an important predictor (Spearman's correlation = -0.04, p = 0.1098)
eat_between_meals is IMPORTANT for prediction (Spearman's correlation = -0.37, p = 0.0000)
gender is NOT an important predictor (Spearman's correlation = -0.01, p = 0.6089)
monitor_calories is IMPORTANT for prediction (Spearman's correlation = -0.08, p = 0.0011)
parent_overweight is IMPORTANT for prediction (Spearman's correlation = 0.31, p = 0.0000)
physical_activity_perweek is IMPORTANT for prediction (Spearman's correlation = -0.16, p = 0.0000)
smoke is NOT an important predictor (Spearman's correlation = -0.03, p = 0.1801)
transportation is NOT an important predictor (Spearman's correlation = 0.05, p = 0.0518)
veggies_freq is NOT an important predictor (Spearman's correlation = 0.02, p = 0.4897)
water_daily is IMPORTANT for pr



# Model testing

In [None]:
# set up rkf

def run(model, X, y):
    my_model = model
    rkf = RepeatedKFold(n_splits=10)

    scores_train = []
    scores_val = []

    i = 0
    for (train_index, test_index) in rkf.split(X, y):
        X_train = X.iloc[train_index]
        X_test = X.iloc[test_index]
        y_train = y.iloc[train_index]
        y_test = y.iloc[test_index]

        X_train, X_test = preprocess_data(X_train, X_test)

        my_model.fit(X_train, y_train)

        report1 = classification_report(y_test, y_hat:=my_model.predict(X_test), output_dict=True)
        report2 = classification_report(y_train, y_hat:=my_model.predict(X_train), output_dict=True)

        scores_val.append(report1["macro avg"]["f1-score"])
        scores_train.append(report2["macro avg"]["f1-score"])
        print(i)
        i+=1

    return np.array(scores_train).mean(), np.array(scores_val).mean(), my_model.feature_importances_

        

In [74]:
run(RandomForestClassifier(), X, y)

  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target]

(0.999978619461646,
 0.9347506713765423,
 array([0.094902  , 0.02906236, 0.0163697 , 0.01809703, 0.03011454,
        0.0627612 , 0.08804353, 0.03217063, 0.00465086, 0.02894237,
        0.01574429, 0.0148447 , 0.00189053, 0.0174163 , 0.02887657,
        0.01637259, 0.2379123 , 0.2289873 , 0.0328412 ]))

In [77]:
A = np. array([0.094902  , 0.02906236, 0.0163697 , 0.01809703, 0.03011454,
        0.0627612 , 0.08804353, 0.03217063, 0.00465086, 0.02894237,
        0.01574429, 0.0148447 , 0.00189053, 0.0174163 , 0.02887657,
        0.01637259, 0.2379123 , 0.2289873 , 0.0328412 ])

pd.DataFrame(A, index=X_train.columns, columns=["b"]).sort_values(by="b",ascending=False).style.bar()

Unnamed: 0,b
weight,0.237912
bmi_class,0.228987
age,0.094902
height,0.088044
gender,0.062761
life,0.032841
meals_perday,0.032171
eat_between_meals,0.030115
alcohol_freq,0.029062
parent_overweight,0.028942


In [78]:
run(DecisionTreeClassifier(), X, y)

  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target].replace(hashmap)
  df_test[target]= df_test[target].replace(hashmap)
  df_train[target] = df_train[target]

(1.0,
 0.8887583831335648,
 array([0.10193846, 0.03012724, 0.01176187, 0.00337386, 0.00445542,
        0.1610644 , 0.08341583, 0.01239505, 0.00134955, 0.01086837,
        0.00080973, 0.00495463, 0.        , 0.00766767, 0.00428283,
        0.00735991, 0.19896782, 0.34659038, 0.00861699]))

In [79]:
A = np.array([0.10193846, 0.03012724, 0.01176187, 0.00337386, 0.00445542,
        0.1610644 , 0.08341583, 0.01239505, 0.00134955, 0.01086837,
        0.00080973, 0.00495463, 0.        , 0.00766767, 0.00428283,
        0.00735991, 0.19896782, 0.34659038, 0.00861699])


pd.DataFrame(A, index=X_train.columns, columns=["b"]).sort_values(by="b",ascending=False).style.bar()

Unnamed: 0,b
bmi_class,0.34659
weight,0.198968
gender,0.161064
age,0.101938
height,0.083416
alcohol_freq,0.030127
meals_perday,0.012395
caloric_freq,0.011762
parent_overweight,0.010868
life,0.008617


In [27]:
run(LogisticRegression(solver="liblinear"), X, y)

AttributeError: 'LogisticRegression' object has no attribute 'feature_importances_'

In [None]:
# run(LogisticRegression(solver="newton-cg"), X, y)

In [None]:
# run(LogisticRegression(solver="newton-cholesky"), X, y)

In [None]:
#run(GaussianNB(), X, y)