In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from mrmr import mrmr_classif, mrmr_regression
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_excel('Palm_Features.xlsx')

In [3]:
# Select columns to scale (excluding Gender)
feature_cols_to_scale = ['Age', 'Height', 'Weight', 'Mean_Red', 'Mean_Green', 'Mean_Blue', 
                         'Mean_R-G', 'Erythema_Index', 'Mean_Hue', 'Mean_Saturation', 'Mean_Value', 
                         'High_Hue_Ratio', 'Brightness', 'Entropy', 'Mean_L', 'Mean_A', 'Mean_B', 
                         'Mean_G1', 'Mean_G2', 'Mean_G3', 'Mean_G4', 'Mean_G5']
target_col = 'Hb Value'  

In [4]:
X = df[feature_cols_to_scale]
Y = df[target_col]

In [5]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X = pd.DataFrame(X_scaled, columns=feature_cols_to_scale)
X['Gender'] = df['Gender']

In [6]:
feature_cols = ['Age', 'Height', 'Weight', 'Mean_Red', 'Mean_Green', 'Mean_Blue', 
                'Mean_R-G', 'Erythema_Index', 'Mean_Hue', 'Mean_Saturation', 'Mean_Value', 
                'High_Hue_Ratio', 'Brightness', 'Entropy', 'Mean_L', 'Mean_A', 'Mean_B', 
                'Mean_G1', 'Mean_G2', 'Mean_G3', 'Mean_G4', 'Mean_G5', 'Gender']
target_col = 'Hb Value'  

In [7]:
X = df[feature_cols]
y = df[target_col]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
selected_features = mrmr_regression(X_train, y_train, K=10)
selected_features

100%|██████████| 10/10 [00:00<00:00, 20.33it/s]


['Gender',
 'Mean_B',
 'Age',
 'Height',
 'Weight',
 'Mean_Hue',
 'Entropy',
 'Mean_G1',
 'Mean_Blue',
 'High_Hue_Ratio']

In [10]:
X = df[selected_features]

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from xgboost import XGBRegressor


In [13]:
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet": ElasticNet(),
    "Polynomial Regression (Degree=2)": make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
    "Support Vector Regressor": SVR(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
    "XGBoost Regressor": XGBRegressor(n_estimators=100, random_state=42),
    "KNN Regressor": KNeighborsRegressor(),
    "Neural Network": MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),

}

results = []
for name, model in models.items():
    model.fit(X_train, y_train)  
    y_pred = model.predict(X_test)  
    
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    results.append({"Model": name, "MAE": mae, "MSE": mse, "R2 Score": r2})

results_df = pd.DataFrame(results).sort_values(by="MAE", ascending=True)


In [15]:
results_df

Unnamed: 0,Model,MAE,MSE,R2 Score
0,Linear Regression,1.342198,3.149976,0.29987
1,Ridge Regression,1.344493,3.153133,0.299169
7,Random Forest,1.351417,3.209592,0.28662
8,Gradient Boosting,1.367732,3.347506,0.255966
4,Polynomial Regression (Degree=2),1.512719,3.754242,0.165563
9,XGBoost Regressor,1.562053,4.209574,0.064359
3,ElasticNet,1.608555,3.851002,0.144057
2,Lasso Regression,1.620262,3.898667,0.133462
10,KNN Regressor,1.647857,4.109014,0.086709
5,Support Vector Regressor,1.681688,4.070006,0.09538
