In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from mrmr import mrmr_classif, mrmr_regression
from sklearn.preprocessing import StandardScaler


In [2]:
df = pd.read_excel('Eye_Features.xlsx')

In [14]:
# Select columns to scale (excluding Gender)
feature_cols_to_scale = ['Age', 'Height', 'Weight', 'Mean_Red', 'Mean_Green', 'Mean_Blue', 
                         'Mean_R-G', 'Erythema_Index', 'Mean_Hue', 'Mean_Saturation', 'Mean_Value', 
                         'High_Hue_Ratio', 'Brightness', 'Entropy', 'Mean_L', 'Mean_A', 'Mean_B', 
                         'Mean_G1', 'Mean_G2', 'Mean_G3', 'Mean_G4', 'Mean_G5']
target_col = 'Hb Value'  

In [15]:
X = df[feature_cols_to_scale]
Y = df[target_col]

In [16]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X = pd.DataFrame(X_scaled, columns=feature_cols_to_scale)
X['Gender'] = df['Gender']

In [17]:
feature_cols = ['Age', 'Height', 'Weight', 'Mean_Red', 'Mean_Green', 'Mean_Blue', 
                'Mean_R-G', 'Erythema_Index', 'Mean_Hue', 'Mean_Saturation', 'Mean_Value', 
                'High_Hue_Ratio', 'Brightness', 'Entropy', 'Mean_L', 'Mean_A', 'Mean_B', 
                'Mean_G1', 'Mean_G2', 'Mean_G3', 'Mean_G4', 'Mean_G5', 'Gender']
target_col = 'Hb Value'  

In [18]:
X = df[feature_cols]
y = df[target_col]

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [21]:
selected_features = mrmr_regression(X_train, y_train, K=10)
selected_features

100%|██████████| 10/10 [00:00<00:00, 12.36it/s]


['Gender',
 'High_Hue_Ratio',
 'Height',
 'Erythema_Index',
 'Weight',
 'Age',
 'Mean_Hue',
 'Mean_Green',
 'Mean_A',
 'Mean_Saturation']

In [22]:
X = df[selected_features]

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [35]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from xgboost import XGBRegressor


In [38]:
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet": ElasticNet(),
    "Polynomial Regression (Degree=2)": make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
    "Support Vector Regressor": SVR(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
    "XGBoost Regressor": XGBRegressor(n_estimators=100, random_state=42),
    "KNN Regressor": KNeighborsRegressor(),
    "Neural Network": MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=42),

}

results = []
for name, model in models.items():
    model.fit(X_train, y_train)  
    y_pred = model.predict(X_test)  
    
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    results.append({"Model": name, "MAE": mae, "MSE": mse, "R2 Score": r2})

results_df = pd.DataFrame(results).sort_values(by="MAE", ascending=True)


In [39]:
results_df

Unnamed: 0,Model,MAE,MSE,R2 Score
0,Linear Regression,1.108915,2.095872,0.384706
1,Ridge Regression,1.114775,2.111338,0.380166
7,Random Forest,1.250753,2.440041,0.283667
11,Neural Network,1.255255,2.942724,0.136092
4,Polynomial Regression (Degree=2),1.265584,2.583902,0.241433
3,ElasticNet,1.278361,2.526151,0.258388
8,Gradient Boosting,1.307162,2.58984,0.23969
2,Lasso Regression,1.310444,2.592338,0.238957
5,Support Vector Regressor,1.36658,2.849869,0.163352
9,XGBoost Regressor,1.372425,2.84326,0.165292
