In [38]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingRegressor

import xgboost as xgb
import lightgbm as lgb
import catboost as cb

from tensorflow import keras
from keras import layers

In [39]:
# Load the cleaned dataset from its path relative to the notebook's working directory.
DATA_PATH = 'dataset_regression/parkinsons_updrs_cleaned.data'
df = pd.read_csv(DATA_PATH)

In [40]:
# Sanity check: display the (rows, columns) of the loaded frame.
df.shape

(2296, 8)

In [41]:
# Separate the regression target from the predictor columns.
TARGET_COLUMN = 'motor_UPDRS'
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])

In [42]:
# Hold out 20% of the rows for testing. The seed is fixed (same value the
# KFold splitter in this notebook uses) so the split -- and therefore every
# metric computed from it -- is reproducible after Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [43]:
meta_model = LinearRegression()

# stacking regression algorithm
stacking_model = StackingRegressor(
    estimators=[
        ('Extra Trees Regressor', ExtraTreesRegressor(n_estimators=400, max_depth=20, min_samples_split=2, max_features='sqrt')),
        ('Gradient Boosting Regressor', GradientBoostingRegressor(n_estimators=500, learning_rate=0.01, max_depth=10)),
        ('XGBoost', xgb.XGBRegressor(n_estimators=400, learning_rate=0.01, max_depth=10)),
        ('LightGBM', lgb.LGBMRegressor(n_estimators=500, learning_rate=0.01, max_depth=12, verbose=0)),
        ('CatBoost', cb.CatBoostRegressor(n_estimators=400, learning_rate=0.01, depth=6, verbose=0)),
        ('Neural Network', keras.models.Sequential([
            keras.Input(shape=(X.shape[1],)),
            layers.BatchNormalization(),       
            layers.Dense(16, activation='relu'),     
            layers.BatchNormalization(),
            layers.Dense(8, activation='relu'),   
            layers.BatchNormalization(),
            layers.Dense(22, activation='relu'),
            layers.BatchNormalization(),
            layers.Dense(1)
        ]))
    ],
    
    final_estimator=meta_model, 
    cv=KFold(n_splits=5, shuffle=True, random_state=42), verbose=2, n_jobs=-1
)

In [44]:
 #fit the model
meta_model.fit(X_train, y_train)  

# test predictions and metrics
predictions = meta_model.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse).round(2)    
r2 = r2_score(y_test, predictions)

print()
print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R2: {r2:.2f}")


MAE: 2.97
MSE: 12.47
RMSE: 3.53
R2: 0.73


In [45]:
# One hand-written feature row used to try the fitted model on unseen input.
sample_features = {
    'age': 72,
    'HNR': 21.640,
    'RPDE': 0.572,
    'DFA': 0.718,
    'PPE': 0.160,
    'Jitter_combined': 0.003,
    'Shimmer_combined': 0.0256,
}
new_data = pd.DataFrame([sample_features])

In [46]:
# NOTE(review): meta_model is the bare LinearRegression fitted on the raw
# features, not the stacking ensemble. Switch to
# stacking_model.predict(new_data) once stacking_model has been fitted.
predictions = meta_model.predict(new_data)
print("Predictions:", predictions[0].round(2), sep="\n")

Predictions:
25.35
