In [None]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
import xgboost as xgb
import pandas as pd
import numpy as np

# Hyperparameter search: tune one XGBoost regressor per target column with a
# randomized search, then save the best configuration and score per target.

# Every column from position 8 onward is used as a model input.
# NOTE(review): presumably the first 8 columns hold identifiers and the
# targets -- confirm that none of `target_columns` sits at position >= 8,
# otherwise the targets leak into the features.
feature_columns = merged_df.columns[8:]
target_columns = ['updrs_1', 'updrs_2', 'updrs_3', 'updrs_4']

X = merged_df[feature_columns]

# Candidate values for the search (3 * 5 * 4 = 60 combinations in total).
# Single-element lists pin a setting instead of searching over it.
# NOTE(review): the 'seed' key is kept unchanged so the saved
# `best_parameters` stay comparable with earlier runs; the xgboost
# scikit-learn wrapper documents `random_state` for this purpose -- confirm
# that 'seed' is still honoured by the installed xgboost version.
params = {
    'n_estimators': [500, 750, 1000],
    'max_depth': [3, 4, 5, 6, 7],
    'learning_rate': [0.075],
    'reg_alpha': [5, 10, 15, 30],
    'objective': ['reg:squarederror'],
    'seed': [33]
}

# One dict per target; the results DataFrame is built once after the loop.
# Growing a DataFrame with pd.concat on every iteration re-copies all earlier
# rows, and concatenating with an initially empty frame raises a
# FutureWarning on recent pandas releases.
result_rows = []

for target_column in target_columns:
    print(f"Optimizing hyperparameters for {target_column}")
    y = merged_df[target_column]

    # Rows with no recorded value for this target cannot be used for fitting
    # or scoring, so they are dropped for this target only. This is a no-op
    # when the column has no missing values. A positional boolean array is
    # used so the mask does not depend on index alignment.
    has_target = y.notna().to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(
        X[has_target], y[has_target], test_size=0.2, random_state=33
    )
    # NOTE(review): X_test / y_test are held out here but never evaluated in
    # this cell; the reported score comes from cross-validation on the
    # training split only.

    # Sample 20 parameter combinations, each scored with 5-fold CV. No
    # `scoring` is passed, so the estimator's default scorer is used
    # (R^2 for scikit-learn style regressors).
    randomized_search = RandomizedSearchCV(
        estimator=xgb.XGBRegressor(),
        param_distributions=params,
        n_iter=20,
        cv=5,
        random_state=33,
    )
    randomized_search.fit(X_train, y_train)

    print(f"Best parameters for {target_column}: {randomized_search.best_params_}")
    print(f"Best score for {target_column}: {randomized_search.best_score_}\n")

    # Record the winning configuration for this target.
    new_row = {'target_variable': target_column,
               'best_parameters': randomized_search.best_params_,
               'best_score': randomized_search.best_score_}
    result_rows.append(new_row)

# Build the results table in one step; the explicit column list keeps the
# header stable even if `target_columns` is empty.
results_df = pd.DataFrame(
    result_rows,
    columns=['target_variable', 'best_parameters', 'best_score'],
)

# Save the results DataFrame to a CSV file
results_df.to_csv('best_hyperparameters_scores_randomized.csv', index=False)
