# In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, f1_score
import numpy as np
from sklearn.preprocessing import binarize

# Fit one random-forest regressor per (target column, fid) combination and
# write the resulting mean absolute errors to "mbse_data.csv".

# Load the two input tables.
ert_data = pd.read_csv('rel_ERT.csv')
ela_data = pd.read_csv('median_features.csv')

# Merge the two datasets on their shared key columns (fid and dim).
# NOTE(review): "iid" is not a merge key; if both CSVs carry an "iid" column
# pandas will suffix it (_x/_y) and the feature selection below will raise a
# KeyError -- confirm against the input files.
merged_data = pd.merge(ert_data, ela_data, on=['fid', 'dim'])

# Columns fed to the model as inputs, and the columns predicted one at a time.
features_columns = ["fid", "dim", "iid", "ela_meta.lin_simple.adj_r2", "ela_meta.lin_simple.intercept", "ela_meta.lin_simple.coef.min", "ela_meta.lin_simple.coef.max", "ela_meta.lin_simple.coef.max_by_min", "ela_meta.lin_w_interact.adj_r2", "ela_meta.quad_simple.adj_r2"]
target_columns = ["BSqi", "BSrr", "CMA-CSA", "fmincon", "fminunc", "HCMA",
                  "HMLSL", "IPOP400D", "MCS", "MLSL", "OQNLP", "SMAC-BBOB"]

# Distinct values of the 'fid' column. The variable name is historical: these
# are the values the data is partitioned by, while the entries of
# `target_columns` are what ends up in the output's 'Algorithm' column.
algorithms = merged_data['fid'].unique()

# Not used by this script; kept in case later code relies on it.
threshold = 0.5


def _has_non_finite(values):
    """Return True if *values* (DataFrame or Series) contains inf, -inf or NaN."""
    return bool(values.isin([np.inf, -np.inf, np.nan]).to_numpy().any())


# Collect one dict per fitted model and build the DataFrame once at the end;
# concatenating inside the loop re-copies the accumulated frame every time.
result_rows = []
for target_column in target_columns:
    selected_data = merged_data[features_columns + [target_column]]
    for alg1 in algorithms:
        # Rows belonging to the current fid (filter computed once, used twice).
        subset = selected_data[selected_data['fid'] == alg1]
        X_pair = subset[features_columns]
        y_pair = subset[target_column]

        # Skip only this (target, fid) subset when it contains non-finite
        # values. `continue` rather than `break`: one bad subset must not
        # discard every remaining fid for the current target column.
        if _has_non_finite(X_pair) or _has_non_finite(y_pair):
            continue

        # Fit a random-forest regressor on the subset.
        # NOTE: no random_state is passed, so results vary between runs.
        model = RandomForestRegressor()
        model.fit(X_pair, y_pair)

        # Predict on the same rows the model was fitted on, so the error
        # below is an in-sample (training) error.
        y_pred = model.predict(X_pair)

        # Mean absolute error; stored under the existing 'mbse' column name
        # so the output file keeps its schema.
        mbse = mean_absolute_error(y_pair, y_pred)

        result_rows.append({'fid': alg1, 'mbse': mbse, 'Algorithm': target_column})

# Explicit `columns` keeps the header and column order even if no rows were
# produced.
result_data = pd.DataFrame(result_rows, columns=['fid', 'mbse', 'Algorithm'])
result_data.to_csv("mbse_data.csv", encoding='utf-8', index=False)
