In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [None]:
# Read the results table (import your own data here) and keep only the rows
# that have no missing value in any column.
data = pd.read_csv("Multiple_Linear_regression_results.csv").dropna()

In [None]:
# Candidate models: each inner list names the predictor columns of one model
# (each predictor on its own, then both together).
predictor_combos = [
    list(names)
    for names in (
        ('Spec_suit',),
        ('Spatial_res',),
        ('Spec_suit', 'Spatial_res'),
    )
]

In [None]:
# Results for each fitted model, keyed by the tuple of predictor names.
# Each value holds the model's AIC, intercept, slope coefficients and R-squared.
model_results = {}

# The response column is the same for every candidate model, so select it once.
y = data['RMSE_val']

# Fit a linear regression for each combination of predictor variables and
# record its AIC, fitted parameters and R-squared.
for combo in predictor_combos:
    predictors = list(combo)

    # LinearRegression estimates the intercept itself, so the design matrix
    # holds only the predictor columns. Adding an explicit constant column is
    # redundant and can misalign coef_ with the predictor names.
    X = data[predictors]
    model = LinearRegression().fit(X, y)

    # Number of estimated regression parameters: one slope per predictor
    # plus the intercept.
    num_params = len(predictors) + 1

    # Gaussian log-likelihood evaluated at the maximum-likelihood variance
    # estimate. The per-observation log-densities are summed directly rather
    # than taking the log of a product of densities, which underflows to 0
    # (giving an infinite AIC) once there are enough observations.
    residuals = y - model.predict(X)
    sigma2 = np.mean(residuals**2)
    log_likelihood = np.sum(
        -0.5 * np.log(2 * np.pi * sigma2) - (residuals**2) / (2 * sigma2)
    )

    # AIC = 2k - 2 ln(L)
    aic = 2 * num_params - 2 * log_likelihood

    # Store this model's statistics in the dictionary
    model_results[tuple(predictors)] = {
        'AIC': aic,
        'Intercept': model.intercept_,
        'Coefficients': model.coef_,  # One slope per predictor, in order
        'R-squared': model.score(X, y),
    }

# Report every model, ordered from lowest AIC to highest, together with its
# fitted equation and R-squared.
print("Model results:")
ranked_models = sorted(model_results.items(), key=lambda entry: entry[1]['AIC'])
for predictors, results in ranked_models:
    print(f"Predictors: {predictors} - AIC: {results['AIC']:.4f}")

    # One " + coefficient * name" term per predictor, appended to the intercept.
    slope_terms = [
        f" + {results['Coefficients'][position]:.4f} * {name}"
        for position, name in enumerate(predictors)
    ]
    equation = f"RMSE_val = {results['Intercept']:.4f}" + "".join(slope_terms)

    print("Model equation:", equation)
    print("R-squared:", results['R-squared'])
    print()