In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

# input
data = {
    'Year': ['2018-2019', '2019-2020', '2020-2021', '2021-2022'],
    'GDP_Y': [18.75, 16.49, 18.12, 18.82],
    'Positive_Destination_x1': [92.80, 92.10, 93.20, 93.40],
    'Expenditure_x2': [35.303163, 37.701317, 49.352117, 53.247205],
    'Average_age_x3': [79.13, 78.875, 78.69, 78.685],
    'Investment_x4': [59.3, 46.5, 75.4, 95.3]
}

df = pd.DataFrame(data)
print("data:")
print(df)

# get values
Y = df['GDP_Y'].values
X1 = df['Positive_Destination_x1'].values
X2 = df['Expenditure_x2'].values
X3 = df['Average_age_x3'].values
X4 = df['Investment_x4'].values

#  Cobb-Douglas
# Y = A * X1^α1 * X2^α2 * X3^α3 * X4^α4
def cobb_douglas(X, A, alpha1, alpha2, alpha3, alpha4):
    x1, x2, x3, x4 = X
    return A * (x1**alpha1) * (x2**alpha2) * (x3**alpha3) * (x4**alpha4)


# ln(Y) = ln(A) + α1*ln(X1) + α2*ln(X2) + α3*ln(X3) + α4*ln(X4)
def log_linear_fit():
    
    ln_Y = np.log(Y)
    ln_X1 = np.log(X1)
    ln_X2 = np.log(X2)
    ln_X3 = np.log(X3)
    ln_X4 = np.log(X4)
    
    
    X_matrix = np.column_stack([np.ones(len(Y)), ln_X1, ln_X2, ln_X3, ln_X4])
    
    
    coeffs = np.linalg.lstsq(X_matrix, ln_Y, rcond=None)[0]

    return coeffs
    print("\n=== Model ===")
try:
    log_coeffs = log_linear_fit()
    ln_A, alpha1, alpha2, alpha3, alpha4 = log_coeffs
    A = np.exp(ln_A)
    
    Y_pred_log = A * (X1**alpha1) * (X2**alpha2) * (X3**alpha3) * (X4**alpha4)
    r2_log = r2_score(Y, Y_pred_log)
    rmse_log = np.sqrt(mean_squared_error(Y, Y_pred_log))
    
    print(f"coefficient: A={A:.2e}, α1={alpha1:.4f}, α2={alpha2:.4f}, α3={alpha3:.4f}, α4={alpha4:.4f}")
    print(f"Function: Y = {A:.2e} * X1^{alpha1:.4f} * X2^{alpha2:.4f} * X3^{alpha3:.4f} * X4^{alpha4:.4f}")
    print(f"R² = {r2_log:.4f}, RMSE = {rmse_log:.2f}")
    
    print("actual value  vs predicted value:")
    for i in range(len(Y)):
        print(f"{data['Year'][i]}: actual={Y[i]:.0f}, predicted={Y_pred_log[i]:.0f}, error={abs(Y[i]-Y_pred_log[i])/Y[i]*100:.2f}%")
    
except Exception as e:
    print(f"fail: {e}")

data:
        Year  GDP_Y  Positive_Destination_x1  Expenditure_x2  Average_age_x3  \
0  2018-2019  18.75                     92.8       35.303163          79.130   
1  2019-2020  16.49                     92.1       37.701317          78.875   
2  2020-2021  18.12                     93.2       49.352117          78.690   
3  2021-2022  18.82                     93.4       53.247205          78.685   

   Investment_x4  
0           59.3  
1           46.5  
2           75.4  
3           95.3  
coefficient: A=2.02e-01, α1=7.5512, α2=-0.4879, α3=-6.6246, α4=0.2492
Function: Y = 2.02e-01 * X1^7.5512 * X2^-0.4879 * X3^-6.6246 * X4^0.2492
R² = 1.0000, RMSE = 0.00
actual value  vs predicted value:
2018-2019: actual=19, predicted=19, error=0.00%
2019-2020: actual=16, predicted=16, error=0.00%
2020-2021: actual=18, predicted=18, error=0.00%
2021-2022: actual=19, predicted=19, error=0.00%
