In [1]:
# import solver
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
import sys, os
file_names = os.listdir('.')
data = { filename: float(filename[:-4]) for filename in file_names if filename[-4:] == '.csv' and filename != 'test_est.csv' }
data = pd.DataFrame(data.items(), columns=['filename', 'value'])
# initial column 'X' with np.array
L = data['value'].values
L

array([40817.759, 22981.229, 52624.087, 19950.142, 17737.757, 21844.753,
       31369.548])

In [3]:
prediction = np.zeros((len(L), 10))
for i in range(len(L)):
    df = pd.read_csv(data['filename'][i])
    prediction[i,:] = df['Outcome'].values
X = prediction
X[0]

array([236055.66666667, 370113.5       , 112997.66666667, 242601.        ,
       132288.        ,  72128.33333333, 284858.33333333, 121413.83333333,
       188869.33333333, 145681.        ])

In [4]:
import numpy as np
from scipy.optimize import minimize

# 初始猜测组和相应的 MSE 值
# 假设 guess_sets 是一个包含多组猜测的二维数组，每行为一组猜测的10个数值
# 假设 mse_values 是一个包含对应的MSE值的一维数组
guess_sets = X
mse_values = L **2

# 定义目标函数，用于计算猜测值与真实值的差距
def mse_loss(true_values):
    # 计算每组猜测的 MSE
    losses = np.mean((guess_sets - true_values) ** 2, axis=1)
    # 计算与已有 MSE 值的差距
    return np.sum((losses - mse_values) ** 2)

# 初始真实值猜测（可以根据情况调整）
initial_guess = np.mean(guess_sets, axis=0)

# 使用 scipy.optimize.minimize 进行优化
result = minimize(mse_loss, initial_guess, method='L-BFGS-B')

losses = np.mean((guess_sets - result.x) ** 2, axis=1)

# 打印优化结果
if result.success:
    print("估计的真实值为：", result.x)
else:
    print("优化未成功，请检查输入数据。")

估计的真实值为： [131040.18179937 368654.06162485 104870.2200439  204389.40014767
 158600.27192379 114707.86495975 256617.91602662  98788.16167541
 182015.25117097 161255.27129665]


In [5]:
# 查看和真实值的差距
# print("每组猜测的 MSE：", losses)
# print("总体 MSE：", np.mean(losses))
print("diff", losses - mse_values)
print("theta", result.x)


diff [ 4.79221344e-05  4.96208668e-04 -3.76701355e-05  4.15921211e-04
 -1.83105469e-04 -6.05344772e-04 -4.78863716e-04]
theta [131040.18179937 368654.06162485 104870.2200439  204389.40014767
 158600.27192379 114707.86495975 256617.91602662  98788.16167541
 182015.25117097 161255.27129665]


In [6]:
test_set = pd.read_csv('../predictions/40817.759.csv')
test_x = test_set['Outcome'].values
test_x

array([236055.66666667, 370113.5       , 112997.66666667, 242601.        ,
       132288.        ,  72128.33333333, 284858.33333333, 121413.83333333,
       188869.33333333, 145681.        ])

In [7]:
pred_L = np.mean((test_x - result.x) ** 2)
pred_L = np.sqrt(pred_L)
pred_L

40817.75900000059

In [8]:
# save the result
df = pd.DataFrame({'Outcome': result.x})
df.to_csv('test_est.csv', index=False)