# 版图电容预测

## 基本库导入

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt


## 参数设置

In [2]:
# config: notebook-wide settings read by the cells below
pattern_num = 4  # selects the "pattern{N}" sub-directory the .npy files are loaded from
random_state = 42  # seed passed to every train_test_split call
reserve_num = 2000  # fixed number of rows each sample is truncated / zero-padded to
n_components = 1000  # NOTE(review): not referenced anywhere in the visible notebook — confirm it is still needed

DISABLE_DDR = True  # NOTE(review): not referenced anywhere in the visible notebook — confirm it is still needed
DISABLE_STD = True  # when True, the preprocessing cell feeds the raw (un-standardized) features to the models

## 数据导入

In [3]:
def data_process(x, reserve_num, n_features=5):
    """Truncate / zero-pad variable-length samples to a fixed number of rows.

    NOTE: the preprocessing cell further down redefines ``data_process`` with
    a different signature (``x, y, mean, std``). Re-run this cell before
    calling this loader version again.

    Args:
        x: sequence of samples. Each sample is a sequence of rows holding
            ``n_features`` values; samples may differ in length.
        reserve_num: number of rows kept per sample. Longer samples are
            truncated, shorter ones are padded with zeros.
        n_features: number of values per row (defaults to 5, the width the
            notebook's data uses).

    Returns:
        new_x: float32 array of shape ``(len(x), reserve_num, n_features)``.
        valid_num: column vector of shape ``(len(x), 1)`` holding the number
            of real (non-padding) rows of every sample.
    """
    # Number of rows actually copied per sample; also what gets reported back.
    kept = [min(len(sample), reserve_num) for sample in x]
    new_x = np.zeros(shape=(len(x), reserve_num, n_features), dtype=np.float32)
    for i, num in enumerate(kept):
        # An empty sample has nothing to copy. Skipping it avoids a broadcast
        # error when the sample is an empty list or a shape-(0,) array.
        if num:
            new_x[i, :num] = x[i][:num]
    valid_num = np.array(kept).reshape(-1, 1)

    return new_x, valid_num

# Project root. The default is the original local checkout; set the
# RC_PREDICT_DIR environment variable to run the notebook on another machine.
dir_prj = os.environ.get("RC_PREDICT_DIR", "D:/learn_more_from_life/computer/EDA/work/prj/rc_predict/")
dir_load = os.path.join(dir_prj, "data/convert_data/pattern{}".format(pattern_num))

# SECURITY: allow_pickle=True unpickles arbitrary Python objects.
# Only load .npy files that come from a trusted source.
x_total_ = np.load(os.path.join(dir_load, "x_total.npy"), allow_pickle=True)
x_total, total_valid_num = data_process(x_total_, reserve_num)
x_couple_ = np.load(os.path.join(dir_load, "x_couple.npy"), allow_pickle=True)
x_couple, couple_valid_num = data_process(x_couple_, reserve_num)

# The y arrays get two columns: column 0 is the per-sample valid row count,
# column 1 is the value loaded from the y_*.npy file.
y_total = np.load(os.path.join(dir_load, "y_total.npy")).reshape(-1, 1)
y_total = np.concatenate([total_valid_num, y_total], axis=1)
y_couple = np.load(os.path.join(dir_load, "y_couple.npy")).reshape(-1, 1)
y_couple = np.concatenate([couple_valid_num, y_couple], axis=1)
print(x_total.shape)
print(x_couple[0])
print(y_total.shape)
print(y_couple[:10])

(640, 2000, 5)
[[ 0.00e+00  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [-1.33e-01  0.00e+00  0.00e+00 -1.20e-01 -1.00e+02]
 [ 7.30e-02  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 ...
 [ 0.00e+00  0.00e+00  0.00e+00  0.00e+00  0.00e+00]
 [ 0.00e+00  0.00e+00  0.00e+00  0.00e+00  0.00e+00]
 [ 0.00e+00  0.00e+00  0.00e+00  0.00e+00  0.00e+00]]
(640, 2)
[[1848.            4.71537018]
 [1848.            4.71537018]
 [1488.            3.26024795]
 [1488.            3.26024795]
 [1284.            2.67295098]
 [1284.            2.67295098]
 [1372.            3.07517195]
 [1372.            3.07517195]
 [1956.            5.36869097]
 [1956.            5.36869097]]


## 数据分割

In [4]:
# data split 6:2:2
from sklearn.model_selection import train_test_split

# Every split uses the same seed and shuffling, so build those keyword arguments once.
split_options = dict(random_state=random_state, shuffle=True)

# total: 60% train first, then the remaining 40% is halved into validation and test
(x_total_train, x_total_valid_test,
 y_total_train, y_total_valid_test) = train_test_split(
    x_total, y_total, test_size=0.4, **split_options)
(x_total_valid, x_total_test,
 y_total_valid, y_total_test) = train_test_split(
    x_total_valid_test, y_total_valid_test, test_size=0.5, **split_options)
print('total x len:', len(x_total_train), len(x_total_valid), len(x_total_test))
print('x total train first 10\n', x_total_train[0][:10])
print('x total train last 10\n', x_total_train[0][-10:])
print('y total train first 10\n', y_total_train[:10])

# couple: same 60/20/20 procedure
(x_couple_train, x_couple_valid_test,
 y_couple_train, y_couple_valid_test) = train_test_split(
    x_couple, y_couple, test_size=0.4, **split_options)
(x_couple_valid, x_couple_test,
 y_couple_valid, y_couple_test) = train_test_split(
    x_couple_valid_test, y_couple_valid_test, test_size=0.5, **split_options)
print('couple x len', len(x_couple_train), len(x_couple_valid), len(x_couple_test))
print('x couple train first 10\n', x_couple_train[0][:10])
print('x couple train last 10\n', x_couple_train[0][-10:])
print('y couple train first 10\n', y_couple_train[:10])

total x len: 384 128 128
x total train first 10
 [[ 0.00e+00  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [ 8.90e-02  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [-1.72e-01  0.00e+00  0.00e+00  1.20e-01  1.00e+02]
 [ 2.17e-01  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [ 3.06e-01  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [ 4.34e-01  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [ 5.23e-01  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [ 6.51e-01  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [ 7.40e-01  0.00e+00  0.00e+00  3.20e-02  1.00e+02]
 [ 8.68e-01  0.00e+00  0.00e+00  3.20e-02  1.00e+02]]
x total train last 10
 [[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
y total train first 10
 [[1380.            7.71336603]
 [1284.            9.43041134]
 [ 692.            8.08362389]
 [1008.            9.28992271]
 [ 984.            7.78516483]
 [1236.           10.47247601]
 [ 812

## 数据预处理

In [5]:
from sklearn.preprocessing import StandardScaler


def cal_mean_std(x, y):
    """Per-feature mean and standard deviation over the valid (non-padding) rows.

    Args:
        x: array of shape ``(n_samples, n_rows, n_features)``. Rows beyond a
            sample's valid count are treated as padding and ignored.
        y: 2-D array whose column 0 holds the number of valid rows of every
            sample (rounded to the nearest integer before use).

    Returns:
        ``(mean, std)``: two float32 vectors of length ``n_features``.
        Features whose standard deviation is exactly 0 get ``std = 1`` so a
        later division by ``std`` is safe.
    """
    counts = [round(y[i][0]) for i in range(len(x))]
    # Take the feature width from the data instead of assuming 5 columns.
    n_features = x.shape[-1]
    # Stack only the valid rows of all samples into one float32 buffer.
    x_valid = np.zeros((sum(counts), n_features), dtype=np.float32)
    start = 0
    for sample, num in zip(x, counts):
        x_valid[start:start + num, :] = sample[:num]
        start += num
    mean = np.mean(x_valid, axis=0)
    std = np.std(x_valid, axis=0)
    std[std == 0] = 1

    return mean, std

def get_mask(x, y):
    """Build a 0/1 mask marking the valid (non-padding) rows of every sample.

    The result is an int32 array with the same shape as ``x``. For sample
    ``i`` the first ``round(y[i][0])`` rows are set to 1; the rest stay 0.
    """
    valid = np.zeros(x.shape, np.int32)
    for idx, sample_mask in enumerate(valid):
        # `sample_mask` is a view into `valid`, so this writes in place.
        sample_mask[:round(y[idx][0])] = 1

    return valid

def data_process(x, y, mean, std):
    """Standardize ``x`` with the given statistics and zero out the padding rows.

    NOTE: this definition replaces the two-argument ``data_process`` loader
    helper defined in the data import cell; after this cell runs, the name
    refers to this function.

    Args:
        x: feature array; ``mean`` and ``std`` are broadcast against it.
        y: 2-D array whose column 0 holds the valid row count of every sample
            (consumed by ``get_mask``).
        mean: value subtracted from ``x``.
        std: value ``x - mean`` is divided by.

    Returns:
        ``(x - mean) / std`` multiplied element-wise by the 0/1 mask from
        ``get_mask(x, y)``.
    """
    standardized = (x - mean) / std

    return standardized * get_mask(x, y)

def _flatten_samples(batch):
    """Reshape ``batch`` from (n_samples, ...) to (n_samples, -1)."""
    return batch.reshape(len(batch), -1)


# x
# total: mean/std come from the training split and are applied to all three splits
mean_total, std_total = cal_mean_std(x_total_train, y_total_train)
print('mean total:', mean_total)
print('std total:', std_total)
x_total_train_std, x_total_valid_std, x_total_test_std = [
    data_process(x_part, y_part, mean_total, std_total)
    for x_part, y_part in (
        (x_total_train, y_total_train),
        (x_total_valid, y_total_valid),
        (x_total_test, y_total_test),
    )
]
x_total_train_std_flat, x_total_valid_std_flat, x_total_test_std_flat = map(
    _flatten_samples, (x_total_train_std, x_total_valid_std, x_total_test_std))
print('x total train std flat first 20\n', x_total_train_std_flat[0][:20])
print('x total train std flat last 20\n', x_total_train_std_flat[0][-20:])

# couple: same procedure with its own training statistics
mean_couple, std_couple = cal_mean_std(x_couple_train, y_couple_train)
print('mean couple:', mean_couple)
print('std couple:', std_couple)
x_couple_train_std, x_couple_valid_std, x_couple_test_std = [
    data_process(x_part, y_part, mean_couple, std_couple)
    for x_part, y_part in (
        (x_couple_train, y_couple_train),
        (x_couple_valid, y_couple_valid),
        (x_couple_test, y_couple_test),
    )
]
x_couple_train_std_flat, x_couple_valid_std_flat, x_couple_test_std_flat = map(
    _flatten_samples, (x_couple_train_std, x_couple_valid_std, x_couple_test_std))
print('x couple train std flat first 20\n', x_couple_train_std_flat[0][:20])
print('x couple train std flat last 20\n', x_couple_train_std_flat[0][-20:])

# y
# The scalers are created but never fitted: the fit/transform lines below are
# commented out, so the *_std targets are plain copies of column 1.
# total
scaler_yt = StandardScaler()
y_total_train_std = y_total_train[:, 1].copy().reshape(-1, 1)
# scaler_yt.fit(y_total_train_std)
# y_total_train_std = scaler_yt.transform(y_total_train_std)
# print('y total train std first 10\n', y_total_train_std[:10].reshape(-1))

# couple
scaler_yc = StandardScaler()
y_couple_train_std = y_couple_train[:, 1].copy().reshape(-1, 1)
# scaler_yc.fit(y_couple_train_std)
# y_couple_train_std = scaler_yc.transform(y_couple_train_std)
# print('y couple train std first 10\n', y_couple_train_std[:10].reshape(-1))

if DISABLE_STD:
    # Bypass standardization: overwrite the flat feature matrices with the
    # raw (zero-padded) features.
    x_total_train_std_flat, x_total_valid_std_flat, x_total_test_std_flat = map(
        _flatten_samples, (x_total_train, x_total_valid, x_total_test))
    x_couple_train_std_flat, x_couple_valid_std_flat, x_couple_test_std_flat = map(
        _flatten_samples, (x_couple_train, x_couple_valid, x_couple_test))


mean total: [7.4687828e+01 0.0000000e+00 0.0000000e+00 6.1754137e-02 1.0000000e+02]
std total: [4.3193623e+01 1.0000000e+00 1.0000000e+00 2.2523755e-02 1.0000000e+00]
x total train std flat first 20
 [-1.72914016  0.          0.         -1.3210113   0.         -1.72707975
  0.          0.         -1.3210113   0.         -1.73312223  0.
  0.          2.58597469  0.         -1.72411621  0.          0.
 -1.3210113   0.        ]
x total train std flat last 20
 [-0.  0.  0. -0. -0. -0.  0.  0. -0. -0. -0.  0.  0. -0. -0. -0.  0.  0.
 -0. -0.]
mean couple: [7.4687828e+01 0.0000000e+00 0.0000000e+00 6.1579026e-02 9.9810188e+01]
std couple: [4.3193623e+01 1.0000000e+00 1.0000000e+00 2.2968883e-02 6.1557627e+00]
x couple train std flat first 20
 [-1.72914016e+00  0.00000000e+00  0.00000000e+00 -1.28778684e+00
  3.08347996e-02 -1.73312223e+00  0.00000000e+00  0.00000000e+00
 -7.90543556e+00 -3.24590454e+01 -1.72707975e+00  0.00000000e+00
  0.00000000e+00 -1.28778684e+00  3.08347996e-02 -1.724116

## 命名简化

In [6]:
# Short aliases used by the model cells. Features are the flattened matrices
# from the preprocessing cell; targets are column 1 of each y array
# (column 0 holds the valid row count), reshaped to a column vector.

# total
xt_train, xt_valid, xt_test = (
    x_total_train_std_flat, x_total_valid_std_flat, x_total_test_std_flat)
yt_train, yt_valid, yt_test = [
    part[:, 1].reshape(-1, 1)
    for part in (y_total_train, y_total_valid, y_total_test)
]

# couple
xc_train, xc_valid, xc_test = (
    x_couple_train_std_flat, x_couple_valid_std_flat, x_couple_test_std_flat)
yc_train, yc_valid, yc_test = [
    part[:, 1].reshape(-1, 1)
    for part in (y_couple_train, y_couple_valid, y_couple_test)
]

## 模型预测

### 模型评估

In [7]:
from sklearn.metrics import mean_squared_error, r2_score, mean_squared_error


def model_analysis(model, scaler, x_train, y_train, x_valid, y_valid, x_test, y_test, name):
    """Fit ``model`` on the training split and print its evaluation report.

    Prints MSE and R2 for the train / valid / test splits, then the
    relative-error summary from ``analysis_result`` for valid and test.

    Args:
        model: estimator exposing ``fit(x, y)`` and ``predict(x)``.
        scaler: currently unused; kept so existing call sites keep working.
        x_train, y_train: training features and targets (``y_train`` is
            flattened with ``ravel()`` before fitting).
        x_valid, y_valid: validation features and targets.
        x_test, y_test: test features and targets.
        name: label prefixed to every printed line.

    Returns:
        None. All results are printed.
    """
    model.fit(x_train, y_train.ravel())
    # Predict every split exactly once and reuse the result for both metrics.
    y_train_predict = model.predict(x_train).reshape(-1, 1)
    y_valid_predict = model.predict(x_valid).reshape(-1, 1)
    y_test_predict = model.predict(x_test).reshape(-1, 1)

    for split, y_true, y_pred in (
        ('train', y_train, y_train_predict),
        ('valid', y_valid, y_valid_predict),
        ('test', y_test, y_test_predict),
    ):
        print(f"{name} {split} mse:", mean_squared_error(y_true, y_pred))
        print(f"{name} {split} r2:", r2_score(y_true, y_pred))
    print('')
    analysis_result(y_valid, y_valid_predict, title=f'{name} valid analysis')
    analysis_result(y_test, y_test_predict, title=f'{name} test analysis')
    print('')


def analysis_result(y, y_predict, title, threshold=0.05):
    """Print relative-error statistics of ``y_predict`` against ``y``.

    The relative error is ``|y - y_predict| / |y|``. Both inputs are flattened
    first, so an ``(n, 1)`` target and an ``(n,)`` prediction are compared
    element by element instead of being broadcast to an ``(n, n)`` matrix.

    Args:
        y: true target values.
        y_predict: predicted values, with the same number of elements as ``y``.
        title: heading printed before the statistics.
        threshold: a sample counts as "good" when its relative error is
            ``<= threshold`` and "bad" otherwise (default 0.05, i.e. 5%).

    Returns:
        None. All results are printed.

    Note:
        A target equal to 0 still produces an ``inf`` or ``nan`` relative error.
    """
    y_true = np.asarray(y, dtype=float).ravel()
    y_hat = np.asarray(y_predict, dtype=float).ravel()
    # Divide by |y| so negative targets give a non-negative relative error
    # rather than a negative one that always passes the threshold.
    relative_error = np.abs(y_true - y_hat) / np.abs(y_true)
    max_error = np.max(relative_error)
    mean_error = np.mean(relative_error)
    std_error = np.std(relative_error)
    num_good = np.sum(relative_error <= threshold) / len(y_true)
    num_bad = np.sum(relative_error > threshold) / len(y_true)
    print(title)
    print('max error:', max_error)
    print('mean error:', mean_error)
    print('std error:', std_error)
    print('good ratio:', num_good)
    print('bad ratio:', num_bad)
    print('')


### 线性回归

In [8]:
from sklearn.linear_model import LinearRegression


# Evaluation splits, in the positional order model_analysis expects.
total_splits = (xt_train, yt_train, xt_valid, yt_valid, xt_test, yt_test)
couple_splits = (xc_train, yc_train, xc_valid, yc_valid, xc_test, yc_test)

# total
lr_t = LinearRegression()
model_analysis(lr_t, scaler_yt, *total_splits, 'linear total')

# couple
lr_c = LinearRegression()
model_analysis(lr_c, scaler_yc, *couple_splits, 'linear couple')

linear total train mse: 0.02180045202068352
linear total train r2: 0.9761285157589505
linear total valid mse: 9.888562455938722
linear total valid r2: -11.219933055719629
linear total test mse: 3.460107364310648
linear total test r2: -3.056481786313732

linear total valid analysis
max error: 2.770487798191249
mean error: 0.10250670798101508
std error: 0.40072082129583864
good ratio: 0.765625
bad ratio: 0.234375

linear total test analysis
max error: 2.3954517007806513
mean error: 0.05230150277403865
std error: 0.21208548942479596
good ratio: 0.8203125
bad ratio: 0.1796875


linear couple train mse: 0.023412760147991946
linear couple train r2: 0.9796619577592581
linear couple valid mse: 27.286172955777907
linear couple valid r2: -25.20973539683677
linear couple test mse: 9.125866185368068
linear couple test r2: -7.224192992933933

linear couple valid analysis
max error: 10.583168830503688
mean error: 0.40211561782327787
std error: 1.6471414129531097
good ratio: 0.4609375
bad ratio: 0.53

### 支持向量机回归

In [9]:
from sklearn.svm import SVR

# Evaluation splits, in the positional order model_analysis expects.
total_splits = (xt_train, yt_train, xt_valid, yt_valid, xt_test, yt_test)
couple_splits = (xc_train, yc_train, xc_valid, yc_valid, xc_test, yc_test)

# total
# linear svr (constructed only; its evaluation call is commented out)
lr_svf_t = SVR(kernel='linear')
# model_analysis(lr_svf_t, scaler_yt, *total_splits, 'linear svr total')
# poly svr
poly_svf_t = SVR(kernel='poly')
model_analysis(poly_svf_t, scaler_yt, *total_splits, 'poly svr total')
# rbf svr
rbf_svf_t = SVR(kernel='rbf')
model_analysis(rbf_svf_t, scaler_yt, *total_splits, 'rbf svr total')

# couple
# linear svr
lr_svf_c = SVR(kernel='linear')
model_analysis(lr_svf_c, scaler_yc, *couple_splits, 'linear svr couple')
# poly svr
poly_svf_c = SVR(kernel='poly')
model_analysis(poly_svf_c, scaler_yc, *couple_splits, 'poly svr couple')
# rbf svr
rbf_svf_c = SVR(kernel='rbf')
model_analysis(rbf_svf_c, scaler_yc, *couple_splits, 'rbf svr couple')

poly svr total train mse: 0.6656214262387395
poly svr total train r2: 0.27114486562537266
poly svr total valid mse: 0.8225627608235122
poly svr total valid r2: -0.01649374377511892
poly svr total test mse: 0.8583827113887135
poly svr total test r2: -0.006331153290276337

poly svr total valid analysis
max error: 0.23414898789743854
mean error: 0.08606820446803076
std error: 0.05854764380226088
good ratio: 0.3515625
bad ratio: 0.6484375

poly svr total test analysis
max error: 0.24445686811787246
mean error: 0.08247558655696391
std error: 0.05749177039962627
good ratio: 0.3671875
bad ratio: 0.6328125


rbf svr total train mse: 0.6938063535806268
rbf svr total train r2: 0.2402823840475301
rbf svr total valid mse: 0.7654172304288068
rbf svr total valid r2: 0.05412466602594457
rbf svr total test mse: 0.8166585259068445
rbf svr total test r2: 0.04258449603371428

rbf svr total valid analysis
max error: 0.23445730402784348
mean error: 0.08368352547035494
std error: 0.05612317072091765
good ra

### K近邻回归

In [10]:
from sklearn.neighbors import KNeighborsRegressor

# Evaluation splits, in the positional order model_analysis expects.
total_splits = (xt_train, yt_train, xt_valid, yt_valid, xt_test, yt_test)
couple_splits = (xc_train, yc_train, xc_valid, yc_valid, xc_test, yc_test)

# total
# uniform knn: all 5 neighbours contribute equally
uni_knn_t = KNeighborsRegressor(n_neighbors=5, weights='uniform')
model_analysis(uni_knn_t, scaler_yt, *total_splits, 'uniform knn total')
# distance knn: neighbours are weighted by inverse distance
dis_knn_t = KNeighborsRegressor(n_neighbors=5, weights='distance')
model_analysis(dis_knn_t, scaler_yt, *total_splits, 'distance knn total')

# couple
# uniform knn
uni_knn_c = KNeighborsRegressor(n_neighbors=5, weights='uniform')
model_analysis(uni_knn_c, scaler_yc, *couple_splits, 'uniform knn couple')
# distance knn
dis_knn_c = KNeighborsRegressor(n_neighbors=5, weights='distance')
model_analysis(dis_knn_c, scaler_yc, *couple_splits, 'distance knn couple')

uniform knn total train mse: 0.40494201094912124
uniform knn total train r2: 0.5565886971637332
uniform knn total valid mse: 0.7099092739175914
uniform knn total valid r2: 0.1227194203847527
uniform knn total test mse: 0.8332446448753767
uniform knn total test r2: 0.023139640016973884

uniform knn total valid analysis
max error: 0.25628169281548524
mean error: 0.07675723154747965
std error: 0.06218816785899593
good ratio: 0.46875
bad ratio: 0.53125

uniform knn total test analysis
max error: 0.2493623164914555
mean error: 0.08283223849970206
std error: 0.05969844699892884
good ratio: 0.3828125
bad ratio: 0.6171875


distance knn total train mse: 0.0
distance knn total train r2: 1.0
distance knn total valid mse: 0.7321729343269028
distance knn total valid r2: 0.0952067823254491
distance knn total test mse: 0.6322754995613478
distance knn total test r2: 0.2587472647936151

distance knn total valid analysis
max error: 0.2683179694394419
mean error: 0.076091965277785
std error: 0.061681607

### 回归树

In [11]:
from sklearn.tree import DecisionTreeRegressor

# The trees are seeded with the notebook-wide `random_state` so a re-run
# reproduces the printed metrics.

# total
dtr_t = DecisionTreeRegressor(random_state=random_state)
model_analysis(dtr_t, scaler_yt, xt_train, yt_train, xt_valid, yt_valid, xt_test, yt_test, 'decision tree total')

# couple
dtr_c = DecisionTreeRegressor(random_state=random_state)
model_analysis(dtr_c, scaler_yc, xc_train, yc_train, xc_valid, yc_valid, xc_test, yc_test, 'decision tree couple')

decision tree total train mse: 0.0
decision tree total train r2: 1.0
decision tree total valid mse: 0.1178592263877789
decision tree total valid r2: 0.8543537685204576
decision tree total test mse: 0.10383602977337603
decision tree total test r2: 0.8782670827259873

decision tree total valid analysis
max error: 0.11094789876094113
mean error: 0.027338770367426473
std error: 0.025189040276549706
good ratio: 0.8046875
bad ratio: 0.1953125

decision tree total test analysis
max error: 0.11183152468420941
mean error: 0.026454788270785946
std error: 0.023139494335136325
good ratio: 0.84375
bad ratio: 0.15625


decision tree couple train mse: 0.0
decision tree couple train r2: 1.0
decision tree couple valid mse: 0.055331953218411556
decision tree couple valid r2: 0.9468508883530466
decision tree couple test mse: 0.20376529548784417
decision tree couple test r2: 0.8163675555487393

decision tree couple valid analysis
max error: 0.22238553917957485
mean error: 0.03701491514020056
std error: 0.

### 集成模型

#### 基础模型

In [12]:
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor


# Every ensemble is seeded with the notebook-wide `random_state` so a re-run
# reproduces the printed metrics.

# total
# random forest
rfr_t = RandomForestRegressor(random_state=random_state)
model_analysis(rfr_t, scaler_yt, xt_train, yt_train, xt_valid, yt_valid, xt_test, yt_test, 'random forest total')
# extra tree
etr_t = ExtraTreesRegressor(random_state=random_state)
model_analysis(etr_t, scaler_yt, xt_train, yt_train, xt_valid, yt_valid, xt_test, yt_test, 'extra tree total')
# gradient boosting
gbr_t = GradientBoostingRegressor(random_state=random_state)
model_analysis(gbr_t, scaler_yt, xt_train, yt_train, xt_valid, yt_valid, xt_test, yt_test, 'gradient boosting total')


# couple
# random forest
rfr_c = RandomForestRegressor(random_state=random_state)
model_analysis(rfr_c, scaler_yc, xc_train, yc_train, xc_valid, yc_valid, xc_test, yc_test, 'random forest couple')
# extra tree
etr_c = ExtraTreesRegressor(random_state=random_state)
model_analysis(etr_c, scaler_yc, xc_train, yc_train, xc_valid, yc_valid, xc_test, yc_test, 'extra tree couple')
# gradient boosting
gbr_c = GradientBoostingRegressor(random_state=random_state)
model_analysis(gbr_c, scaler_yc, xc_train, yc_train, xc_valid, yc_valid, xc_test, yc_test, 'gradient boosting couple')
# XGBoost extreme gradient boosting

random forest total train mse: 0.009613662709927834
random forest total train r2: 0.9894730440606886
random forest total valid mse: 0.06314172559332651
random forest total valid r2: 0.9219717058762489
random forest total test mse: 0.04808478537717725
random forest total test r2: 0.9436274555832505

random forest total valid analysis
max error: 0.09029101806513795
mean error: 0.020721965061938512
std error: 0.018096961269829594
good ratio: 0.9140625
bad ratio: 0.0859375

random forest total test analysis
max error: 0.059740064977995166
mean error: 0.0185391285274865
std error: 0.014658881496501436
good ratio: 0.9453125
bad ratio: 0.0546875


extra tree total train mse: 0.0
extra tree total train r2: 1.0
extra tree total valid mse: 0.1241980969260451
extra tree total valid r2: 0.8465204182259499
extra tree total test mse: 0.11623344347662277
extra tree total test r2: 0.8637328854917243

extra tree total valid analysis
max error: 0.1261399174241882
mean error: 0.027674793762929202
std err

#### XGBoost

In [13]:
import xgboost as xgb


# Hyper-parameters shared by both XGBoost models. The seed comes from the
# notebook-wide `random_state` (42) through the scikit-learn style
# `random_state` argument rather than a second hard-coded `seed=42`.
xgb_params = dict(booster='gbtree',
                  n_estimators=100,
                  learning_rate=0.1,
                  max_depth=6,
                  min_child_weight=3,
                  random_state=random_state)

# total
# XGBoost extreme gradient boosting
xgb_t = xgb.XGBRegressor(**xgb_params)
model_analysis(xgb_t, scaler_yt, xt_train, yt_train, xt_valid, yt_valid, xt_test, yt_test, 'XGBoost total')

# couple
# XGBoost extreme gradient boosting
xgb_c = xgb.XGBRegressor(**xgb_params)
model_analysis(xgb_c, scaler_yc, xc_train, yc_train, xc_valid, yc_valid, xc_test, yc_test, 'XGBoost couple')

XGBoost total train mse: 0.0033816441457960402
XGBoost total train r2: 0.9962971013234672
XGBoost total valid mse: 0.04216242344273091
XGBoost total valid r2: 0.9478971797738254
XGBoost total test mse: 0.04122712030791753
XGBoost total test r2: 0.9516670886122776

XGBoost total valid analysis
max error: 0.07438467023746123
mean error: 0.01755563784389258
std error: 0.014911232170357693
good ratio: 0.9375
bad ratio: 0.0625

XGBoost total test analysis
max error: 0.07270817400778765
mean error: 0.017357537591847536
std error: 0.014570174796645722
good ratio: 0.96875
bad ratio: 0.03125


XGBoost couple train mse: 0.0009914654575179742
XGBoost couple train r2: 0.9991387403181949
XGBoost couple valid mse: 0.023308572675106998
XGBoost couple valid r2: 0.9776109488390847
XGBoost couple test mse: 0.01783057050495529
XGBoost couple test r2: 0.9839311633517062

XGBoost couple valid analysis
max error: 0.11640568765523625
mean error: 0.025589947968222983
std error: 0.021050911298348116
good ratio

### 懒模型

In [None]:
from lazypredict.Supervised import LazyRegressor
import warnings

# NOTE: this silences every warning for the rest of the kernel session.
warnings.filterwarnings("ignore")

# total: fit the LazyRegressor model zoo on the training split, score on validation
reg_t = LazyRegressor(verbose=0, predictions=True, ignore_warnings=True)
models_t, predictions_t = reg_t.fit(xt_train, xt_valid, yt_train.ravel(), yt_valid.ravel())
print('total models')
print(models_t)
print('')

# couple
reg_c = LazyRegressor(verbose=0, predictions=True, ignore_warnings=True)
models_c, predictions_c = reg_c.fit(xc_train, xc_valid, yc_train.ravel(), yc_valid.ravel())
print('couple models')
# Show the couple leaderboard here; this line previously re-printed `models_t`.
print(models_c)