In [13]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# 假设df_csv是已经加载好的DataFrame
df_csv = pd.read_csv('df_past_order_with_cluster.csv', index_col=0, parse_dates=True)
df_csv.drop('SKU', axis=1, inplace=True)
grouped = df_csv.groupby('Cluster')

# 分别获取每个Cluster的数据
group_0 = grouped.get_group(0)
group_1 = grouped.get_group(1)
group_2 = grouped.get_group(2)
group_0 = group_0.T

# 假设X是我们要预测的特征，train和test是时间序列数据
X = df_csv.iloc[:, 0]
train = group_0.loc['2016-06-01':'2016-06-18']   
test = group_0.loc['2016-06-18':'2016-06-23']
X_train = group_0.loc['2016-06-01':'2016-06-18']   
X_test = group_0.loc['2016-06-18':'2016-06-23']

# 初始化XGBoost模型
xgb_model = xgb.XGBRegressor(n_estimators=100, random_state=80)

# 初始化用于存储RMSE总和的变量
sum_rmse = 0

# 对每一列进行预测
for col in train:
    train_data = train[col]
    test_data = test[col]
    
    # 训练XGBoost模型
    xgb_model.fit(X_train, train_data, eval_set=[(X_test, test_data)])
    
    # 使用模型进行预测
    y_pred = xgb_model.predict(X_test)
    
    # 计算MSE和RMSE
    rmse = np.sqrt(mean_squared_error(test_data, y_pred))
    
    # 累加RMSE
    sum_rmse += rmse
    
    print(f'RMSE for {col}: {rmse}')

# 打印RMSE总和
print(f'Total RMSE Sum: {sum_rmse}')

  df_csv = pd.read_csv('df_past_order_with_cluster.csv', index_col=0, parse_dates=True)


[0]	validation_0-rmse:38.79791
[1]	validation_0-rmse:30.59168
[2]	validation_0-rmse:24.89944
[3]	validation_0-rmse:21.62588
[4]	validation_0-rmse:20.11431
[5]	validation_0-rmse:18.80418
[6]	validation_0-rmse:17.70185
[7]	validation_0-rmse:17.48825
[8]	validation_0-rmse:17.22737
[9]	validation_0-rmse:17.03472
[10]	validation_0-rmse:16.89369
[11]	validation_0-rmse:16.77961
[12]	validation_0-rmse:16.65136
[13]	validation_0-rmse:16.57440
[14]	validation_0-rmse:16.54411
[15]	validation_0-rmse:16.47896
[16]	validation_0-rmse:16.46719
[17]	validation_0-rmse:16.46000
[18]	validation_0-rmse:16.45338
[19]	validation_0-rmse:16.45017
[20]	validation_0-rmse:16.44823
[21]	validation_0-rmse:16.44647
[22]	validation_0-rmse:16.44458
[23]	validation_0-rmse:16.44295
[24]	validation_0-rmse:16.44180
[25]	validation_0-rmse:16.44189
[26]	validation_0-rmse:16.44124
[27]	validation_0-rmse:16.44252
[28]	validation_0-rmse:16.44172
[29]	validation_0-rmse:16.44230
[30]	validation_0-rmse:16.44232
[31]	validation_0-

In [7]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

# 定义 train_xgb_model 函数
def train_xgb_model(X, y, param_grid, n_estimators):
    # 网格搜索交叉验证找到最优超参数
    xgb_model = xgb.XGBRegressor(n_estimators=n_estimators, eval_metric='rmse', use_label_encoder=False)
    cv_result = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=5, scoring='neg_root_mean_squared_error')
    cv_result.fit(X, y)
    
    # 打印最佳参数
    print("Best parameters:", cv_result.best_params_)
    
    # 使用最佳参数初始化模型
    xgb_model = xgb.XGBRegressor(**cv_result.best_params_, n_estimators=n_estimators, random_state=80, eval_metric='rmse', use_label_encoder=False)
    
    # 训练模型
    xgb_model.fit(X, y)

    return xgb_model, cv_result.best_params_

# 假设其他代码和数据准备已经完成，并且 X, train, test 已经正确定义
df_csv = pd.read_csv('df_past_order_with_cluster.csv', index_col=0, parse_dates=True)
df_csv.drop('SKU', axis=1, inplace=True)
grouped = df_csv.groupby('Cluster')

# 分别获取每个Cluster的数据
group_0 = grouped.get_group(0)
group_1 = grouped.get_group(1)
group_2 = grouped.get_group(2)
group_0 = group_0.T

# 假设X是标签，train和test是时间序列数据
X = df_csv.iloc[:, 0]
train = group_0.loc['2016-06-01':'2016-06-18']   
test = group_0.loc['2016-06-18':'2016-06-23']
X_train = group_0.loc['2016-06-01':'2016-06-18']   
X_test = group_0.loc['2016-06-18':'2016-06-23']
# 定义参数网格
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9]
}

# 初始化用于存储RMSE总和的变量
sum_rmse = 0

# 对每一列进行预测
for col in train.columns:
    train_data = train[col]
    test_data = test[col]
    
    # 训练XGBoost模型
    model, best_params = train_xgb_model(X_train, train_data, param_grid, 100)
    
    # 使用模型进行预测
    y_pred = model.predict(X_test)
    
    # 计算MSE和RMSE
    rmse = np.sqrt(mean_squared_error(test_data, y_pred))
    
    # 累加RMSE
    sum_rmse += rmse
    print(f'RMSE for {col}: {rmse}')

# 打印RMSE总和
print(f'Total RMSE Sum: {sum_rmse}')


  df_csv = pd.read_csv('df_past_order_with_cluster.csv', index_col=0, parse_dates=True)
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not u

Best parameters: {'colsample_bytree': 0.9, 'learning_rate': 0.2, 'max_depth': 7, 'subsample': 0.8}
RMSE for shang-hai-shi: 12.762443514076889


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

Best parameters: {'colsample_bytree': 0.9, 'learning_rate': 0.1, 'max_depth': 5, 'subsample': 0.7}
RMSE for bei-jing-shi: 14.686001190750568


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

Best parameters: {'colsample_bytree': 0.9, 'learning_rate': 0.1, 'max_depth': 5, 'subsample': 0.7}
RMSE for tian-jin-shi: 9.448233251231464


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

Best parameters: {'colsample_bytree': 0.7, 'learning_rate': 0.1, 'max_depth': 5, 'subsample': 0.7}
RMSE for guang-zhou-shi: 9.560634238958178


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

Best parameters: {'colsample_bytree': 0.7, 'learning_rate': 0.1, 'max_depth': 5, 'subsample': 0.8}
RMSE for shen-zhen-shi: 3.648842335121858


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

Best parameters: {'colsample_bytree': 0.9, 'learning_rate': 0.1, 'max_depth': 3, 'subsample': 0.9}


Parameters: { "use_label_encoder" } are not used.



RMSE for chong-qing-shi: 8.890165235848324
Total RMSE Sum: 58.99631976598728
