# 使用Facebook的prophet进行时间序列分析


In [6]:
import pandas as pd
import sys 
sys.path.append('/home/aistudio/external-libraries')

# Step 1: Data Integration
# Load all the data from the three Excel files
data_1 = pd.read_excel('/home/aistudio/data/data237885/附件1.xlsx')
data_3 = pd.read_excel('/home/aistudio/work/附件3(处理后).xlsx')
data_weight = pd.read_excel('/home/aistudio/work/权重判断.xlsx')

# Merge data_1 and data_3 on "单品编码"
merged_data_1_3 = pd.merge(data_3, data_1, on="单品编码", how="left")

# Merge the above merged data with data_weight on "单品名称" and "项"
final_merged_data = pd.merge(merged_data_1_3, data_weight, left_on="单品名称", right_on="项", how="left")

# Display a preview of the final merged data
final_merged_data.head()

Unnamed: 0,日期,单品编码,批发价格(元/千克),星期,分类名称_x,单品名称,分类编码,分类名称_y,项,指标变异性,指标冲突性,信息量,权重(%)
0,2020-07-01,102900005115762,3.88,3,花叶类,苋菜,1011010101,花叶类,苋菜,5.657,247.247,1398.61,0.782
1,2020-07-01,102900005115779,6.72,3,花叶类,云南生菜,1011010101,花叶类,云南生菜,14.079,243.429,3427.142,1.917
2,2020-07-01,102900005115786,3.19,3,花叶类,竹叶菜,1011010101,花叶类,竹叶菜,8.399,247.81,2081.401,1.164
3,2020-07-01,102900005115793,9.24,3,花叶类,小白菜,1011010101,花叶类,小白菜,2.422,246.549,597.223,0.334
4,2020-07-01,102900005115823,7.03,3,花叶类,上海青,1011010101,花叶类,上海青,6.979,242.225,1690.427,0.945


In [7]:
# Step 2: Calculate the weighted category wholesale price for each day and each category

# Group the data by date and category
grouped_data = final_merged_data.groupby(['日期', '分类名称_x'])

# Initialize an empty DataFrame to store the results
result_data = pd.DataFrame(columns=['日期', '分类名称', '加权品类批发价格'])

# Loop through each group to calculate the weighted category wholesale price
for (date, category), group in grouped_data:
    # Normalize the weights so that they sum to 1 within each group
    normalized_weights = group['权重(%)'] / group['权重(%)'].sum()
    
    # Calculate the weighted category wholesale price
    weighted_price = (group['批发价格(元/千克)'] * normalized_weights).sum()
    
    # Concat the results to the result_data DataFrame
    result_data = pd.concat([result_data, pd.DataFrame([{'日期': date, '分类名称': category, '加权品类批发价格': weighted_price}])])

# Display a preview of the result_data DataFrame
result_data.head()

Unnamed: 0,日期,分类名称,加权品类批发价格
0,2020-07-01,水生根茎类,17.621253
0,2020-07-01,花叶类,4.354734
0,2020-07-01,花菜类,7.809246
0,2020-07-01,茄类,4.996153
0,2020-07-01,辣椒类,5.518017


In [8]:
# Step 3: Output the result to an Excel file
output_file_path = '/home/aistudio/work/加权品类批发价格统计.xlsx'
result_data.to_excel(output_file_path, index=False)

output_file_path

'/home/aistudio/work/加权品类批发价格统计.xlsx'

In [12]:
import matplotlib.pyplot as plt
import prophet from Prophet

holidays = pd.DataFrame({
  'holiday': ['chinese_new_year', 'chinese_new_year', 'chinese_new_year', 'chinese_new_year', 
              'national_day', 'national_day', 'national_day', 'national_day',
              'mid_autumn_festival', 'mid_autumn_festival', 'mid_autumn_festival'],
  'ds': pd.to_datetime(['2020-01-25', '2021-02-12', '2022-02-01', '2023-01-22',
                        '2020-10-01', '2021-10-01', '2022-10-01', '2023-10-01',
                        '2020-10-01', '2021-09-21', '2022-09-10']),
  'lower_window': [-4, -4, -4, -4, -1, -1, -1, -1, -1, -1, -1],
  'upper_window': [0, 0, 0, 0, 6, 6, 6, 6, 0, 0, 0],
})

# 获取唯一品类的数量
unique_categories = result_data['分类名称'].unique()
n_categories = len(unique_categories)

# 创建多个子图
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 10))

# 展平axes数组以进行迭代
axes = axes.flatten()

for idx, category in enumerate(unique_categories):
    ax = axes[idx]
    
    # 过滤出特定品类的数据
    df_category = result_data[result_data['分类名称'] == category]
    
    # 重新命名列以符合Prophet的要求
    df_category = df_category.rename(columns={'日期': 'ds', '加权品类批发价格': 'y'})
    
    # 初始化Prophet模型，并加入节假日因素
    model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False, holidays=holidays)
    
    # 拟合模型
    model.fit(df_category)
    
    # 创建未来日期的数据框
    future = model.make_future_dataframe(periods=7)  # 预测未来7天
    
    # 进行预测
    forecast = model.predict(future)
    
    print(f"Forecast for category: {category}")
    print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(7))

    # 在子图上绘制预测结果
    fig = model.plot(forecast, ax=ax)
    ax.set_title(f'Forecast for category: {category}')

# 保存整个图像
plt.tight_layout()
plt.savefig('all_categories_forecast_with_holidays.png')

# 显示图像（如果需要）
plt.show()

SyntaxError: invalid syntax (2357433196.py, line 2)

In [None]:
from scipy.optimize import minimize
import random
import numpy as np

np.random.seed(42)
random.seed(42)

In [None]:
file_paths = {
    'sales_forecast': '/home/aistudio/work/销量预测.xlsx',
    'wholesale_price': '/home/aistudio/work/加权批发价格预测.xlsx',
    'cost_pricing': '/home/aistudio/work/利润率预测.xlsx'
}

# Read the full data from the Excel files
sales_forecast_df = pd.read_excel(file_paths['sales_forecast'])
wholesale_price_df = pd.read_excel(file_paths['wholesale_price'])
cost_pricing_df = pd.read_excel(file_paths['cost_pricing'])

# Convert the '日期' column to datetime format for easier filtering
sales_forecast_df['日期'] = pd.to_datetime(sales_forecast_df['日期'])
wholesale_price_df['日期'] = pd.to_datetime(wholesale_price_df['日期'])
cost_pricing_df['日期'] = pd.to_datetime(cost_pricing_df['日期'])

# Filter the data for the period 2023-07-01 to 2023-07-07
target_dates = pd.date_range(start='2023-07-01', end='2023-07-07')
sales_forecast_filtered = sales_forecast_df[sales_forecast_df['日期'].isin(target_dates)]
wholesale_price_filtered = wholesale_price_df[wholesale_price_df['日期'].isin(target_dates)]
cost_pricing_filtered = cost_pricing_df[cost_pricing_df['日期'].isin(target_dates)]

# Show first few rows of the filtered data
sales_forecast_filtered.head(), wholesale_price_filtered.head(), cost_pricing_filtered.head()

In [None]:
sales_forecast_df = pd.read_excel(file_paths['sales_forecast'])
wholesale_price_df = pd.read_excel(file_paths['wholesale_price'])
cost_pricing_df = pd.read_excel(file_paths['cost_pricing'])

# Convert the '日期' column to datetime format for easier filtering
sales_forecast_df['日期'] = pd.to_datetime(sales_forecast_df['日期'])
wholesale_price_df['日期'] = pd.to_datetime(wholesale_price_df['日期'])
cost_pricing_df['日期'] = pd.to_datetime(cost_pricing_df['日期'])

# Filter the data for the period 2023-07-01 to 2023-07-07
target_dates = pd.date_range(start='2023-07-01', end='2023-07-07')
sales_forecast_filtered = sales_forecast_df[sales_forecast_df['日期'].isin(target_dates)]
wholesale_price_filtered = wholesale_price_df[wholesale_price_df['日期'].isin(target_dates)]
cost_pricing_filtered = cost_pricing_df[cost_pricing_df['日期'].isin(target_dates)]

# Define the new bounds for P for each category
P_bounds = {
    '水生根茎类': (0.3351, 0.8184),
    '花叶类': (0.3879, 0.6366),
    '花菜类': (0.5046, 0.6533),
    '茄类': (0.9084, 1.9028),
    '辣椒类': (0.7028, 0.7914),
    '食用菌': (0.4123, 0.4578)
}
P_bounds_list = [P_bounds[col] for col in sales_forecast_filtered.columns[1:]]
Q_in_bounds_list = [(None, None)] * 6  # Assuming there are 6 categories
total_bounds = P_bounds_list + Q_in_bounds_list

# Known decay rates and penalty coefficients
decay_rates = {
    '花菜类': 0.1551,
    '水生根茎类': 0.1365,
    '花叶类': 0.1283,
    '食用菌': 0.0945,
    '辣椒类': 0.0924,
    '茄类': 0.0668
}
alpha = 0.1
beta = 0.1

# Objective function
def objective_function(params, *args):
    n_categories = len(args[0].columns) - 1
    P_values = params[:n_categories]
    Q_in_values = params[n_categories:]
    
    sales_forecast, wholesale_price, cost_pricing = args
    W = np.mean(wholesale_price.iloc[:, 1:], axis=0).values
    lambda_values = np.array([decay_rates[col] for col in wholesale_price.columns[1:]])
    sales_forecast_values = np.mean(sales_forecast.iloc[:, 1:], axis=0).values
    
    C = W * (1 + P_values)
    Q_out_values = (1 - lambda_values) * Q_in_values
    R = np.sum(C * Q_out_values)
    L = np.sum(lambda_values * W * Q_in_values)
    S = np.sum(W * Q_in_values)
    
    C_target = W * (1 + np.mean(cost_pricing.iloc[:, 1:], axis=0).values)
    Q_in_target = sales_forecast_values
    penalty = alpha * np.sum((C - C_target) ** 2) + beta * np.sum((Q_in_values - Q_in_target) ** 2)
    
    Profit = R - L - S - penalty
    
    return -Profit  # Negative because we're using minimize

# Initial guess
initial_guess_P = np.full(6, 0.6)
initial_guess_Q_in = np.mean(sales_forecast_filtered.iloc[:, 1:], axis=0).values
initial_guess = np.concatenate([initial_guess_P, initial_guess_Q_in])

# Perform optimization
result = minimize(objective_function, initial_guess, args=(sales_forecast_filtered, wholesale_price_filtered, cost_pricing_filtered),
                  bounds=total_bounds)

# Extract optimized parameters
optimized_P_values = result.x[:6]
optimized_Q_in_values = result.x[6:]

print("Optimized P values:", optimized_P_values)
print("Optimized Q_in values:", optimized_Q_in_values)
print("Maximized Profit:", -result.fun)