In [None]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import matplotlib.pyplot as plt

# Read the data
# The input file name is set where pd.read_csv is called; edit that call to use a different file.



In [None]:
# Load the raw records. The input file name is hard-coded below; edit the
# read_csv call to point at a different file.
data = pd.read_csv('data0.csv')
# Parse the date column so min/max and the per-route merge operate on timestamps.
data["date"] = pd.to_datetime(data["date"])

# Continuous daily calendar spanning the observed period; every route is
# aligned to it so that days without a record can be filled in explicitly.
date_range = pd.date_range(start=data['date'].min(), end=data['date'].max())

# One row per distinct (place1, place2) pair. drop=True yields a clean 0..n-1
# index (used as the progress counter) without keeping the old row labels
# around as a stray "index" column.
unique_routes = data[['place1', 'place2']].drop_duplicates().reset_index(drop=True)

# Per-route forecast frames are accumulated here.
predictions = []

# Optional debugging aid: uncomment one of these to restrict a run to a few routes.
# wanted=[['DC14','DC10'],['DC20','DC35'],['DC25','DC62']]
# wanted=[['DC19','DC8']]

# Only forecast rows on or after this date are kept and plotted.
# NOTE(review): presumably the first day after the historical data ends — confirm.
FORECAST_START = '2022-01-01'

# Fit one Prophet model per route, forecast 31 days ahead, then store and
# plot the post-processed prediction.
for index, route in unique_routes.iterrows():
    print(f'{index}/{len(unique_routes)}')
    place1, place2 = route['place1'], route['place2']
    # Optional debugging filter (requires `wanted` to be defined):
    # if [place1,place2] not in wanted:
    #     continue

    # Records belonging to this route only.
    route_data = data[(data['place1'] == place1) & (data['place2'] == place2)]

    # Skeleton frame with one row per calendar day for this route.
    complete_dates = pd.DataFrame({'date': date_range})
    complete_dates['place1'] = place1
    complete_dates['place2'] = place2

    # Left-join the observations onto the full calendar; days without a
    # record come out as NaN.
    merged_data = pd.merge(complete_dates, route_data, on=['date', 'place1', 'place2'], how='left')

    # Treat missing days as zero volume. Assign the result back instead of
    # calling fillna(inplace=True) on a column selection, which is chained
    # assignment and does not update the frame under pandas Copy-on-Write.
    merged_data['num'] = merged_data['num'].fillna(0)

    # Prophet expects the columns to be named `ds` (date) and `y` (value).
    # rename() returns a new frame, so no slice of merged_data is mutated.
    df = merged_data[['date', 'num']].rename(columns={'date': 'ds', 'num': 'y'})

    # Build and fit the model for this route.
    model = Prophet(
        growth='linear',
        seasonality_mode='multiplicative',
        changepoint_prior_scale=5000,
        seasonality_prior_scale=50,
        daily_seasonality=False,
        weekly_seasonality=True,
        yearly_seasonality=True
    )
    model.fit(df)

    # Extend the history by 31 daily periods and predict over the whole frame.
    future = model.make_future_dataframe(periods=31)
    forecast = model.predict(future)

    # Post-process yhat: floor at 0, cap at the route's historical maximum,
    # then round up to a whole number. The two clips are applied in sequence
    # to keep the original lower-then-upper order. A NaN maximum (no valid
    # history) leaves the upper bound unapplied rather than turning every
    # prediction into NaN.
    history_max = route_data['num'].max()
    forecast['yhat'] = np.ceil(
        forecast['yhat'].clip(lower=0).clip(upper=history_max)
    )

    # Keep only the forecast window.
    forecast_after_2022 = forecast[forecast['ds'] >= FORECAST_START]

    # assign() builds a new frame, avoiding writes into a slice of `forecast`.
    route_forecast = forecast_after_2022[['ds', 'yhat']].assign(
        place1=place1,
        place2=place2,
    )
    predictions.append(route_forecast)

    # Plot the zero-filled history together with the retained forecast.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(df['ds'], df['y'], label='Historical Data')
    ax.plot(forecast_after_2022['ds'], forecast_after_2022['yhat'], color='orange', label='Predicted Data')
    ax.set_title(f'Route {index + 1}: {place1} to {place2}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Num')
    ax.legend()
    # plt.savefig(f'route_{index + 1}_forecast.png')
    plt.show()
    # Release the figure so open figures do not pile up across routes.
    plt.close(fig)

# Stack the per-route forecast frames row-wise into a single table and write
# it to disk as CSV, leaving the row index out of the file.
predictions_df = pd.concat(predictions, axis=0)
predictions_df.to_csv(path_or_buf='predictions.csv', index=False)

print("Predictions saved to predictions.csv")
