# Prophet을 사용하여 시계열 데이터로 수요 예측하기

In [1]:
!pip install prophet



In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("/content/ORDER_PROD_CUS_REV.csv", encoding='utf-8')
df.head()

Unnamed: 0,order_id,order_item_id,order_purchase_timestamp,seller_id,p_product_id,product_category_name,price,freight_value,customer_id,customer_state,review_id,review_score
0,ORDER_75477,1,2018-09-13 08:59:02,SELLER_0452,PRODUCT_23128,cool_stuff,58.9,13.29,CUSTOMER_58023,RJ,REVIEW_45875,5
1,ORDER_63604,1,2018-04-26 10:53:06,SELLER_0414,PRODUCT_24351,pet_shop,239.9,19.93,CUSTOMER_30341,SP,REVIEW_24615,4
2,ORDER_05566,1,2019-01-14 14:33:31,SELLER_1637,PRODUCT_20226,furniture_decor,199.0,17.87,CUSTOMER_30949,MG,REVIEW_03775,5
3,ORDER_04632,1,2018-02-04 13:57:51,SELLER_1445,PRODUCT_07878,garden_tools,199.9,18.14,CUSTOMER_06713,SP,REVIEW_49155,5
4,ORDER_61452,1,2018-05-15 21:42:34,SELLER_0581,PRODUCT_03505,home_utilities,21.9,12.69,CUSTOMER_20396,MG,REVIEW_20134,4


In [4]:
df = df.drop(columns = ['order_item_id', 'p_product_id', 'customer_id', 'review_id'])
df.head()

Unnamed: 0,order_id,order_purchase_timestamp,seller_id,product_category_name,price,freight_value,customer_state,review_score
0,ORDER_75477,2018-09-13 08:59:02,SELLER_0452,cool_stuff,58.9,13.29,RJ,5
1,ORDER_63604,2018-04-26 10:53:06,SELLER_0414,pet_shop,239.9,19.93,SP,4
2,ORDER_05566,2019-01-14 14:33:31,SELLER_1637,furniture_decor,199.0,17.87,MG,5
3,ORDER_04632,2018-02-04 13:57:51,SELLER_1445,garden_tools,199.9,18.14,SP,5
4,ORDER_61452,2018-05-15 21:42:34,SELLER_0581,home_utilities,21.9,12.69,MG,4


In [5]:
# 범주 병합 - 여기서는 적용하지 않고 모든 범주 그대로 사용

category_mapping = {
    'agro_industry_and_commerce': 'agro_industry_and_commerce',
    'air_conditioning': 'air_conditioning',
    'arts': 'arts_and_crafts',
    'arts_and_crafts': 'arts_and_crafts',
    'audio': 'audio',
    'automotive': 'automotive',
    'baby': 'baby',
    'bed_bath_table': 'bed_bath_table',
    'bedroom_furniture': 'furniture',
    'blu_ray_dvds': 'blu_ray_dvds',
    'books_general_interest': 'books',
    'imported_books': 'books',
    'technical_books': 'books',
    'christmas_articles': 'christmas_articles',
    'cinema_photo': 'cinema_photo',
    'computers_accessories': 'computers_accessories',
    'consoles_games': 'consoles_games',
    'construction_tools': 'construction_tools',
    'construction_tools_construction': 'construction_tools',
    'construction_tools_garden': 'construction_tools',
    'construction_tools_lighting': 'construction_tools',
    'construction_tools_safety': 'construction_tools',
    'cool_stuff': 'cool_stuff',
    'diapers_hygiene': 'diapers_hygiene',
    'drinks': 'food_drinks',
    'food_drink': 'food_drinks',
    'electronics': 'electronics',
    'fashion_bags_accessories': 'fashion',
    'fashion_childrens_clothes': 'fashion',
    'fashion_mens_clothing': 'fashion',
    'fashion_shoes': 'fashion',
    'fashion_sport': 'fashion',
    'fashion_underwear_beachwear': 'fashion',
    'fashion_womens_clothing': 'fashion',
    'flowers': 'flowers',
    'food': 'food',
    'furniture_decor': 'furniture',
    'furniture_mattress_and_upholstery': 'furniture',
    'living_room_furniture': 'furniture',
    'office_furniture': 'furniture',
    'kitchen_laundry_room_dining_garden_furniture': 'furniture',
    'garden_tools': 'garden_tools',
    'health_beauty': 'health_beauty',
    'home_appliances': 'home_appliances',
    'home_appliances_2': 'home_appliances',
    'home_comfort': 'home_comfort',
    'home_comfort_2': 'home_comfort',
    'home_construction': 'home_construction',
    'home_utilities': 'home_utilities',
    'insurance_and_services': 'insurance_and_services',
    'landline_phones': 'landline_phones',
    'luggage_accessories': 'luggage_accessories',
    'marketplace': 'marketplace',
    'music': 'music',
    'musical_cds_dvds': 'music',
    'musical_instruments': 'music',
    'party_supplies': 'party_supplies',
    'pcs': 'pcs',
    'perfumery': 'perfumery',
    'pet_shop': 'pet_shop',
    'portable_home_oven_and_coffee': 'portable_home_oven_and_coffee',
    'signaling_and_security': 'signaling_and_security',
    'small_appliances': 'small_appliances',
    'sports_leisure': 'sports_leisure',
    'stationery': 'stationery',
    'tablets_printing_image': 'tablets_printing_image',
    'telephony': 'telephony',
    'the_kitchen': 'the_kitchen',
    'toys': 'toys',
    'watches_gifts': 'watches_gifts',
    'Unknown' : 'Unknown'
}

In [6]:
df['order_purchase_timestamp'] = pd.to_datetime(df['order_purchase_timestamp'])
df['order_year'] = df['order_purchase_timestamp'].dt.year
df['order_month'] = df['order_purchase_timestamp'].dt.month
df['order_day'] = df['order_purchase_timestamp'].dt.day
df['order_hour'] = df['order_purchase_timestamp'].dt.hour


In [7]:
# 계절 정보를 컬럼으로 추가
# 봄 : 9, 10, 11, 여름 : 12, 1, 2, 가을 : 3, 4, 5, 겨울 : 6, 7, 8
def get_season(month):
    if month in [12, 1, 2]:
        return 'Summer'
    elif month in [3, 4, 5]:
        return 'Autumn'
    elif month in [6, 7, 8]:
        return 'Winter'
    else:
        return 'Spring'

df['season'] = df['order_month'].apply(get_season)
df.head()

Unnamed: 0,order_id,order_purchase_timestamp,seller_id,product_category_name,price,freight_value,customer_state,review_score,order_year,order_month,order_day,order_hour,season
0,ORDER_75477,2018-09-13 08:59:02,SELLER_0452,cool_stuff,58.9,13.29,RJ,5,2018,9,13,8,Spring
1,ORDER_63604,2018-04-26 10:53:06,SELLER_0414,pet_shop,239.9,19.93,SP,4,2018,4,26,10,Autumn
2,ORDER_05566,2019-01-14 14:33:31,SELLER_1637,furniture_decor,199.0,17.87,MG,5,2019,1,14,14,Summer
3,ORDER_04632,2018-02-04 13:57:51,SELLER_1445,garden_tools,199.9,18.14,SP,5,2018,2,4,13,Summer
4,ORDER_61452,2018-05-15 21:42:34,SELLER_0581,home_utilities,21.9,12.69,MG,4,2018,5,15,21,Autumn


In [8]:
df['order_date'] = df['order_purchase_timestamp'].dt.date
df['order_day_of_week'] = df['order_purchase_timestamp'].dt.dayofweek
df.head()

Unnamed: 0,order_id,order_purchase_timestamp,seller_id,product_category_name,price,freight_value,customer_state,review_score,order_year,order_month,order_day,order_hour,season,order_date,order_day_of_week
0,ORDER_75477,2018-09-13 08:59:02,SELLER_0452,cool_stuff,58.9,13.29,RJ,5,2018,9,13,8,Spring,2018-09-13,3
1,ORDER_63604,2018-04-26 10:53:06,SELLER_0414,pet_shop,239.9,19.93,SP,4,2018,4,26,10,Autumn,2018-04-26,3
2,ORDER_05566,2019-01-14 14:33:31,SELLER_1637,furniture_decor,199.0,17.87,MG,5,2019,1,14,14,Summer,2019-01-14,0
3,ORDER_04632,2018-02-04 13:57:51,SELLER_1445,garden_tools,199.9,18.14,SP,5,2018,2,4,13,Summer,2018-02-04,6
4,ORDER_61452,2018-05-15 21:42:34,SELLER_0581,home_utilities,21.9,12.69,MG,4,2018,5,15,21,Autumn,2018-05-15,1


In [9]:
# 0: 주중, 1: 주말
df['is_weekend'] = df['order_purchase_timestamp'].dt.dayofweek.apply(lambda x: 1 if x >= 5 else 0)
df.head()

Unnamed: 0,order_id,order_purchase_timestamp,seller_id,product_category_name,price,freight_value,customer_state,review_score,order_year,order_month,order_day,order_hour,season,order_date,order_day_of_week,is_weekend
0,ORDER_75477,2018-09-13 08:59:02,SELLER_0452,cool_stuff,58.9,13.29,RJ,5,2018,9,13,8,Spring,2018-09-13,3,0
1,ORDER_63604,2018-04-26 10:53:06,SELLER_0414,pet_shop,239.9,19.93,SP,4,2018,4,26,10,Autumn,2018-04-26,3,0
2,ORDER_05566,2019-01-14 14:33:31,SELLER_1637,furniture_decor,199.0,17.87,MG,5,2019,1,14,14,Summer,2019-01-14,0,0
3,ORDER_04632,2018-02-04 13:57:51,SELLER_1445,garden_tools,199.9,18.14,SP,5,2018,2,4,13,Summer,2018-02-04,6,1
4,ORDER_61452,2018-05-15 21:42:34,SELLER_0581,home_utilities,21.9,12.69,MG,4,2018,5,15,21,Autumn,2018-05-15,1,0


# 데이터 추출

In [10]:
practice_selected = ['order_date', 'product_category_name', 'season', 'is_weekend']
practice_df = df[practice_selected]
practice_df.head()

Unnamed: 0,order_date,product_category_name,season,is_weekend
0,2018-09-13,cool_stuff,Spring,0
1,2018-04-26,pet_shop,Autumn,0
2,2019-01-14,furniture_decor,Summer,0
3,2018-02-04,garden_tools,Summer,1
4,2018-05-15,home_utilities,Autumn,0


In [11]:
# 'order_date' 컬럼을 datetime 형식으로 변환
practice_df['order_date'] = pd.to_datetime(practice_df['order_date'])

# 날짜별, 카테고리별로 count를 계산하여 'demand' 컬럼 추가
demand_df = practice_df.groupby(['order_date', 'product_category_name', 'season', 'is_weekend']).size().reset_index(name='demand')

demand_df.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  practice_df['order_date'] = pd.to_datetime(practice_df['order_date'])


Unnamed: 0,order_date,product_category_name,season,is_weekend,demand
0,2017-10-03,fashion_shoes,Spring,0,1
1,2017-10-03,furniture_decor,Spring,0,2
2,2017-10-03,sports_leisure,Spring,0,3
3,2017-10-03,toys,Spring,0,1
4,2017-10-03,watches_gifts,Spring,0,1
5,2017-10-04,Unknown,Spring,0,1
6,2017-10-04,air_conditioning,Spring,0,4
7,2017-10-04,automotive,Spring,0,1
8,2017-10-04,baby,Spring,0,3
9,2017-10-04,bed_bath_table,Spring,0,2


# 7월 테스트셋 검증

In [13]:
import pandas as pd
from prophet import Prophet
from tqdm import tqdm

test_start_date = '2019-07-01'
test_end_date = '2019-07-31'

test_data = demand_df[(demand_df['order_date'] >= test_start_date) & (demand_df['order_date'] <= test_end_date)]

# 2019년 7월 데이터 제거
train_end_date = '2019-06-30'
demand_df_filtered = demand_df[demand_df['order_date'] <= train_end_date]

# 계절을 숫자로 매핑
def map_season(season):
    if season == 'Spring':
        return 1
    elif season == 'Summer':
        return 2
    elif season == 'Autumn':
        return 3
    elif season == 'Winter':
        return 4
    else:
        return 0  # 예외 처리

demand_df_filtered['season'] = demand_df_filtered['season'].apply(map_season)

def is_weekend(date):
    day_of_week = date.weekday()  # 0: 월요일, 6: 일요일
    return 1 if day_of_week >= 5 else 0  # 토요일(5), 일요일(6)은 주말로 처리


category_groups = demand_df_filtered.groupby('product_category_name')

results = pd.DataFrame()

# 모델 훈련 및 예측 수행
for category, group in tqdm(category_groups, desc="Processing categories"):
    if category in test_data['product_category_name'].unique():  # 테스트 데이터셋에 해당 카테고리가 있는 경우에만 예측 수행

        group = group.rename(columns={'order_date': 'ds', 'demand': 'y', 'season': 'season', 'is_weekend': 'is_weekend'})
        group['is_weekend'] = group['ds'].apply(is_weekend)

        # 하이퍼파라미터 # 값이 크면 모델이 더 유연해지지만, 훈련 시간이 증가
        model = Prophet(growth ='linear',
                        n_changepoints = 25,
                        changepoint_range=0.8,
                        changepoint_prior_scale=0.05)

        model.add_regressor('season')
        model.add_regressor('is_weekend')

        model.fit(group[['ds', 'y', 'season', 'is_weekend']])

        future_dates = pd.date_range(start='2019-07-01', end='2019-07-31', freq='D')
        future = pd.DataFrame({'ds': future_dates})

        future['season'] = 4
        future['is_weekend'] = future['ds'].apply(is_weekend)

        forecast = model.predict(future)

        forecast['product_category_name'] = category
        forecast['order_date'] = forecast['ds']
        forecast['demand'] = forecast['yhat']
        results = pd.concat([results, forecast[['order_date', 'product_category_name','demand']]])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  demand_df_filtered['season'] = demand_df_filtered['season'].apply(map_season)
Processing categories:   0%|          | 0/72 [00:00<?, ?it/s]INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpizsxicff/mb6zxfsb.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpizsxicff/ir1gplg6.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=28807', 'data', 'file=/tmp/tmpizsxicff/mb6zxfsb.json', 'init=/tmp/tm

In [14]:
print(len(results))
results.tail()

2015


Unnamed: 0,order_date,product_category_name,demand
26,2019-07-27,watches_gifts,18.574814
27,2019-07-28,watches_gifts,15.982368
28,2019-07-29,watches_gifts,16.857763
29,2019-07-30,watches_gifts,19.53888
30,2019-07-31,watches_gifts,18.814422


In [15]:
# 모든 날짜와 카테고리의 조합 생성
all_dates = pd.date_range(start=test_start_date, end=test_end_date, freq='D')
all_categories = test_data['product_category_name'].unique()
all_combinations = pd.MultiIndex.from_product([all_dates, all_categories], names=['order_date', 'product_category_name']).to_frame(index=False)

# 예측 결과와 실제 데이터를 모든 조합에 맞게 병합
results = pd.merge(all_combinations, results, on=['order_date', 'product_category_name'], how='left')
test_data = pd.merge(all_combinations, test_data, on=['order_date', 'product_category_name'], how='left')


merged_results = pd.merge(results, test_data, on=['order_date', 'product_category_name'], how='inner')

In [16]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score, mean_squared_error

# NaN을 0으로 대체한 후 계산
mae = mean_absolute_error(merged_results['demand_x'].fillna(0), merged_results['demand_y'].fillna(0))
r2 = r2_score(merged_results['demand_x'].fillna(0), merged_results['demand_y'].fillna(0))
rmse = mean_squared_error(merged_results['demand_x'].fillna(0), merged_results['demand_y'].fillna(0), squared=False)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2) Score: {r2}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

Mean Absolute Error (MAE): 2.496000502224008
R-squared (R2) Score: 0.48385077702458634
Root Mean Squared Error (RMSE): 4.171865956641915


In [17]:
# 판매량의 스케일과 비교하여 위의 평가지표가 어떤지 검토
sales_data = demand_df['demand']

# 기술 통계량 계산
stats = sales_data.describe()
print(stats)

count    17461.000000
mean         5.750816
std          7.079967
min          1.000000
25%          1.000000
50%          3.000000
75%          7.000000
max        194.000000
Name: demand, dtype: float64


##MAE (2.496):

수요의 평균값(5.75)과 비교할 때 MAE가 약 2.496으로, 이는 비교적 적당한 오차로 보입니다. 특히 수요의 중앙값(3)과 비교했을 때 MAE는 상당히 낮습니다.
수요 데이터의 25%가 1 이하, 75%가 7 이하인 점을 고려할 때, MAE가 2.496이라면 이는 수요가 낮은 구간에서는 매우 양호한 성능을 보이는 것으로 해석될 수 있습니다.

##R2 Score (0.4838):

R2 값이 0.4838이라면 이는 모델이 약 48.38%의 분산을 설명하고 있다는 의미입니다. 이는 데이터의 절반 가까이를 설명할 수 있는 수준으로, 매우 높은 설명력은 아니지만, 초기 모델로서는 적당한 수준입니다.
특히 판매량 예측에서 다양한 카테고리와 변동성을 고려할 때, 이 정도의 R2는 비교적 양호한 결과로 볼 수 있습니다.

##RMSE (4.1719):

RMSE가 4.1719라는 것은 평균적으로 예측이 약 4.1719 정도의 오차를 가지고 있다는 것을 의미합니다. 이는 표준 편차(7.08)보다 낮아, 예측 오차가 데이터의 분산보다 작음을 나타냅니다.

# 8월 데이터셋에 모델 적용 & 예측

In [18]:
# 2019년 8월 데이터셋 만들기

def get_season(month):
    if month in [12, 1, 2]:
        return 2  
    elif month in [3, 4, 5]:
        return 3  
    elif month in [6, 7, 8]:
        return 4  
    else:
        return 1  


demand_df['season'] = demand_df['order_date'].dt.month.apply(get_season)


dates = pd.date_range(start='2019-08-01', end='2019-08-31', freq='D')


date_list = []

for date in dates:
    date_list.append({'date': date})

X_august = pd.DataFrame(date_list)

X_august['product_category_name'] = ''

X_august['is_weekend'] = X_august['date'].apply(lambda x: 1 if x.weekday() >= 5 else 0)

X_august['season'] = X_august['date'].dt.month.apply(get_season)

X_august.head()

Unnamed: 0,date,product_category_name,is_weekend,season
0,2019-08-01,,0,4
1,2019-08-02,,0,4
2,2019-08-03,,1,4
3,2019-08-04,,1,4
4,2019-08-05,,0,4


In [19]:
results = pd.DataFrame()


for category in demand_df['product_category_name'].unique():
    group = demand_df[demand_df['product_category_name'] == category]

    if not group.empty:  # 카테고리가 존재하는 경우에만 진행
        
        group = group.rename(columns={'order_date': 'ds', 'demand': 'y', 'season': 'season', 'is_weekend': 'is_weekend'})

        
        model = Prophet()
        
        model.add_regressor('season')
        model.add_regressor('is_weekend')

        model.fit(group[['ds', 'y', 'season', 'is_weekend']])

        future = X_august.rename(columns={'date': 'ds'})

        future['season'] = future['ds'].dt.month.apply(get_season)
        future['is_weekend'] = future['ds'].apply(lambda x: 1 if x.weekday() >= 5 else 0)

        forecast = model.predict(future)

        forecast['product_category_name'] = category
        forecast['order_date'] = forecast['ds']
        forecast['demand'] = forecast['yhat']
        results = pd.concat([results, forecast[['order_date', 'product_category_name', 'demand']]])

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpizsxicff/j2_n7tvr.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpizsxicff/v4rtxnry.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=80092', 'data', 'file=/tmp/tmpizsxicff/j2_n7tvr.json', 'init=/tmp/tmpizsxicff/v4rtxnry.json', 'output', 'file=/tmp/tmpizsxicff/prophet_model2e1g1w7j/prophet_model-20240708073228.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
07:32:28 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
07:32:28 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonalit

In [20]:
results_august = results.copy()
results_august.head()

Unnamed: 0,order_date,product_category_name,demand
0,2019-08-01,fashion_shoes,1.366129
1,2019-08-02,fashion_shoes,1.29084
2,2019-08-03,fashion_shoes,1.128064
3,2019-08-04,fashion_shoes,1.192414
4,2019-08-05,fashion_shoes,1.114627


In [21]:
results_august.to_csv('results_august.csv', index=False)

In [29]:
category_demand = results_august.groupby('product_category_name')['demand'].sum().reset_index()

category_demand = category_demand.sort_values(by='demand', ascending=False)

top_5_categories = category_demand.head(10)

print("8월에 가장 수요가 많을 것으로 예상되는 카테고리 10개:")
print(top_5_categories)

8월에 가장 수요가 많을 것으로 예상되는 카테고리 10개:
    product_category_name      demand
38          health_beauty  773.258055
8          bed_bath_table  622.618686
44         home_utilities  563.124970
71          watches_gifts  525.073690
35        furniture_decor  394.037190
64         sports_leisure  280.994785
6              automotive  269.392456
7                    baby  217.834986
37           garden_tools  191.840781
14  computers_accessories  191.357037


# 9월 데이터셋에도 적용

In [22]:
# 2019년 9월 데이터셋 만들기

def get_season(month):
    if month in [12, 1, 2]:
        return 2  
    elif month in [3, 4, 5]:
        return 3  
    elif month in [6, 7, 8]:
        return 4  
    else:
        return 1 


demand_df['season'] = demand_df['order_date'].dt.month.apply(get_season)

dates = pd.date_range(start='2019-09-01', end='2019-09-30', freq='D')

date_list = []

for date in dates:
    date_list.append({'date': date})

X_september = pd.DataFrame(date_list)

X_september['product_category_name'] = ''

X_september['is_weekend'] = X_september['date'].apply(lambda x: 1 if x.weekday() >= 5 else 0)

X_september['season'] = X_september['date'].dt.month.apply(get_season)

X_september.head()

Unnamed: 0,date,product_category_name,is_weekend,season
0,2019-09-01,,1,1
1,2019-09-02,,0,1
2,2019-09-03,,0,1
3,2019-09-04,,0,1
4,2019-09-05,,0,1


In [23]:
results = pd.DataFrame()


for category in demand_df['product_category_name'].unique():
    group = demand_df[demand_df['product_category_name'] == category]

    if not group.empty:  # 카테고리가 존재하는 경우에만 진행
        
        group = group.rename(columns={'order_date': 'ds', 'demand': 'y', 'season': 'season', 'is_weekend': 'is_weekend'})

        model = Prophet()
        
        model.add_regressor('season')
        model.add_regressor('is_weekend')

        model.fit(group[['ds', 'y', 'season', 'is_weekend']])

        future = X_september.rename(columns={'date': 'ds'})

        future['season'] = future['ds'].dt.month.apply(get_season)
        future['is_weekend'] = future['ds'].apply(lambda x: 1 if x.weekday() >= 5 else 0)

        forecast = model.predict(future)

        forecast['product_category_name'] = category
        forecast['order_date'] = forecast['ds']
        forecast['demand'] = forecast['yhat']
        results = pd.concat([results, forecast[['order_date', 'product_category_name', 'demand']]])

INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpizsxicff/3thkgn3b.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpizsxicff/dobgdbmp.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=72495', 'data', 'file=/tmp/tmpizsxicff/3thkgn3b.json', 'init=/tmp/tmpizsxicff/dobgdbmp.json', 'output', 'file=/tmp/tmpizsxicff/prophet_model86zfbcuh/prophet_model-20240708074704.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
07:47:04 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
07:47:04 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling yearly seasonalit

In [24]:
results_september = results.copy()
results_september.head()

Unnamed: 0,order_date,product_category_name,demand
0,2019-09-01,fashion_shoes,1.310319
1,2019-09-02,fashion_shoes,1.232531
2,2019-09-03,fashion_shoes,1.52742
3,2019-09-04,fashion_shoes,1.178044
4,2019-09-05,fashion_shoes,1.480821


In [25]:
results_september.to_csv('results_september.csv', index=False)

In [30]:
category_demand_september = results_september.groupby('product_category_name')['demand'].sum().reset_index()

category_demand_september = category_demand_september.sort_values(by='demand', ascending=False)

top_5_categories = category_demand_september.head(10)

print("9월에 가장 수요가 많을 것으로 예상되는 카테고리 10개:")
print(top_5_categories)

9월에 가장 수요가 많을 것으로 예상되는 카테고리 10개:
   product_category_name      demand
38         health_beauty  786.283578
71         watches_gifts  614.033196
8         bed_bath_table  591.318865
44        home_utilities  506.626352
35       furniture_decor  463.066432
37          garden_tools  270.773206
6             automotive  242.297432
7                   baby  242.054481
64        sports_leisure  223.078903
59             perfumery  213.336913
