# 공용 함수
- 수정하지 않는 것을 추천드립니다

In [1]:
from jy import get_all, get_scaled
import pandas as pd
import plotly.express as px
from neuralprophet import NeuralProphet, set_log_level
set_log_level("ERROR")

In [2]:
# Load the original CSV data through the project helpers (no relative path needed).
# get_scaled mode: 0 = sd, 1 = nm, 2 = mm, 3 = rb
main_df, scaled_df = get_all(), get_scaled(0)
# Keep the column indexes handy for the positional slicing done in later cells.
main_df_columns, scaled_columns = main_df.columns, scaled_df.columns
date_df = main_df.loc[:, "date"]

In [3]:
# Scaled variables belonging to each district.
# "common" holds the variables that are not tied to a single district.
# Each (start, stop) pair is a positional range into scaled_columns.
gw_vars_scaled, ddm_vars_scaled, sd_vars_scaled, jl_vars_scaled, common_vars_scaled = (
    scaled_df.loc[:, scaled_columns[start:stop]]
    for start, stop in ((1, 8), (8, 15), (15, 22), (22, 29), (29, 45))
)

In [4]:
# Unscaled variables belonging to each district.
# "common" holds the variables that are not tied to a single district.
# Each (start, stop) pair is a positional range into main_df_columns.
gw_vars, ddm_vars, sd_vars, jl_vars, common_vars = (
    main_df.loc[:, main_df_columns[start:stop]]
    for start, stop in ((2, 9), (9, 16), (16, 23), (23, 30), (30, 46))
)

In [5]:
# Ttareungi (bike) demand column of each district, taken from
# positions 46-49 of main_df_columns.
gw_demand, ddm_demand, sd_demand, jl_demand = (
    main_df.loc[:, main_df_columns[position]] for position in range(46, 50)
)

In [6]:
# 날짜와 수요량 컬럼을 원하는 대로 정리해주는 함수
def create_y(df_demand, date_name, y_name):
    """Pair the shared date column with a demand column under custom names.

    Args:
        df_demand: Demand data to place next to the module-level ``date_df``.
        date_name: New name for the ``"date"`` column.
        y_name: New name for the second column of the combined frame.

    Returns:
        A DataFrame made of ``date_df`` and ``df_demand`` with both columns renamed.
    """
    combined = pd.concat([date_df, df_demand], axis=1)
    # The demand column is addressed by position because its label varies per district.
    demand_label = combined.columns[1]
    return combined.rename(columns={"date": date_name, demand_label: y_name})

In [7]:
# 변수들을 일부 샘플링한 dataframe의 리스트를 반환하는 함수
## amount는 (수요량 이외에) 추가로 샘플링할 변수의 개수 (0, 1, 2 가능. 0은 수요량만 반환)
## df_y는 날짜와 수요량이 포함된 데이터프레임. create_y 함수로 정제한 값을 넣으면 됩니다
## df_list는 추가 변수로 사용할 모든 데이터프레임들의 리스트. ~~~_vars 중 원하는 것을 리스트로 넣으면 됩니다
    ## 예시) 광진구 + 공용 변수의 경우: df_list=[gw_vars, common_vars]
## 반환하는 값은 딕셔너리 {"data": 데이터프레임, "name":포함된 컬럼명의 리스트} 로 이루어진 리스트입니다
    
def sample_multivariate(amount, df_y, df_list):
    """Build candidate datasets that add 0, 1 or 2 extra variables to ``df_y``.

    Args:
        amount: Number of extra variables per sample (0, 1 or 2). Any other
            value yields an empty list.
        df_y: DataFrame holding the date and demand columns.
        df_list: List of DataFrames whose columns are the candidate variables.
            It is only concatenated when ``amount`` is 1 or 2, so it may be
            empty when ``amount`` is 0.

    Returns:
        A list of dictionaries ``{"data": DataFrame, "name": [column labels]}``,
        one per column (``amount == 1``) or per unordered column pair
        (``amount == 2``), in column order.
    """
    from itertools import combinations

    if amount == 0:
        # Only the target is requested; the candidate frames are not needed.
        return [{"data": pd.concat([df_y], axis=1), "name": []}]
    if amount not in (1, 2):
        return []

    # Merge every candidate frame side by side so columns can be picked freely.
    df = pd.concat(df_list, axis=1)

    if amount == 1:
        return [
            {"data": pd.concat([df_y, df.loc[:, column]], axis=1), "name": [column]}
            for column in df.columns
        ]

    samples = []
    # combinations() keeps positional order, matching a nested "i < j" loop.
    for first, second in combinations(df.columns, 2):
        targets = [first, second]
        sample = pd.concat([df_y, df.loc[:, targets]], axis=1)
        samples.append({"data": sample, "name": targets})
    return samples
    

In [8]:
# 데이터를 자르는 함수
# 일단은 2021년 1월 1일 기준으로 자르게 일반화 해놓았습니다

def split_sample(df, split_label=1096):
    """Split a frame into train and test parts at an index label.

    Args:
        df: DataFrame to split; slicing is label-based (``.loc``).
        split_label: First index label of the test part. The default, 1096,
            is the row holding 1/1/2021 in this notebook's data.

    Returns:
        ``{"train": rows up to and including split_label - 1,
        "test": rows from split_label onward}``.
    """
    # .loc slices include both endpoints, so train must stop one label early.
    train = df.loc[: split_label - 1]
    test = df.loc[split_label:]
    return {"train": train, "test": test}


In [9]:
def create_result(indexes):
    """Return an empty result table whose rows are ``"Column"`` followed by ``indexes``.

    Args:
        indexes: List of row labels to place after the leading ``"Column"`` row.

    Returns:
        A DataFrame with no columns and the described row index.
    """
    row_labels = ["Column"] + indexes
    return pd.DataFrame(index=row_labels)

공용 함수 End

In [10]:
# District codes used as column-name suffixes.
names = "GW DDM SD JL".split()
# Candidate regressor pairs, given as column-name prefixes to combine with a district code.
columns = [
    ["air_", "pop_female_"],
    ["real_pop_", "mvpop_"],
    ["pop_female_", "mvpop_"],
]

In [11]:
# all_dfs[i][j] holds date, demand and the j-th regressor pair for district names[i].
all_dfs = []
for name in names:
    print(name)
    all_dfs.append(
        [
            main_df.loc[
                :, ["date", f"{name}_demand", f"{first}{name}", f"{second}{name}"]
            ]
            for first, second in columns
        ]
    )



#vars = main_df.loc[:,]

GW
DDM
SD
JL


In [12]:
# Inspect the regressor columns (everything after date and demand) of the first DDM frame.
all_dfs[1][0].columns[2:]

Index(['air_DDM', 'pop_female_DDM'], dtype='object')

In [13]:
# For every district, configure one model per candidate regressor pair and
# gather the validation MAE values into a per-district table.
total_result = []
for name, district_dfs in zip(names, all_dfs):
    result_df = pd.DataFrame({})
    maes = []
    for prefixes, candidate in zip(columns, district_dfs):
        # NeuralProphet expects the time column as "ds" and the target as "y".
        df = candidate.rename(columns={"date": "ds", f"{name}_demand": "y"})
        parts = split_sample(df)
        test, train = parts["test"], parts["train"]
        m = NeuralProphet(
            growth="off",  # trend type (linear, discontinuous or off)
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=True,
            batch_size=32,
            epochs=30,
            learning_rate=0.1,
        )
        column_names = [f"{prefix}{name}" for prefix in prefixes]
        m = m.add_lagged_regressor(names=column_names, normalize="minmax")
        m.add_seasonality(name="monthly", period=30.5, fourier_order=5)
        # Training is currently disabled, so maes stays empty:
        # metrics = m.fit(train, freq="D",validation_df=test)
        # maes.append(metrics.loc[29,"MAE_val"])

    result_df[name] = maes
    total_result.append(result_df)

In [None]:
# Show the comparison table collected for GW (names[0]).
total_result[0]

In [None]:
# Show the comparison table collected for DDM (names[1]).
total_result[1]

In [None]:
# Show the comparison table collected for SD (names[2]).
total_result[2]

In [None]:
# Show the comparison table collected for JL (names[3]).
total_result[3]

In [14]:
# Train the final model of every district with the chosen regressor pair
# (the third candidate frame, all_dfs[i][2]).
final = ["pop_female_", "mvpop_"]
final_models = []
for name, district_dfs in zip(names, all_dfs):
    # NeuralProphet expects the time column as "ds" and the target as "y".
    final_df = district_dfs[2].rename(columns={"date": "ds", f"{name}_demand": "y"})
    parts = split_sample(final_df)
    test, train = parts["test"], parts["train"]
    final_column = [prefix + name for prefix in final]
    m = NeuralProphet(
        growth="off",  # trend type (linear, discontinuous or off)
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True,
        batch_size=32,
        epochs=30,
        learning_rate=0.1,
    )
    m = m.add_lagged_regressor(names=final_column, normalize="minmax")
    m.add_seasonality(name="monthly", period=30.5, fourier_order=5)
    metrics = m.fit(train, freq="D", validation_df=test)
    final_models.append(m)
    

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [21]:
# List the fitted models; they were appended in the order of names (GW, DDM, SD, JL).
final_models

[<neuralprophet.forecaster.NeuralProphet at 0x27b97c2e8b0>,
 <neuralprophet.forecaster.NeuralProphet at 0x27b97c45100>,
 <neuralprophet.forecaster.NeuralProphet at 0x27bee754dc0>,
 <neuralprophet.forecaster.NeuralProphet at 0x27b97c45c40>]

In [39]:
# Rebuild the GW frame used for the final model and keep only its test part.
df = all_dfs[0][2].rename(columns={"date": "ds", "GW_demand": "y"})
test = split_sample(df)["test"]

In [40]:
# Display the GW test rows produced by split_sample (index labels 1096 onward).
test

Unnamed: 0,ds,y,pop_female_GW,mvpop_GW
1096,1/1/2021,2.070,166964,103938
1097,1/2/2021,2.062,166964,159088
1098,1/3/2021,1.918,166964,131982
1099,1/4/2021,3.238,166964,280624
1100,1/5/2021,2.864,166964,284649
...,...,...,...,...
1456,12/27/2021,3.830,164058,332162
1457,12/28/2021,4.510,164058,347442
1458,12/29/2021,4.490,164058,359176
1459,12/30/2021,4.444,164058,377097


In [42]:
# Run the GW model (final_models[0]) on the GW test rows.
res = final_models[0].predict(test)

Predicting: 35it [00:00, ?it/s]

In [None]:
# Save the GW test-period predictions. to_csv expects an output path
# (or file object) as its first argument, not the DataFrame itself.
res.to_csv("res.csv")

In [None]:
# Build a daily frame covering 365 days from 2022-01-01, with a placeholder target of 0.
future = pd.date_range(start="2022-01-01", periods=365, freq="D")
p_data = pd.DataFrame({"ds": future, "y": [0] * len(future)})

# NOTE(review): p_data only has "ds" and "y", while final_models[0] was set up
# with the lagged regressors pop_female_GW and mvpop_GW in the training cell.
# predict may reject a frame without those columns; confirm before relying on this.
forecast1 = final_models[0].predict(p_data)