# 사용 패키지

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import lightgbm as lgb

In [2]:
# Competition evaluation metric
def nmae(true_df, pred_df):
    """Return the normalized mean absolute error between two id/value frames.

    Both frames are read positionally: column 0 is the row identifier and
    column 1 is the value. Prediction rows whose identifier does not occur in
    ``true_df`` are dropped, both frames are sorted by identifier in ascending
    order, and the score is ``mean(|true - pred| / true)``.

    Args:
        true_df: Frame holding the identifiers and the ground-truth values.
        pred_df: Frame holding the identifiers and the predicted values.

    Returns:
        The mean of the element-wise absolute error divided by the true value.
    """
    true_id_col = true_df.columns[0]
    pred_id_col = pred_df.columns[0]

    # Keep only predictions whose identifier is present in the ground truth.
    wanted = pred_df.iloc[:, 0].isin(true_df.iloc[:, 0])
    pred_sorted = pred_df[wanted].sort_values(by=[pred_id_col], ascending=[True])
    true_sorted = true_df.sort_values(by=[true_id_col], ascending=[True])

    actual = true_sorted.iloc[:, 1].to_numpy()
    predicted = pred_sorted.iloc[:, 1].to_numpy()

    # Each absolute error is scaled by its own true value before averaging.
    return np.mean(np.abs(actual - predicted) / actual)

# 데이터 로드

In [3]:
# Base directory for the competition files; later read/write paths are built
# by string concatenation onto this value.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
url = 'C:/Users/WIN/Downloads/가스공사/'

In [4]:
# Load the supply table; the CSV is decoded as cp949.
total = pd.read_csv(url + '한국가스공사_시간별 공급량_20181231.csv', encoding='cp949')

In [5]:
total.head()

Unnamed: 0,연월일,시간,구분,공급량
0,2013-01-01,1,A,2497.129
1,2013-01-01,2,A,2363.265
2,2013-01-01,3,A,2258.505
3,2013-01-01,4,A,2243.969
4,2013-01-01,5,A,2344.105


# 전처리

In [6]:
total['구분'].unique()

array(['A', 'B', 'C', 'D', 'E', 'G', 'H'], dtype=object)

In [7]:
# Encode the '구분' labels as integers (codes follow order of first appearance).
# Guarded so that re-running the cell is a no-op: once the column is numeric,
# rebuilding d_map from it would replace the label keys with integer keys and
# break the later mapping of the test set's string labels.
if not pd.api.types.is_numeric_dtype(total['구분']):
    d_map = {label: code for code, label in enumerate(total['구분'].unique())}
    total['구분'] = total['구분'].map(d_map)

In [8]:
d_map

{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'G': 5, 'H': 6}

In [9]:
total

Unnamed: 0,연월일,시간,구분,공급량
0,2013-01-01,1,0,2497.129
1,2013-01-01,2,0,2363.265
2,2013-01-01,3,0,2258.505
3,2013-01-01,4,0,2243.969
4,2013-01-01,5,0,2344.105
...,...,...,...,...
368083,2018-12-31,20,6,681.033
368084,2018-12-31,21,6,669.961
368085,2018-12-31,22,6,657.941
368086,2018-12-31,23,6,610.953


In [10]:
# Convert the date column with pd.to_datetime so the .dt accessor can be used on it.
total['연월일'] = pd.to_datetime(total['연월일'])

In [11]:
# Derive calendar features from the parsed date column.
dates = total['연월일'].dt
for field in ('year', 'month', 'day', 'weekday'):
    total[field] = getattr(dates, field)

In [12]:
total

Unnamed: 0,연월일,시간,구분,공급량,year,month,day,weekday
0,2013-01-01,1,0,2497.129,2013,1,1,1
1,2013-01-01,2,0,2363.265,2013,1,1,1
2,2013-01-01,3,0,2258.505,2013,1,1,1
3,2013-01-01,4,0,2243.969,2013,1,1,1
4,2013-01-01,5,0,2344.105,2013,1,1,1
...,...,...,...,...,...,...,...,...
368083,2018-12-31,20,6,681.033,2018,12,31,0
368084,2018-12-31,21,6,669.961,2018,12,31,0
368085,2018-12-31,22,6,657.941,2018,12,31,0
368086,2018-12-31,23,6,610.953,2018,12,31,0


## 1,2,3월만으로 재구성

In [13]:
# Boolean mask selecting rows whose month is 1, 2 or 3.
mon = total['month'].isin([1,2,3])
# Subset that the train / validation frames are built from.
total1 = total[mon]
total1

Unnamed: 0,연월일,시간,구분,공급량,year,month,day,weekday
0,2013-01-01,1,0,2497.129,2013,1,1,1
1,2013-01-01,2,0,2363.265,2013,1,1,1
2,2013-01-01,3,0,2258.505,2013,1,1,1
3,2013-01-01,4,0,2243.969,2013,1,1,1
4,2013-01-01,5,0,2344.105,2013,1,1,1
...,...,...,...,...,...,...,...,...
361483,2018-03-31,20,6,244.162,2018,3,31,5
361484,2018-03-31,21,6,248.059,2018,3,31,5
361485,2018-03-31,22,6,231.181,2018,3,31,5
361486,2018-03-31,23,6,199.022,2018,3,31,5


## 전처리 마무리

In [14]:
# Hold out 2018 for validation and train on 2013-2017.
train_years = [2013,2014,2015,2016,2017]
val_years = [2018]

In [78]:
# Overrides the split defined in the previous cell: both lists now cover every year.
# NOTE(review): with identical year lists the validation frame built from them
# selects the same rows as the training frame, so validation metrics are in-sample.
train_years = [2013,2014,2015,2016,2017,2018]
val_years = [2013,2014,2015,2016,2017,2018]

In [79]:
# Build the train / validation frames from whichever year lists are currently set.
train = total1[total1['year'].isin(train_years)]
val = total1[total1['year'].isin(val_years)]

In [80]:
train

Unnamed: 0,연월일,시간,구분,공급량,year,month,day,weekday
0,2013-01-01,1,0,2497.129,2013,1,1,1
1,2013-01-01,2,0,2363.265,2013,1,1,1
2,2013-01-01,3,0,2258.505,2013,1,1,1
3,2013-01-01,4,0,2243.969,2013,1,1,1
4,2013-01-01,5,0,2344.105,2013,1,1,1
...,...,...,...,...,...,...,...,...
361483,2018-03-31,20,6,244.162,2018,3,31,5
361484,2018-03-31,21,6,248.059,2018,3,31,5
361485,2018-03-31,22,6,231.181,2018,3,31,5
361486,2018-03-31,23,6,199.022,2018,3,31,5


In [81]:
# Model inputs and regression target for both splits.
features = ['구분', 'month', 'day', 'weekday', '시간']

train_x, train_y = train[features], train['공급량']
val_x, val_y = val[features], val['공급량']

In [82]:
train_x

Unnamed: 0,구분,month,day,weekday,시간
0,0,1,1,1,1
1,0,1,1,1,2
2,0,1,1,1,3
3,0,1,1,1,4
4,0,1,1,1,5
...,...,...,...,...,...
361483,6,3,31,5,20
361484,6,3,31,5,21
361485,6,3,31,5,22
361486,6,3,31,5,23


In [83]:
train_y

0         2497.129
1         2363.265
2         2258.505
3         2243.969
4         2344.105
            ...   
361483     244.162
361484     248.059
361485     231.181
361486     199.022
361487     190.212
Name: 공급량, Length: 90888, dtype: float64

# 학습

## 기본 LGBM 모델

In [173]:
# Wrap the feature frames and targets in LightGBM datasets.
d_train = lgb.Dataset(train_x, train_y)
d_val = lgb.Dataset(val_x, val_y)

# Plain regression objective, evaluated with MAE, with a fixed seed.
params = {
    'objective': 'regression',
    'metric':'mae',
    'seed':42
}

# The verbose_eval / early_stopping_rounds keyword arguments were removed from
# lgb.train in LightGBM 4.0; the callbacks below are the supported equivalent:
# stop after 50 rounds without improvement and log the metric every 20 rounds.
model = lgb.train(
    params,
    d_train,
    num_boost_round=500,
    valid_sets=[d_val],
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(period=20),
    ],
)

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 75768, number of used features: 5
[LightGBM] [Info] Start training from score 1539.324886
Training until validation scores don't improve for 50 rounds
[20]	valid_0's l1: 343.263
[40]	valid_0's l1: 278.069
[60]	valid_0's l1: 263.963
[80]	valid_0's l1: 259.567
[100]	valid_0's l1: 258.498
[120]	valid_0's l1: 257.487
[140]	valid_0's l1: 259.921
[160]	valid_0's l1: 260.794
Early stopping, best iteration is:
[126]	valid_0's l1: 257.202




In [174]:
pre = model.predict(val_x)
pre

array([1848.41971502, 1660.21747756, 1580.79778636, ...,  422.84451122,
        398.89897323,  364.58076495])

In [175]:
# Score with the competition metric. Both frames get a fresh positional index
# as column 0, which nmae uses to align true and predicted rows.
nmae( pd.DataFrame(val_y).reset_index(drop=True).reset_index() , pd.DataFrame(pre).reset_index())

0.16689029228933744

## KNNR 모델

In [164]:
# Baseline
# Changing the value of K did not change the result.
from sklearn.neighbors import KNeighborsRegressor

# Fit the model
# NOTE(review): rebinds `model`, replacing the LightGBM booster trained above.
model = KNeighborsRegressor(n_neighbors=5, weights='distance', p=1, n_jobs=-1) #601
model.fit(train_x, train_y)

KNeighborsRegressor(n_jobs=-1, p=1, weights='distance')

In [165]:
pre = model.predict(val_x)
pre

array([1846.558   , 1782.049   , 1661.4015  , ...,  681.109125,
        347.398   ,  323.415   ])

In [171]:
# Score with the competition metric. Both frames get a fresh positional index
# as column 0, which nmae uses to align true and predicted rows.
nmae( pd.DataFrame(val_y).reset_index(drop=True).reset_index() , pd.DataFrame(pre).reset_index())

0.1810682444276462

## 딥러닝 저장 메소드

In [249]:
# Save the model
# NOTE(review): in top-to-bottom order `model` is the KNeighborsRegressor fitted
# above at this point, while the Keras network is only built further down, so this
# cell appears to rely on out-of-order execution - confirm before Run All.
# load_model is imported here but not used in this cell.
from keras.models import load_model

model.save(url + '/model/DeepAll_176.h5')

In [302]:
# Load a previously saved model
# NOTE(review): `model` is rebound again by the Sequential() definition in a later
# cell, so in top-to-bottom order the loaded model is discarded - confirm intent.
from keras.models import load_model

model = load_model(url + '/model/DeepAll_176.h5')

## 기본 딥러닝

In [84]:
train_x

Unnamed: 0,구분,month,day,weekday,시간
0,0,1,1,1,1
1,0,1,1,1,2
2,0,1,1,1,3
3,0,1,1,1,4
4,0,1,1,1,5
...,...,...,...,...,...
361483,6,3,31,5,20
361484,6,3,31,5,21
361485,6,3,31,5,22
361486,6,3,31,5,23


In [165]:
from keras.models import Sequential
from keras.layers import Dense

# Fully connected regressor: 5 inputs -> 64 (linear) -> 128 (ReLU) -> 1 (linear).
model = Sequential([
    Dense(64, activation='linear', input_shape=(5,)),
    Dense(128, activation='relu'),
    Dense(1, activation='linear'),
])

In [303]:
import tensorflow as tf 

# Optimize MAE with the 'Adam' optimizer; MAE and MSE are also tracked as metrics.
model.compile(optimizer='Adam',
             #optimizer= tf.optimizers.Adamax(lr=0.1, beta_1=0.9, beta_2=0.999) ,
             loss='MAE',
             metrics=['mae', 'mse'])

In [None]:
# Train for 1000 epochs in mini-batches of 128, passing (val_x, val_y) as validation data.
# The returned object is kept so its per-epoch metrics can be plotted afterwards.
history = model.fit(train_x, train_y, epochs=1000, batch_size=128, validation_data=(val_x, val_y))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000


Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000


Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000


Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000


Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/1000
Epoch 219/1000
Epoch 220/1000
Epoch 221/1000


Epoch 222/1000
Epoch 223/1000
Epoch 224/1000
Epoch 225/1000
Epoch 226/1000
Epoch 227/1000
Epoch 228/1000
Epoch 229/1000
Epoch 230/1000
Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000


Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000
Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000
Epoch 300/1000
Epoch 301/1000
Epoch 302/1000
Epoch 303/1000
Epoch 304/1000
Epoch 305/1000
Epoch 306/1000
Epoch 307/1000
Epoch 308/1000
Epoch 309/1000


Epoch 310/1000
Epoch 311/1000
Epoch 312/1000
Epoch 313/1000
Epoch 314/1000
Epoch 315/1000
Epoch 316/1000
Epoch 317/1000
Epoch 318/1000
Epoch 319/1000
Epoch 320/1000
Epoch 321/1000
Epoch 322/1000
Epoch 323/1000
Epoch 324/1000
Epoch 325/1000
Epoch 326/1000
Epoch 327/1000
Epoch 328/1000
Epoch 329/1000
Epoch 330/1000
Epoch 331/1000
Epoch 332/1000
Epoch 333/1000
Epoch 334/1000
Epoch 335/1000
Epoch 336/1000
Epoch 337/1000
Epoch 338/1000
Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/1000
Epoch 351/1000
Epoch 352/1000
Epoch 353/1000


Epoch 354/1000
Epoch 355/1000
Epoch 356/1000
Epoch 357/1000
Epoch 358/1000
Epoch 359/1000
Epoch 360/1000
Epoch 361/1000
Epoch 362/1000
Epoch 363/1000
Epoch 364/1000
Epoch 365/1000
Epoch 366/1000
Epoch 367/1000
Epoch 368/1000
Epoch 369/1000
Epoch 370/1000
Epoch 371/1000
Epoch 372/1000
Epoch 373/1000
Epoch 374/1000
Epoch 375/1000
Epoch 376/1000
Epoch 377/1000
Epoch 378/1000
Epoch 379/1000
Epoch 380/1000
Epoch 381/1000
Epoch 382/1000
Epoch 383/1000
Epoch 384/1000
Epoch 385/1000
Epoch 386/1000
Epoch 387/1000
Epoch 388/1000
Epoch 389/1000
Epoch 390/1000
Epoch 391/1000
Epoch 392/1000
Epoch 393/1000
Epoch 394/1000
Epoch 395/1000
Epoch 396/1000
Epoch 397/1000


Epoch 398/1000
Epoch 399/1000
Epoch 400/1000
Epoch 401/1000
Epoch 402/1000
Epoch 403/1000
Epoch 404/1000
Epoch 405/1000
Epoch 406/1000
Epoch 407/1000
Epoch 408/1000
Epoch 409/1000
Epoch 410/1000
Epoch 411/1000
Epoch 412/1000
Epoch 413/1000
Epoch 414/1000
Epoch 415/1000
Epoch 416/1000
Epoch 417/1000
Epoch 418/1000
Epoch 419/1000
Epoch 420/1000
Epoch 421/1000
Epoch 422/1000
Epoch 423/1000
Epoch 424/1000
Epoch 425/1000
Epoch 426/1000
Epoch 427/1000
Epoch 428/1000
Epoch 429/1000
Epoch 430/1000
Epoch 431/1000
Epoch 432/1000
Epoch 433/1000
Epoch 434/1000
Epoch 435/1000
Epoch 436/1000
Epoch 437/1000
Epoch 438/1000
Epoch 439/1000
Epoch 440/1000
Epoch 441/1000


Epoch 442/1000
Epoch 443/1000
Epoch 444/1000
Epoch 445/1000
Epoch 446/1000
Epoch 447/1000
Epoch 448/1000
Epoch 449/1000
Epoch 450/1000
Epoch 451/1000
Epoch 452/1000
Epoch 453/1000
Epoch 454/1000
Epoch 455/1000
Epoch 456/1000
Epoch 457/1000
Epoch 458/1000
Epoch 459/1000
Epoch 460/1000
Epoch 461/1000
Epoch 462/1000
Epoch 463/1000
Epoch 464/1000
Epoch 465/1000
Epoch 466/1000
Epoch 467/1000
Epoch 468/1000
Epoch 469/1000
Epoch 470/1000
Epoch 471/1000
Epoch 472/1000
Epoch 473/1000
Epoch 474/1000
Epoch 475/1000
Epoch 476/1000
Epoch 477/1000
Epoch 478/1000
Epoch 479/1000
Epoch 480/1000
Epoch 481/1000
Epoch 482/1000

In [None]:
# Plot the training MAE stored in the fit history

fit_loss = history.history['mae']
# fit_loss2 = history.history['mse']
epoch_axis = np.arange(len(fit_loss))

fig, ax = plt.subplots()
ax.plot(epoch_axis, fit_loss, marker=',', c='red', label='MAE')
# ax.plot(epoch_axis, fit_loss2, marker=',', c='blue', label='MSE')

ax.legend(loc='upper right')
ax.grid()
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
plt.show()

In [None]:
pre = model.predict(val_x)
pre

In [None]:
# Score with the competition metric. Both frames get a fresh positional index
# as column 0, which nmae uses to align true and predicted rows.
nmae(pd.DataFrame(val_y).reset_index(drop=True).reset_index(),pd.DataFrame(pre).reset_index())

In [148]:
# d_train = lgb.Dataset(train_x, train_y)
# d_val = lgb.Dataset(val_x, val_y)

# params = {
#     'objective': 'regression',
#     'metric':'mae',
#     'seed':42
# }

# model = lgb.train(params, d_train, 500, d_val, verbose_eval=20, early_stopping_rounds=10)



You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 90888, number of used features: 5
[LightGBM] [Info] Start training from score 1565.788903
Training until validation scores don't improve for 10 rounds
[20]	valid_0's l1: 257.626
[40]	valid_0's l1: 190.049
[60]	valid_0's l1: 170.322
[80]	valid_0's l1: 160.565
[100]	valid_0's l1: 154.2
[120]	valid_0's l1: 149.138
[140]	valid_0's l1: 145.172
[160]	valid_0's l1: 140.636
[180]	valid_0's l1: 136.419
[200]	valid_0's l1: 132.174
[220]	valid_0's l1: 127.898
[240]	valid_0's l1: 123.783
[260]	valid_0's l1: 120.119
[280]	valid_0's l1: 116.587
[300]	valid_0's l1: 113.59
[320]	valid_0's l1: 111.066
[340]	valid_0's l1: 109.122
[360]	valid_0's l1: 107.191
[380]	valid_0's l1: 104.657
[400]	valid_0's l1: 102.049
[420]	valid_0's l1: 99.1702
[440]	valid_0's l1: 96.6724
[460]	valid_0's l1: 94.9101


# 추론 및 결과 제출

In [97]:
# Load the rows to predict and the submission template from the data directory.
test = pd.read_csv(url + 'test.csv')
submission = pd.read_csv(url + 'sample_submission.csv')

In [98]:
test.head()

Unnamed: 0,일자|시간|구분
0,2019-01-01 01 A
1,2019-01-01 02 A
2,2019-01-01 03 A
3,2019-01-01 04 A
4,2019-01-01 05 A


In [99]:
submission.head()

Unnamed: 0,일자|시간|구분,공급량
0,2019-01-01 01 A,0
1,2019-01-01 02 A,0
2,2019-01-01 03 A,0
3,2019-01-01 04 A,0
4,2019-01-01 05 A,0


In [100]:
# Split the combined "date hour group" key on spaces once and reuse the pieces.
key_parts = test['일자|시간|구분'].str.split(' ')
test['일자'] = key_parts.str[0]
test['시간'] = key_parts.str[1].astype(int)
test['구분'] = key_parts.str[2]

In [101]:
test

Unnamed: 0,일자|시간|구분,일자,시간,구분
0,2019-01-01 01 A,2019-01-01,1,A
1,2019-01-01 02 A,2019-01-01,2,A
2,2019-01-01 03 A,2019-01-01,3,A
3,2019-01-01 04 A,2019-01-01,4,A
4,2019-01-01 05 A,2019-01-01,5,A
...,...,...,...,...
15115,2019-03-31 20 H,2019-03-31,20,H
15116,2019-03-31 21 H,2019-03-31,21,H
15117,2019-03-31 22 H,2019-03-31,22,H
15118,2019-03-31 23 H,2019-03-31,23,H


In [102]:
# Parse the date column, then derive the calendar features from it.
test['일자'] = pd.to_datetime(test['일자'])
test_dates = test['일자'].dt
for field in ('year', 'month', 'day', 'weekday'):
    test[field] = getattr(test_dates, field)

In [103]:
test

Unnamed: 0,일자|시간|구분,일자,시간,구분,year,month,day,weekday
0,2019-01-01 01 A,2019-01-01,1,A,2019,1,1,1
1,2019-01-01 02 A,2019-01-01,2,A,2019,1,1,1
2,2019-01-01 03 A,2019-01-01,3,A,2019,1,1,1
3,2019-01-01 04 A,2019-01-01,4,A,2019,1,1,1
4,2019-01-01 05 A,2019-01-01,5,A,2019,1,1,1
...,...,...,...,...,...,...,...,...
15115,2019-03-31 20 H,2019-03-31,20,H,2019,3,31,6
15116,2019-03-31 21 H,2019-03-31,21,H,2019,3,31,6
15117,2019-03-31 22 H,2019-03-31,22,H,2019,3,31,6
15118,2019-03-31 23 H,2019-03-31,23,H,2019,3,31,6


In [104]:
# NOTE(review): this only references the bound method Series.map without calling
# it, so nothing is mapped here; the actual encoding is done in the next cell.
test['구분'].map

<bound method Series.map of 0        A
1        A
2        A
3        A
4        A
        ..
15115    H
15116    H
15117    H
15118    H
15119    H
Name: 구분, Length: 15120, dtype: object>

In [105]:
# Encode the test labels with d_map, the label -> integer dict built from `total`.
# Labels that are not keys of d_map become NaN.
test['구분'] = test['구분'].map(d_map)

In [106]:
test_x = test[features]

In [107]:
test_x

Unnamed: 0,구분,month,day,weekday,시간
0,0,1,1,1,1
1,0,1,1,1,2
2,0,1,1,1,3
3,0,1,1,1,4
4,0,1,1,1,5
...,...,...,...,...,...
15115,6,3,31,6,20
15116,6,3,31,6,21
15117,6,3,31,6,22
15118,6,3,31,6,23


In [108]:
# Predict with whichever estimator `model` currently refers to.
preds = model.predict(test_x)

In [109]:
preds

array([[2051.6062 ],
       [2137.2012 ],
       [2222.7964 ],
       ...,
       [ 284.3889 ],
       [ 284.49387],
       [ 284.59933]], dtype=float32)

In [110]:
# Fill the target column of the submission template with the predictions.
# NOTE(review): the displayed `preds` has one value per row inside a 2-D array;
# presumably pandas accepts that shape for a single column - confirm, or flatten first.
submission['공급량'] = preds

In [111]:
# Write the submission file without the index column.
# NOTE(review): assumes an 'output' directory already exists under `url`.
submission.to_csv(url + '/output/DeepA_279.csv', index=False)