## 01. Import Libraries

In [None]:
import random
import pandas as pd
import numpy as np
import os

from sklearn.ensemble import RandomForestRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
import warnings
# Silence every warning category for the rest of the session. Note that this
# also hides deprecation notices raised by the libraries imported above.
warnings.filterwarnings(action='ignore') 

## 02. Fix Random Seed

In [None]:
def seed_everything(seed):
    """Seed the random number sources used by this notebook.

    Stores the seed in the ``PYTHONHASHSEED`` environment variable and seeds
    both the standard-library ``random`` module and NumPy's global generator.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for reseed in (random.seed, np.random.seed):
        reseed(seed)


seed_everything(42)  # fix the seed for the whole run

## 03. Load Data

In [None]:
# Read the four competition CSV files in one pass; the order of the paths
# matches the order of the names on the left-hand side.
# NOTE(review): building_info is loaded but not referenced in the cells
# visible here — confirm whether it is still needed.
train, test, sub, building_info = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/dacon-electronic-usage0726/train.csv',
        '/kaggle/input/dacon-electronic-usage0726/test.csv',
        '/kaggle/input/dacon-electronic-usage0726/sample_submission.csv',
        '/kaggle/input/dacon-electronic-usage0726/building_info.csv',
    )
)

## 04. Train Data Pre-Processing

In [None]:
# Replace every missing value in the training frame with 0.
train = train.fillna(value=0)

In [None]:
# Split the timestamp into month, day and hour columns so the model can use
# the time-series structure of the data.
# The slice positions assume '일시' strings shaped like 'YYYYMMDD HH'
# — TODO confirm against the raw CSV.
# Vectorised .str slicing replaces a Python lambda that ran once per row.
train['month'] = train['일시'].str.slice(4, 6).astype(int)
train['day'] = train['일시'].str.slice(6, 8).astype(int)
train['time'] = train['일시'].str.slice(9, 11).astype(int)
train.head()

In [None]:
# Target: the power-consumption column.
train_y = train['전력소비량(kWh)']

# Features: everything except the row identifier, the raw timestamp string,
# the two sunshine/solar-radiation columns and the target itself.
# NOTE(review): the sunshine/solar columns are presumably dropped because the
# test set does not contain them — confirm.
train_x = train.drop(
    ['num_date_time', '일시', '일조(hr)', '일사(MJ/m2)', '전력소비량(kWh)'],
    axis=1,
)

## Regression Model Fit

In [None]:
# Fit a 2000-tree random forest on the engineered training features.
# random_state pins the forest's own randomness, so the fit no longer depends
# on how much of NumPy's global random stream was consumed before this cell
# ran (e.g. when the cell is re-executed). n_jobs=-1 trains the trees in
# parallel on all available cores.
model = RandomForestRegressor(n_estimators=2000, random_state=42, n_jobs=-1)
model.fit(train_x, train_y)

## Test Data Pre-Processing

In [None]:
# Derive the same month, day and hour columns for the test frame as were
# built for training.
# The slice positions assume '일시' strings shaped like 'YYYYMMDD HH'
# — TODO confirm against the raw CSV.
# Vectorised .str slicing replaces a Python lambda that ran once per row.
test['month'] = test['일시'].str.slice(4, 6).astype(int)
test['day'] = test['일시'].str.slice(6, 8).astype(int)
test['time'] = test['일시'].str.slice(9, 11).astype(int)

In [None]:
# Drop the row identifier and the raw timestamp string to get the test
# features. Missing values are filled with 0, matching how the training frame
# is treated before fitting, so train and test features are prepared alike.
test_x = test.drop(columns=['num_date_time', '일시']).fillna(0)

## Inference

In [None]:
# Predict the target for every row of the test features with the fitted model.
preds = model.predict(test_x)

## Submission

In [None]:
# Display the first rows of the submission template loaded earlier.
sub.head()

In [None]:
# Store the predictions in the submission frame's 'answer' column.
# NOTE(review): preds comes from model.predict(test_x), so this presumably
# relies on test and sub sharing the same row order — verify.
sub['answer'] = preds
sub

In [None]:
# Write the submission to the working directory, omitting the DataFrame index.
sub.to_csv('./baseline_submission.csv', index=False)