In [2]:
import os
import sys
from datetime import datetime as dt

import numpy as np
import pandas as pd

# Add the parent directory to the import path so the project-local `src`
# package below can be resolved; this must run before the `src` imports.
sys.path.append("../")

from src.utils import DataPreprocessor as DP
from src.utils import Postprocessor as PP
from src.models import ManshionBrothers as MB

In [3]:
## Load data
# low_memory=False makes pandas parse each file in one pass instead of in
# chunks, which avoids mixed-type inference within a column.
df_train, df_test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ("../data/train_set.csv", "../data/test_set.csv")
)

In [4]:
## Preprocessing: one DataPreprocessor per property type (SOURCE value)
def _build_preprocessor(train_df: pd.DataFrame, test_df: pd.DataFrame,
                        source: str, num_cols: list, cat_cols: list):
    """Build a compiled DP restricted to the rows of a single SOURCE value.

    Args:
        train_df: Training frame; must contain a ``SOURCE`` column.
        test_df: Test frame; must contain a ``SOURCE`` column.
        source: SOURCE value whose rows are kept (e.g. ``"Residential"``).
        num_cols: Column names forwarded to ``DP.set_cols`` as ``num_cols``.
        cat_cols: Column names forwarded to ``DP.set_cols`` as ``cat_cols``.

    Returns:
        The DP instance after ``load_dataframe``, ``set_cols`` and ``compile``
        have been called on it, in that order.
    """
    # Same filter is applied to both frames so train and test describe the
    # same property type.
    row_filter = f'SOURCE=="{source}"'
    preprocessor = DP()
    preprocessor.load_dataframe(train_df.query(row_filter), test_df.query(row_filter))
    preprocessor.set_cols(num_cols=num_cols, cat_cols=cat_cols)
    preprocessor.compile()
    return preprocessor


# Feature selection @ Residential
nc_resi = ["BATHRM", "EYB"]
cc_resi = ["QUADRANT"]
dp_resi = _build_preprocessor(df_train, df_test, "Residential", nc_resi, cc_resi)

# Feature selection @ Condominium
# Kept as separate lists (not aliases of the Residential ones) so the two
# feature sets can diverge without affecting each other.
nc_cond = ["BATHRM", "EYB"]
cc_cond = ["QUADRANT"]
dp_cond = _build_preprocessor(df_train, df_test, "Condominium", nc_cond, cc_cond)


## Model setup: one independent MB instance per property type
def _compiled_model(model_keys=("LGB",)):
    """Return a fresh MB instance compiled with the given model keys.

    A new list is built for every call so the instances never share the
    same ``models`` list object.
    """
    bundle = MB()
    bundle.compile(models=list(model_keys))
    return bundle


# "LGB" is the only model key used here (presumably LightGBM; confirm in src.models).
mb_resi = _compiled_model()  # Residential
mb_cond = _compiled_model()  # Condominium

In [5]:
## Validation @ Residential
resi_split = dp_resi.get_validation_data()
x_tr_resi, x_vl_resi, y_tr_resi, y_vl_resi = resi_split

# Train and predict: the model is fitted on log1p-transformed targets, so its
# predictions are on the log1p scale as well.
y_tr_resi_log = np.log1p(y_tr_resi)
mb_resi.train(x_tr_resi, y_tr_resi_log)
y_pre_resi = mb_resi.predict(x_vl_resi)

# Evaluate against the validation targets on the same log1p scale.
# NOTE(review): because both arguments are already in log space, the printed
# "MSE" is a log-space error; if evaluate() applies another log for "MSLE",
# that figure is a log-of-log value. Confirm against MB.evaluate.
print("Residential")
y_vl_resi_log = np.log1p(y_vl_resi)
_ = mb_resi.evaluate(y_pre_resi, y_vl_resi_log)

# Visual separator between the two validation reports
print("~"*30)

## Validation @ Condominium
cond_split = dp_cond.get_validation_data()
x_tr_cond, x_vl_cond, y_tr_cond, y_vl_cond = cond_split

# Train and predict: the model is fitted on log1p-transformed targets, so its
# predictions are on the log1p scale as well.
y_tr_cond_log = np.log1p(y_tr_cond)
mb_cond.train(x_tr_cond, y_tr_cond_log)
y_pre_cond = mb_cond.predict(x_vl_cond)

# Evaluate against the validation targets on the same log1p scale.
# NOTE(review): because both arguments are already in log space, the printed
# "MSE" is a log-space error; if evaluate() applies another log for "MSLE",
# that figure is a log-of-log value. Confirm against MB.evaluate.
print("Condominium")
y_vl_cond_log = np.log1p(y_vl_cond)
_ = mb_cond.evaluate(y_pre_cond, y_vl_cond_log)

Residential
MSE  : 0.41148120228468615
MSLE : 0.002373805802065891
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Condominium
MSE  : 0.6073580373613809
MSLE : 0.00292177790843747


In [15]:
## Refit for submission
# Train each model on its preprocessor's x_train / y_train, again on the
# log1p scale of the target.
# NOTE(review): train() is called on the same MB instances that were already
# fitted in the validation cell; confirm that MB.train refits from scratch
# rather than continuing from the earlier fit.
log_y_resi = np.log1p(dp_resi.y_train)
mb_resi.train(dp_resi.x_train, log_y_resi)
log_y_cond = np.log1p(dp_cond.y_train)
mb_cond.train(dp_cond.x_train, log_y_cond)

## Predict for submission
# Predictions are still on the log1p scale at this point.
y_pred_resi = mb_resi.predict(dp_resi.x_test)
y_pred_cond = mb_cond.predict(dp_cond.x_test)

## Build the submission frame
# expm1 undoes the log1p applied to the targets at training time.
submit_resi = PP.make_submit_csv(np.expm1(y_pred_resi), dp_resi.x_test)
submit_cond = PP.make_submit_csv(np.expm1(y_pred_cond), dp_cond.x_test)

# Stack Residential rows on top of Condominium rows, attach the Id column and
# keep only the two submission columns.
# NOTE(review): the Id assignment aligns df_test["Id"] to the stacked frame by
# index label, so it is only correct if make_submit_csv keeps the original
# df_test index on its output. Verify against Postprocessor.make_submit_csv.
submit = (
    pd.concat([submit_resi, submit_cond], axis=0)
    .assign(Id=df_test["Id"])
    [["Id", "PRICE"]]
)

## Save the submission
submit_dir = "../data/submit/"
# to_csv does not create missing parent directories, so make sure the target
# folder exists first (no-op when it already does). Without this the cell
# fails on a checkout where the submit folder has not been created yet.
os.makedirs(submit_dir, exist_ok=True)
# Timestamped file name (local time, second resolution) so a later run does
# not overwrite an earlier submission.
fn = dt.now().strftime("%Y%m%d_%H%M%S") + ".csv"
# index=False: write only the columns of `submit`, not the DataFrame index.
submit.to_csv(submit_dir + fn, header=True, index=False)