In [1]:
# Numeric, tabular and plotting libraries used by the notebook.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import timeit
import sklearn
import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation/convergence warnings from the
# ML libraries — consider narrowing the filter.
warnings.filterwarnings('ignore')
import sys
# Set matplotlib's default font family to "Malgun Gothic"
# (presumably so Korean text in figures renders correctly — confirm).
plt.rc("font", family="Malgun Gothic")

In [12]:
from sklearn.linear_model import ElasticNet, Lasso
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import lightgbm as lgb

In [13]:
# Load the preprocessed apartment table from its UTF-8 encoded CSV.
apt_price = pd.read_csv(
    filepath_or_buffer='아파트csv/부동산 집값_예측/아파트_전처리.csv',
    encoding='utf8',
)

In [14]:
# Target column first, then every remaining column as the feature matrix.
y = apt_price['거래금액(만원)']
X = apt_price.drop('거래금액(만원)', axis='columns')

In [15]:
# Hold out 30% of the rows for evaluation.  The split is seeded (same seed as
# the estimators in this notebook) so that Restart & Run All reproduces the
# exact same train/test rows; without a seed every run shuffles differently
# and downstream comparisons are not repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Base regressors used for the blend; each one is seeded with 42.
forest = RandomForestRegressor(n_estimators=100, n_jobs=-1, random_state=42)
xgboost = xgb.XGBRegressor(random_state=42)

# LightGBM configuration.
# `min_child_samples` is an alias of `min_data_in_leaf`.  When both are given
# LightGBM keeps the primary name and ignores the alias, so a second value
# passed as `min_child_samples` never takes effect (and verbosity=-1 hides the
# warning about it).  Only the effective setting, min_data_in_leaf=15, is kept.
# NOTE(review): max_depth=6 allows at most 2**6 = 64 leaves per tree, so
# num_leaves=100 is presumably never reached — confirm the intended limit.
lightgbm = lgb.LGBMRegressor(
    random_state=42,
    num_leaves=100,
    min_data_in_leaf=15,
    max_depth=6,
    learning_rate=0.1,
    feature_fraction=0.9,
    bagging_freq=1,
    bagging_fraction=0.9,
    bagging_seed=11,
    lambda_l1=0.1,
    verbosity=-1,
)

In [7]:
# Registry of base estimators.  The order here fixes the column order of the
# prediction matrix returned by the blending helper (XGBoost, LightGBM,
# RandomForest).
models = [
    {'model': estimator, 'name': label}
    for estimator, label in (
        (xgboost, 'XGBoost'),
        (lightgbm, 'LightGBM'),
        (forest, 'RandomForest'),
    )
]

def AveragingBlending(models, x, y, sub_x, weights=None):
    """Fit every base model on ``(x, y)`` and predict ``sub_x`` with each one.

    Parameters
    ----------
    models : iterable of dict
        Each entry stores an estimator under the ``'model'`` key.  The
        estimator must expose ``fit(X, y)`` and ``predict(X)``.  Estimators
        are fitted in place.
    x : pandas.DataFrame or array-like
        Training features.
    y : array-like
        Training target, passed to ``fit`` unchanged.
    sub_x : pandas.DataFrame or array-like
        Features to predict on.
    weights : sequence of float, optional
        One weight per model, in the same order as ``models``.  When given,
        the per-model predictions are combined into a single weighted sum.

    Returns
    -------
    numpy.ndarray
        Without ``weights``: one column of predictions per model, in the
        order of ``models``.  With ``weights``: the 1-D weighted sum of those
        columns.

    Raises
    ------
    ValueError
        If ``models`` is empty, or ``weights`` does not hold exactly one
        value per model.
    """
    # Materialize once: the collection is traversed twice (fit, then predict).
    models = list(models)
    if not models:
        raise ValueError("AveragingBlending requires at least one model")

    # DataFrames contribute their underlying array (as before); plain
    # arrays/lists are accepted as-is instead of failing on `.values`.
    train_features = np.asarray(getattr(x, "values", x))
    predict_features = np.asarray(getattr(sub_x, "values", sub_x))

    for entry in models:
        entry['model'].fit(train_features, y)

    predictions = np.column_stack(
        [entry['model'].predict(predict_features) for entry in models]
    )
    if weights is None:
        return predictions

    weights = np.asarray(weights, dtype=float)
    if weights.shape != (predictions.shape[1],):
        raise ValueError(
            "weights must contain exactly one value per model "
            f"(expected {predictions.shape[1]}, got shape {weights.shape})"
        )
    return predictions @ weights

In [8]:
# Fit all registered models on the training split and collect their
# predictions for the held-out rows (one column per model).
y_test_pred = AveragingBlending(
    models=models,
    x=X_train,
    y=y_train,
    sub_x=X_test,
)



In [9]:
# Blend weights, one per prediction column (columns follow the order of the
# `models` list: XGBoost, LightGBM, RandomForest).
xgb_weight, lgbm_weight, forest_weight = 0.05, 0.1, 0.85

xgb_part = y_test_pred[:, 0] * xgb_weight
lgbm_part = y_test_pred[:, 1] * lgbm_weight
forest_part = y_test_pred[:, 2] * forest_weight

# Same left-to-right summation as a single chained expression.
predictions = xgb_part + lgbm_part + forest_part
predictions

array([11.51268259, 10.50755248, 11.03836578, ..., 10.26386143,
        9.46771205,  9.93681905])

In [30]:
# Display the loaded frame; pandas abbreviates long frames to their first and
# last rows in the rendered output.
apt_price

Unnamed: 0,전용면적(㎡),계약일,거래금액(만원),층,건축년도,구,동,평,계약년,계약월,한강,건물나이,재건축
0,4.366278,1,11.097425,2.197225,1988,24,293,3.284664,2012,5,0.0,24,0.000000
1,4.366278,9,10.968216,1.945910,1988,24,293,3.284664,2012,7,0.0,24,0.000000
2,4.658047,13,11.350418,2.397895,1984,24,293,3.569533,2012,4,0.0,28,0.000000
3,4.907495,8,11.691080,2.944439,2004,24,293,3.813307,2012,5,0.0,8,0.000000
4,3.939249,9,11.258046,1.791759,1982,24,293,2.867899,2012,1,0.0,30,0.693147
...,...,...,...,...,...,...,...,...,...,...,...,...,...
824116,4.106932,20,11.211834,2.079442,1997,3,67,3.030134,2022,7,0.0,25,0.000000
824117,4.443004,27,11.424105,2.833213,1997,3,67,3.356897,2022,7,0.0,25,0.000000
824118,4.450736,4,10.819798,1.945910,2003,3,67,3.367296,2021,11,0.0,18,0.000000
824119,4.450736,9,10.817796,2.484907,2003,3,67,3.367296,2022,4,0.0,19,0.000000


## Test 예측값

In [13]:
# Map the blended predictions back through expm1 (presumably the target was
# log1p-transformed during preprocessing — confirm) and show them as a single
# column aligned with the test rows' index.
pd.DataFrame(
    np.expm1(predictions)[:, np.newaxis],
    index=X_test.index,
)

Unnamed: 0,0
700619,99974.715330
218807,36589.813114
519467,62214.894427
616265,34921.117493
319603,28274.580039
...,...
350041,26670.634675
246132,21476.014832
632826,28676.307955
645042,12934.258078


## Test 실제값

In [18]:
# Actual prices of the held-out rows, mapped back through expm1.
# `y_test` already holds the target column for exactly the test rows, so the
# CSV does not need to be re-read.  The previous form read it from a relative
# path that differs from the one used when loading the data, and selected the
# target by a hard-coded column position.
pd.DataFrame(np.expm1(y_test))

Unnamed: 0,거래금액(만원)
700619,82000.0
218807,34900.0
519467,66500.0
616265,34800.0
319603,29350.0
...,...
350041,30000.0
246132,22000.0
632826,31500.0
645042,12500.0


In [1]:
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
import xgboost as xgb
import lightgbm as lgb
import joblib
import numpy as np
import pandas as pd
import sklearn

  from pandas import MultiIndex, Int64Index


In [4]:
# Restore the three persisted estimators in one pass.
# NOTE: joblib.load unpickles its input — only load model files you trust.
XGB1, LGBM1, RandomForest = (
    joblib.load(model_path)
    for model_path in (
        '아파트csv/부동산 집값_예측/XGB1.pkl',
        '아파트csv/부동산 집값_예측/LGBM1.pkl',
        '아파트csv/부동산 집값_예측/RandomForest1.pkl',
    )
)

In [16]:
# Weighted blend of the three loaded models (0.05 / 0.1 / 0.85), mapped back
# through expm1.
# NOTE(review): X_test is not defined in this session's visible cells; it
# presumably comes from an earlier train/test split — confirm that these rows
# were not part of the data the pickled models were trained on.
np.expm1(
    0.05 * XGB1.predict(X_test.values)
    + 0.1 * LGBM1.predict(X_test.values)
    + 0.85 * RandomForest.predict(X_test.values)
)

array([50575.79868118, 31176.9463937 , 33637.52209896, ...,
       38545.75602298, 70361.55135835, 63774.86704556])