In [1]:
from utils import *

import numpy as np
import pandas as pd

from sklearn.metrics import mean_squared_log_error, mean_absolute_error
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

import lightgbm as lgb
from xgboost import XGBRegressor, DMatrix, cv
from xgboost import train as train_xgb

## Data description

In [2]:
# Macro indicators keep the default index; train/test are indexed by the `id` column.
macro_df = pd.read_csv('data/macro.csv', parse_dates=['timestamp'])
train_df, test_df = (
    pd.read_csv(csv_path, index_col='id', parse_dates=['timestamp'])
    for csv_path in ('data/train.csv', 'data/test.csv')
)

# tverskoe_issue_fix is called for its effect on each frame (its return value is unused).
tverskoe_issue_fix(train_df)
tverskoe_issue_fix(test_df)

Fix:  550
Fix:  149


## 1. Data preprocessing
## I part (encoding and correcting mistakes)

### Macro dataset

In [3]:
# '#!' placeholders become the string 'nan', which the float cast below turns into NaN.
macro_df['child_on_acc_pre_school'] = macro_df['child_on_acc_pre_school'].str.replace('#!', 'nan')

# In every object-typed column: replace ',' with '.', then cast to float.
for column in macro_df.select_dtypes(include='object').columns:
    macro_df[column] = macro_df[column].str.replace(',', '.').astype(float)

# Sanity check: no object-typed columns should remain.
if macro_df.select_dtypes(include='object').columns.empty:
    print('OK')

OK


### Train dataset

In [4]:
# encode() is not defined in the visible cells (presumably provided by `from utils import *`);
# its result replaces train_df.
train_df = encode(train_df)

### Test dataset

In [5]:
# Same encode() helper as for the training frame, applied to the test frame.
test_df = encode(test_df)

## II part (Filling missing values)

XGBoost handles missing values (`np.nan`) natively, so no imputation is performed at this stage.

## 2. Encoding `sub_area` feature

In [6]:
# Tag every row with its origin, then stack train and test into one frame.
train_df['is_train'], test_df['is_train'] = 1, 0
all_df = pd.concat([train_df, test_df])

# Coordinates are attached by index alignment on `id`.
coords_df = pd.read_csv('data/coords.csv', index_col='id')
all_df = all_df.assign(
    latitude=coords_df['latitude'],
    longitude=coords_df['longitude'],
)

## 3. Removing outliers

In [7]:
# remove_outliers() is not defined in the visible cells (presumably from utils);
# it is applied to the combined train+test frame and its result replaces all_df.
all_df = remove_outliers(all_df)

## 4. Feature engineering

In [8]:
# create_new_features() is not defined in the visible cells (presumably from utils);
# it is applied to the combined train+test frame and its result replaces all_df.
all_df = create_new_features(all_df)

## 5. Removing fake prices

In [9]:
# Split the combined frame back apart using the is_train flag; the flag is dropped
# from both parts and the target column is dropped from the test part.
train_df = all_df.loc[all_df['is_train'].eq(1)].drop(columns=['is_train'])
test_df = all_df.loc[all_df['is_train'].eq(0)].drop(columns=['is_train', 'price_doc'])

In [10]:
# Drop the training rows whose index labels are listed in data/idx_outliers.txt.
# (The direct call is kept for reference: train_df = remove_fake_prices(train_df))
idx_outliers = np.loadtxt('data/idx_outliers.txt').astype(int)
train_df = train_df.drop(index=idx_outliers)

## 6. Modeling

### full

In [11]:
# Features: everything except sub_area and the target; target is log1p(price_doc).
X = train_df.drop(columns=['sub_area', 'price_doc']).copy()
y = np.log1p(train_df['price_doc'])
X_test = test_df.drop(columns=['sub_area']).copy()

In [None]:
X.shape

In [12]:
# Wrap features and log-target in XGBoost's DMatrix container.
dtrain = DMatrix(data=X, label=y)

In [13]:
# XGBoost training parameters shared by the cross-validation run below.
params = dict(
    objective='reg:squarederror',
    tree_method='gpu_hist',
    booster='gbtree',
    base_score=7,
    learning_rate=0.05,
    max_depth=4,
    min_child_weight=7,
    subsample=1,
    colsample_bytree=0.9,
    reg_lambda=5,
    reg_alpha=1,
    eval_metric='rmse',
    seed=42,
    nthread=-1,
)

In [14]:
# 5-fold shuffled CV with early stopping after 50 rounds without improvement.
cv_results = cv(
    params=params,
    dtrain=dtrain,
    nfold=5,
    shuffle=True,
    seed=42,
    num_boost_round=5000,
    early_stopping_rounds=50,
    metrics={'rmse'},
    verbose_eval=True,
)

cv_results

[0]	train-rmse:8.30445+0.00179	test-rmse:8.30447+0.00740
[1]	train-rmse:7.89017+0.00170	test-rmse:7.89022+0.00722
[2]	train-rmse:7.49658+0.00161	test-rmse:7.49661+0.00713
[3]	train-rmse:7.12271+0.00153	test-rmse:7.12274+0.00690
[4]	train-rmse:6.76749+0.00146	test-rmse:6.76757+0.00682
[5]	train-rmse:6.43007+0.00139	test-rmse:6.43015+0.00669
[6]	train-rmse:6.10951+0.00133	test-rmse:6.10954+0.00653
[7]	train-rmse:5.80499+0.00126	test-rmse:5.80503+0.00640
[8]	train-rmse:5.51570+0.00119	test-rmse:5.51572+0.00629
[9]	train-rmse:5.24094+0.00113	test-rmse:5.24096+0.00632
[10]	train-rmse:4.97987+0.00107	test-rmse:4.97990+0.00614
[11]	train-rmse:4.73186+0.00103	test-rmse:4.73194+0.00600
[12]	train-rmse:4.49626+0.00098	test-rmse:4.49638+0.00596
[13]	train-rmse:4.27246+0.00094	test-rmse:4.27257+0.00582
[14]	train-rmse:4.05983+0.00088	test-rmse:4.05999+0.00578
[15]	train-rmse:3.85787+0.00084	test-rmse:3.85798+0.00567
[16]	train-rmse:3.66600+0.00079	test-rmse:3.66605+0.00558
[17]	train-rmse:3.48374+

Unnamed: 0,train-rmse-mean,train-rmse-std,test-rmse-mean,test-rmse-std
0,8.304450,0.001786,8.304470,0.007400
1,7.890175,0.001697,7.890221,0.007220
2,7.496581,0.001613,7.496606,0.007125
3,7.122706,0.001532,7.122744,0.006904
4,6.767495,0.001456,6.767573,0.006819
...,...,...,...,...
2357,0.079705,0.000429,0.127741,0.002139
2358,0.079693,0.000428,0.127739,0.002140
2359,0.079683,0.000428,0.127738,0.002141
2360,0.079672,0.000427,0.127738,0.002142


In [None]:
(2406, 0.12757579999999996)

In [16]:
# Row position of the lowest mean test RMSE, and that RMSE value.
cv_results['test-rmse-mean'].argmin(), cv_results['test-rmse-mean'].min()

(2361, 0.1277364)

### full (0.8)

In [17]:
# Features: everything except sub_area and the target; target is log1p(price_doc).
X = train_df.drop(columns=['sub_area', 'price_doc']).copy()
y = np.log1p(train_df['price_doc'])
X_test = test_df.drop(columns=['sub_area']).copy()

In [18]:
# Keep only the 80% part of a seeded split; the other 20% is discarded.
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)
X = X_train
y = y_train

In [19]:
# DMatrix built from the 80% subset.
dtrain = DMatrix(data=X, label=y)

In [20]:
# XGBoost training parameters for the CV run on the 80% subset.
params = dict(
    objective='reg:squarederror',
    tree_method='gpu_hist',
    booster='gbtree',
    base_score=7,
    learning_rate=0.05,
    max_depth=4,
    min_child_weight=7,
    subsample=1,
    colsample_bytree=0.9,
    reg_lambda=5,
    reg_alpha=1,
    eval_metric='rmse',
    seed=42,
    nthread=-1,
)

In [21]:
# 5-fold shuffled CV with early stopping after 50 rounds without improvement.
cv_results = cv(
    params=params,
    dtrain=dtrain,
    nfold=5,
    shuffle=True,
    seed=42,
    num_boost_round=5000,
    early_stopping_rounds=50,
    metrics={'rmse'},
    verbose_eval=True,
)

cv_results

[0]	train-rmse:8.30560+0.00267	test-rmse:8.30558+0.01089
[1]	train-rmse:7.89137+0.00254	test-rmse:7.89136+0.01060
[2]	train-rmse:7.49780+0.00241	test-rmse:7.49778+0.01028
[3]	train-rmse:7.12395+0.00230	test-rmse:7.12393+0.01003
[4]	train-rmse:6.76876+0.00218	test-rmse:6.76875+0.00968
[5]	train-rmse:6.43136+0.00208	test-rmse:6.43139+0.00938
[6]	train-rmse:6.11080+0.00198	test-rmse:6.11089+0.00909
[7]	train-rmse:5.80628+0.00188	test-rmse:5.80639+0.00887
[8]	train-rmse:5.51702+0.00179	test-rmse:5.51711+0.00861
[9]	train-rmse:5.24227+0.00172	test-rmse:5.24241+0.00847
[10]	train-rmse:4.98121+0.00163	test-rmse:4.98131+0.00819
[11]	train-rmse:4.73320+0.00156	test-rmse:4.73327+0.00786
[12]	train-rmse:4.49761+0.00148	test-rmse:4.49771+0.00752
[13]	train-rmse:4.27380+0.00142	test-rmse:4.27393+0.00720
[14]	train-rmse:4.06120+0.00134	test-rmse:4.06132+0.00697
[15]	train-rmse:3.85922+0.00127	test-rmse:3.85941+0.00678
[16]	train-rmse:3.66737+0.00121	test-rmse:3.66759+0.00647
[17]	train-rmse:3.48512+

Unnamed: 0,train-rmse-mean,train-rmse-std,test-rmse-mean,test-rmse-std
0,8.305604,0.002668,8.305585,0.010894
1,7.891367,0.002537,7.891359,0.010599
2,7.497804,0.002413,7.497784,0.010278
3,7.123949,0.002295,7.123931,0.010028
4,6.768765,0.002184,6.768754,0.009682
...,...,...,...,...
2317,0.074743,0.000371,0.129398,0.003252
2318,0.074732,0.000372,0.129396,0.003251
2319,0.074721,0.000373,0.129397,0.003252
2320,0.074710,0.000373,0.129394,0.003251


In [22]:
(2104, 0.12915539999999998)

(2104, 0.12915539999999998)

In [23]:
# Row position of the lowest mean test RMSE, and that RMSE value.
cv_results['test-rmse-mean'].argmin(), cv_results['test-rmse-mean'].min()

(2321, 0.1293906)

### `product_type == 'Investment'`

In [33]:
# Rows with product_type == 0 (the section header labels this group 'Investment').
train_df_0 = train_df.loc[train_df['product_type'].eq(0)]
test_df_0 = test_df.loc[test_df['product_type'].eq(0)]

# Features exclude sub_area and the target; the target is log1p(price_doc).
X = train_df_0.drop(columns=['sub_area', 'price_doc']).copy()
y = np.log1p(train_df_0['price_doc'])
X_test_0 = test_df_0.drop(columns=['sub_area']).copy()

In [34]:
# XGBoost training parameters for the product_type == 0 CV run.
params = dict(
    objective='reg:squarederror',
    tree_method='gpu_hist',
    booster='gbtree',
    base_score=7,
    learning_rate=0.05,
    max_depth=4,
    min_child_weight=7,
    subsample=1,
    colsample_bytree=0.9,
    reg_lambda=5,
    reg_alpha=1,
    eval_metric='rmse',
    seed=42,
    nthread=-1,
)

In [35]:
# DMatrix built from the product_type == 0 training rows.
dtrain = DMatrix(data=X, label=y)

In [36]:
# 5-fold shuffled CV with early stopping after 50 rounds without improvement.
cv_results = cv(
    params=params,
    dtrain=dtrain,
    nfold=5,
    shuffle=True,
    seed=42,
    num_boost_round=5000,
    early_stopping_rounds=50,
    metrics={'rmse'},
    verbose_eval=True,
)

cv_results

[0]	train-rmse:8.40581+0.00066	test-rmse:8.40581+0.00272
[1]	train-rmse:7.98644+0.00063	test-rmse:7.98646+0.00267
[2]	train-rmse:7.58810+0.00060	test-rmse:7.58811+0.00268
[3]	train-rmse:7.20964+0.00057	test-rmse:7.20964+0.00263
[4]	train-rmse:6.85012+0.00054	test-rmse:6.85017+0.00265
[5]	train-rmse:6.50864+0.00052	test-rmse:6.50868+0.00262
[6]	train-rmse:6.18420+0.00049	test-rmse:6.18418+0.00254
[7]	train-rmse:5.87601+0.00047	test-rmse:5.87603+0.00252
[8]	train-rmse:5.58324+0.00045	test-rmse:5.58319+0.00247
[9]	train-rmse:5.30514+0.00043	test-rmse:5.30515+0.00245
[10]	train-rmse:5.04096+0.00041	test-rmse:5.04089+0.00236
[11]	train-rmse:4.79001+0.00038	test-rmse:4.79001+0.00241
[12]	train-rmse:4.55163+0.00036	test-rmse:4.55161+0.00233
[13]	train-rmse:4.32518+0.00035	test-rmse:4.32521+0.00232
[14]	train-rmse:4.11006+0.00033	test-rmse:4.11012+0.00221
[15]	train-rmse:3.90577+0.00032	test-rmse:3.90590+0.00219
[16]	train-rmse:3.71166+0.00031	test-rmse:3.71174+0.00211
[17]	train-rmse:3.52727+

Unnamed: 0,train-rmse-mean,train-rmse-std,test-rmse-mean,test-rmse-std
0,8.405808,0.000658,8.405808,0.002721
1,7.986442,0.000627,7.986459,0.002674
2,7.588097,0.000598,7.588109,0.002683
3,7.209642,0.000570,7.209640,0.002634
4,6.850124,0.000543,6.850174,0.002648
...,...,...,...,...
1126,0.098743,0.000431,0.141380,0.003058
1127,0.098721,0.000433,0.141380,0.003058
1128,0.098700,0.000433,0.141379,0.003063
1129,0.098679,0.000435,0.141379,0.003060


In [37]:
# Row position of the lowest mean test RMSE, and that RMSE value.
cv_results['test-rmse-mean'].argmin(), cv_results['test-rmse-mean'].min()

0.14137899999999998

### `product_type == 'OwnerOccupier'`

In [26]:
# Rows with product_type == 1 (the section header labels this group 'OwnerOccupier').
train_df_1 = train_df.loc[train_df['product_type'].eq(1)]
test_df_1 = test_df.loc[test_df['product_type'].eq(1)]

# Features exclude sub_area and the target; the target is log1p(price_doc).
X = train_df_1.drop(columns=['sub_area', 'price_doc']).copy()
y = np.log1p(train_df_1['price_doc'])
X_test_1 = test_df_1.drop(columns=['sub_area']).copy()

In [27]:
# XGBoost training parameters for the product_type == 1 CV run.
params = dict(
    objective='reg:squarederror',
    tree_method='gpu_hist',
    booster='gbtree',
    base_score=7,
    learning_rate=0.05,
    max_depth=4,
    min_child_weight=7,
    subsample=1,
    colsample_bytree=0.9,
    reg_lambda=5,
    reg_alpha=1,
    eval_metric='rmse',
    seed=42,
    nthread=-1,
)

In [28]:
# DMatrix built from the product_type == 1 training rows.
dtrain = DMatrix(data=X, label=y)

In [None]:
# 5-fold shuffled CV with early stopping after 50 rounds without improvement.
cv_results = cv(
    params=params,
    dtrain=dtrain,
    nfold=5,
    shuffle=True,
    seed=42,
    num_boost_round=5000,
    early_stopping_rounds=50,
    metrics={'rmse'},
    verbose_eval=True,
)

cv_results

In [32]:
# Row position of the lowest mean test RMSE, and that RMSE value.
cv_results['test-rmse-mean'].argmin(), cv_results['test-rmse-mean'].min()

0.0946974

In [38]:
# Pool the two per-segment RMSE values (hard-coded here) into one figure:
# each squared RMSE is weighted by its segment's row count, then divided by
# the total number of training rows before taking the square root.
error_sum = (
    0.141379 ** 2 * len(train_df.loc[train_df['product_type'] == 0])
    + 0.0946974 ** 2 * len(train_df.loc[train_df['product_type'] == 1])
)
baseline = np.sqrt(error_sum / len(train_df))
baseline

0.12496445868953714

### `Ensembling`

In [11]:
class my_LGBRegressor(object):
    """LightGBM wrapper exposing fit/predict.

    fit() first trains on a random 80% of the rows with early stopping against
    the remaining 20% to choose the number of boosting rounds, then retrains on
    all rows for that many rounds.
    """

    def __init__(self, params):
        # params: dict handed unchanged to lgb.train.
        self.params = params

    def fit(self, X, y, w=None):
        """Fit the booster on (X, y). `w` is accepted but not used."""
        # The split is unseeded, so the chosen round count can vary between runs.
        X_fit, X_holdout, y_fit, y_holdout = train_test_split(X, y, test_size=0.2)
        fit_set = lgb.Dataset(X_fit, y_fit)
        holdout_set = lgb.Dataset(X_holdout, y_holdout)

        # Probe run: up to 10000 rounds, stopping after 50 rounds without improvement.
        probe = lgb.train(
            self.params,
            fit_set,
            10000,
            valid_sets=holdout_set,
            callbacks=[lgb.early_stopping(50)],
        )

        # Final model: all rows, trained for the probe's best number of rounds.
        full_set = lgb.Dataset(X, label=y)
        self.bst = lgb.train(self.params, full_set, probe.best_iteration)

    def predict(self, X):
        """Return the final booster's predictions for X."""
        return self.bst.predict(X)


class my_XGBRegressor(object):
    """XGBoost wrapper exposing fit/predict.

    fit() first trains a probe booster on a random 80% of the rows with early
    stopping against the remaining 20% to choose the number of boosting rounds,
    then retrains on all rows for that many rounds.
    """

    def __init__(self, params):
        # params: dict handed unchanged to xgboost.train.
        self.params = params

    def fit(self, X, y, w=None):
        """Fit the booster on (X, y).

        `w` is accepted for interface compatibility but is not used.
        """
        # The split is unseeded, so the chosen round count can vary between runs.
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
        d_train = DMatrix(X_train, label=y_train)
        d_valid = DMatrix(X_val, label=y_val)

        # Early stopping monitors the last entry of `evals`, i.e. the validation set.
        bst_partial = train_xgb(self.params,
                                d_train,
                                num_boost_round=5000,
                                early_stopping_rounds=50,
                                evals=[(d_train, 'train'), (d_valid, 'val')],
                                verbose_eval=100)

        # best_iteration is a 0-based index, so the number of rounds to keep is
        # index + 1 (using the index directly would drop the best round itself).
        n_rounds = bst_partial.best_iteration + 1

        d_all = DMatrix(X, label=y)
        # Log the metric on the data actually being fitted.
        self.bst = train_xgb(self.params,
                             d_all,
                             num_boost_round=n_rounds,
                             evals=[(d_all, 'train')],
                             verbose_eval=100)

    def predict(self, X_test):
        """Return the final booster's predictions for X_test."""
        d_test = DMatrix(X_test)
        return self.bst.predict(d_test)


class Ensemble(object):
    """Two-level stacking ensemble.

    Each base model produces out-of-fold predictions on the training rows and
    fold-averaged predictions on the test rows; the stacker is then fitted on
    the out-of-fold matrix and predicts from the test matrix.
    """

    def __init__(self, n_folds, stacker, base_models, random_state=None):
        """
        n_folds:      number of KFold splits used for out-of-fold predictions.
        stacker:      second-level model with fit(X, y) / predict(X).
        base_models:  list of first-level models with fit(X, y) / predict(X).
        random_state: optional seed for the KFold shuffle (None keeps it unseeded).
        """
        self.n_folds = n_folds
        self.stacker = stacker
        self.base_models = base_models
        self.random_state = random_state

    def fit_predict(self, train_df, test_df):
        """Fit all models and return stacked predictions of log1p(price_doc) for test_df.

        train_df must contain 'sub_area' and 'price_doc'; test_df must contain
        'sub_area'. All remaining columns are used as features.
        """
        train_features = train_df.drop(['sub_area', 'price_doc'], axis=1)
        test_features = test_df.drop('sub_area', axis=1)

        X = train_features.values
        y = np.log1p(train_df['price_doc']).values
        # w = train_df['w'].values
        X_test = test_features.values

        # Mean-impute once, up front. The raw arrays are kept untouched so that
        # models with native NaN support still see the missing values.
        imputer = SimpleImputer(strategy='mean')
        imputer.fit(pd.concat([train_features, test_features]))
        X_imputed = imputer.transform(train_features)
        X_test_imputed = imputer.transform(test_features)

        kf = KFold(n_splits=self.n_folds, shuffle=True, random_state=self.random_state)
        folds = list(kf.split(X, y))

        S_train = np.zeros((X.shape[0], len(self.base_models)))
        S_test = np.zeros((X_test.shape[0], len(self.base_models)))

        for i, clf in enumerate(self.base_models):
            print('Training model: ' + str(type(clf).__name__))
            S_test_i = np.zeros((X_test.shape[0], len(folds)))

            # Decide by model type, not by identity with notebook globals: the
            # XGBoost/LightGBM wrappers get raw data, everything else gets the
            # imputed copy.
            handles_nan = isinstance(clf, (my_XGBRegressor, my_LGBRegressor))
            X_model = X if handles_nan else X_imputed
            X_test_model = X_test if handles_nan else X_test_imputed

            for j, (train_idx, test_idx) in enumerate(folds):
                print('Training round ' + str(j+1) + ' ...')

                clf.fit(X_model[train_idx], y[train_idx])  # w_train

                # Out-of-fold predictions fill this model's column of S_train.
                S_train[test_idx, i] = clf.predict(X_model[test_idx])
                S_test_i[:, j] = clf.predict(X_test_model)

            # Test predictions are averaged over the fold models.
            S_test[:, i] = S_test_i.mean(axis=1)

        self.stacker.fit(S_train, y)
        y_pred = self.stacker.predict(S_test)
        return y_pred


In [12]:
# Features: everything except sub_area and the target; target is log1p(price_doc).
X = train_df.drop(columns=['sub_area', 'price_doc']).copy()
y = np.log1p(train_df['price_doc'])
X_test = test_df.drop(columns=['sub_area']).copy()

In [13]:
# Tree booster parameters for the XGBoost base model.
params_xgb_tree = {'objective': 'reg:squarederror',
                   'booster': 'gbtree',
                   'tree_method': 'gpu_hist',
                   'base_score': 7,
                   'learning_rate': 0.05,
                   'max_depth': 4,
                   'min_child_weight': 7,
                   'subsample': 1,
                   'colsample_bytree': 0.9,
                   'reg_lambda': 5,
                   'reg_alpha': 1,
                   'eval_metric': 'rmse',
                   'seed': 42,
                   'nthread': -1
                   }


# Linear booster parameters for the XGBoost stacker.
# The booster name is 'gblinear' ('gb_linear' is not a valid XGBoost booster).
# NOTE(review): 'tree_method' is a tree-booster setting — confirm it is needed here.
params_xgb_lin = {'objective': 'reg:squarederror',
                  'booster': 'gblinear',
                  'tree_method': 'gpu_hist',
                  'base_score': 7,
                  'learning_rate': 0.05,
                  'alpha':0,
                  'eval_metric': 'rmse',
                  'seed': 42,
                  'nthread': -1
                  }

# LightGBM parameters for the LightGBM base model.
params_lgb = {'objective': 'regression', 
              'metric': 'rmse',
              'learning_rate': 0.05, 
              'max_depth': -1, 
              'sub_feature': 0.7, 
              'sub_row': 0.9,
              'num_leaves': 15, 
              'min_data': 30, 
              'max_bin': 20,
              'bagging_freq': 40, 
              'verbosity': 0}

In [14]:
#stacker
# Stacker (second-level model)
xgb_lin = my_XGBRegressor(params_xgb_lin)
# LR = LinearRegression()

# Base (first-level) models
xgb_tree = my_XGBRegressor(params_xgb_tree)
lgb_tree = my_LGBRegressor(params_lgb)
RF = RandomForestRegressor(n_estimators=500, max_depth=5, max_features=0.2, n_jobs=-1)
ETR = ExtraTreesRegressor(n_estimators=500, max_depth=5, max_features=0.3, n_jobs=-1)
# The base tree is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2, and the positional
# form works on both sides of that rename.
Ada = AdaBoostRegressor(DecisionTreeRegressor(max_depth=5), n_estimators=200)
GBR = GradientBoostingRegressor(n_estimators=200, max_depth=5, max_features=0.5)

E = Ensemble(
    n_folds=5,
    stacker=xgb_lin,
    base_models=[lgb_tree, xgb_tree, RF, ETR, Ada, GBR]
)

# Stacked predictions for the test rows, on the log1p(price_doc) scale.
y_pred = E.fit_predict(train_df, test_df)

Training model: my_LGBRegressor
Training round 1 ...
[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.842649
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.232622
[LightGBM] [Debug] init for col-wise cost 0.003960 seconds, init for row-wise cost 0.031253 seconds
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5343
[LightGBM] [Info] Number of data points in the train set: 17776, number of used features: 306
[LightGBM] [Info] Start training from score 15.728492
[LightGBM] [Debug] Re-bagging, using 16028 data to train
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 5
Training until validation scores don't improve for 50 rounds
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 5
[LightG

Exception ignored on calling ctypes callback function: <function _log_callback at 0x7f21289ef9d0>
Traceback (most recent call last):
  File "/home/stasvlad/.local/lib/python3.8/site-packages/lightgbm/basic.py", line 91, in _log_callback
    def _log_callback(msg: bytes) -> None:
KeyboardInterrupt: 


Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 5
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 7
[LightGBM] [Debug] Trained a tree with leaves = 15 and depth = 6
[LightGBM] [Debug] Trained a tree with leave

## Testing

In [None]:
# Test rows whose product_type is missing, with sub_area removed from the features.
test_df_nan = test_df.loc[test_df['product_type'].isna()]
X_test_nan = test_df_nan.drop(columns=['sub_area']).copy()

In [None]:
submission = pd.read_csv('data/sample_submission.csv', index_col='id')

# Predictions are mapped back with expm1 (inverse of the log1p applied to the target).
# NOTE(review): model_0 and model_1 are not defined in any visible cell — confirm
# where they are trained before running this on a fresh kernel.
pred_0 = np.expm1(model_0.predict(X_test_0))
pred_1 = np.expm1(model_1.predict(X_test_1))
# Rows with missing product_type are scored with model_1.
pred_nan = np.expm1(model_1.predict(X_test_nan))

if any((pred < 0).any() for pred in (pred_0, pred_1, pred_nan)):
    print('WARNING: NEGATIVE PREDICTIONS')

In [None]:
# Each segment's predictions are scaled by a fixed factor before being written.
# NOTE(review): the 0.87 / 0.93 / 0.95 factors are hard-coded with no visible
# derivation — confirm their origin.
submission.loc[X_test_0.index, 'price_doc'] = 0.87*pred_0
submission.loc[X_test_1.index, 'price_doc'] = 0.93*pred_1
submission.loc[X_test_nan.index, 'price_doc'] = 0.95*pred_nan
# `index` is a boolean flag; the index was loaded from the `id` column, so it is
# written out under that name.
submission.to_csv('submits/submission.csv', index=True)

In [None]:
# !kaggle competitions submit -c sberbank-russian-housing-market -f "submits/submission.csv" -m "XGBRegressor tuned"

100%|████████████████████████████████████████| 121k/121k [00:02<00:00, 61.0kB/s]
Successfully submitted to Sberbank Russian Housing Market