In [1]:
from utils import *
from features import *

import numpy as np
import pandas as pd

from sklearn.metrics import mean_squared_log_error, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

import lightgbm as lgb
from xgboost import XGBRegressor, DMatrix, cv
from xgboost import train as train_xgb

## Data description

In [2]:
# Raw tables: macro indicators plus the train/test listings (indexed by `id`);
# the `timestamp` column of each file is parsed as datetime.
macro_df = pd.read_csv('data/macro.csv', parse_dates=['timestamp'])
train_df, test_df = (
    pd.read_csv(csv_path, index_col='id', parse_dates=['timestamp'])
    for csv_path in ('data/train.csv', 'data/test.csv')
)

# tverskoe_issue_fix is a star-imported project helper; it is applied to train first, then test.
for listings in (train_df, test_df):
    tverskoe_issue_fix(listings)

Fix:  550
Fix:  149


## 1. Data preprocessing
## Part I (encoding and correcting mistakes)

### Macro dataset

In [3]:
# '#!' is a placeholder found in this column; turn it into the text 'nan' so the float cast below accepts it.
macro_df['child_on_acc_pre_school'] = macro_df['child_on_acc_pre_school'].str.replace('#!', 'nan')

# The remaining text columns use a decimal comma: swap it for a dot and cast to float.
text_columns = macro_df.select_dtypes('object').columns
for name in text_columns:
    macro_df[name] = macro_df[name].str.replace(',', '.').astype(float)

# Confirm that no text column is left.
if macro_df.select_dtypes('object').columns.empty:
    print('OK')

OK


### Train dataset

In [4]:
# encode() is a star-imported project helper (definition not visible here).
# NOTE(review): the cell rebinds train_df, so re-running it encodes the already
# encoded frame again — confirm encode() tolerates that.
train_df = encode(train_df)

### Test dataset

In [5]:
# Same star-imported encode() helper as used for the train frame, applied to the test frame.
# NOTE(review): the cell rebinds test_df, so re-running it encodes twice — confirm that is safe.
test_df = encode(test_df)

## Part II (filling missing values)

The `XGBRegressor` model handles missing values (`np.nan`) natively, so no explicit imputation is performed here.

## 2. Encoding `sub_area` feature

In [6]:
def _load_coords(csv_path):
    """Read a lat/lon CSV, drop the `key` and `tolerance_m` columns, and return it indexed and sorted by `id`."""
    coords = pd.read_csv(csv_path).drop(columns=['key', 'tolerance_m'])
    return coords.set_index('id').sort_index()


coords_train_df = _load_coords('data/geo/train_lat_lon.csv')
coords_test_df = _load_coords('data/geo/test_lat_lon.csv')

# Single lookup table for both splits, keyed by listing id.
coords_all_df = pd.concat([coords_train_df, coords_test_df])

In [7]:
# Mark the origin of every row so the two splits can be separated again later.
train_df['is_train'], test_df['is_train'] = 1, 0

# coords_df = pd.read_csv('data/coords.csv', index_col='id')
all_df = pd.concat([train_df, test_df])

# Column assignment aligns on the index (listing id), so coordinates land on the matching rows.
for target_column, coords_column in (('latitude', 'lat'), ('longitude', 'lon')):
    all_df[target_column] = coords_all_df[coords_column]

## 3. Removing outliers

In [8]:
# remove_outliers() is a star-imported project helper applied to train and test together.
# NOTE(review): confirm it only corrects values and never drops test rows — every
# test id needs a prediction in the submission.
all_df = remove_outliers(all_df)

## 4. Feature engineering

In [9]:
# create_new_features() is a star-imported project helper.
# Later cells read `timestamp_day`, `timestamp_month` and `timestamp_year`;
# presumably they are created here or in encode() — TODO confirm.
all_df = create_new_features(all_df)

## 5. Removing fake prices

In [10]:
# Undo the earlier concatenation using the `is_train` marker; the test split has no target column.
train_rows = all_df['is_train'].eq(1)
test_rows = all_df['is_train'].eq(0)
train_df = all_df.loc[train_rows].drop(columns=['is_train'])
test_df = all_df.loc[test_rows].drop(columns=['is_train', 'price_doc'])

In [11]:
# remove_fake_prices() is a star-imported project helper; in the recorded run it reported "REMOVED: 35".
train_df = remove_fake_prices(train_df)

REMOVED: 35


In [12]:
# Ids of rows to discard are kept in a plain-text file; loadtxt yields floats, hence the int cast.
outlier_ids_raw = np.loadtxt('outliers/idx_outliers_full.txt')
idx_outliers = outlier_ids_raw.astype(int)
train_df = train_df.drop(index=idx_outliers)

## BONUS: Merging `macro.csv` with the main dataset

In [13]:
# Split the macro timestamp into day/month/year join keys, then discard the timestamp itself.
macro_dates = macro_df['timestamp'].dt
macro_df = macro_df.assign(
    timestamp_day=macro_dates.day,
    timestamp_month=macro_dates.month,
    timestamp_year=macro_dates.year,
).drop(columns='timestamp')

In [14]:
def _merge_macro(frame):
    """Left-join the daily macro indicators onto `frame` while keeping `frame`'s index.

    `DataFrame.merge` on columns returns a fresh 0..n-1 index, which would throw away
    the listing ids that other cells use to address rows (e.g. when writing the
    submission). The index is therefore moved into a column for the join and
    restored afterwards.

    Parameters
    ----------
    frame : pd.DataFrame
        Listings carrying `timestamp_day`, `timestamp_month` and `timestamp_year`.

    Returns
    -------
    pd.DataFrame
        `frame` with the `macro_df` columns appended, same rows and same index.
    """
    original_name = frame.index.name
    # reset_index() names the new column 'index' when the index is unnamed
    key_column = original_name if original_name is not None else 'index'
    merged = frame.reset_index().merge(
        macro_df,
        on=['timestamp_day', 'timestamp_month', 'timestamp_year'],
        how='left',
    ).set_index(key_column)
    merged.index.name = original_name
    return merged


train_macro_df = _merge_macro(train_df)
test_macro_df = _merge_macro(test_df)

## 6.1 Normalizing prices (`product_type == 'Investment'`)

In [15]:
# Independent copy of the rows whose encoded product_type is 0 (the 'Investment'
# group per the section header); used below only to collect row ids per quarter.
train_df_0 = train_df[train_df['product_type'] == 0].copy()

In [16]:
def _quarter_index(frame, year, quarter):
    """Return the index labels of the rows of `frame` dated in `quarter` (1-4) of `year`.

    A single combined boolean mask is used, so the mask always has exactly the same
    index as the frame it filters (no reliance on implicit alignment between a
    filtered frame and a mask built from the unfiltered one). Row order is preserved.

    Parameters
    ----------
    frame : pd.DataFrame
        Must contain the columns `timestamp_year` and `timestamp_month`.
    year : int
        Calendar year to select.
    quarter : int
        Calendar quarter, 1 (Jan-Mar) to 4 (Oct-Dec).

    Returns
    -------
    pd.Index
        Index labels of the matching rows.
    """
    first_month = 3 * quarter - 2
    month = frame['timestamp_month']
    in_quarter = (
        (frame['timestamp_year'] == year)
        & (month >= first_month)
        & (month < first_month + 3)
    )
    return frame.loc[in_quarter].index


# One index per calendar quarter, from 2011 Q3 through 2015 Q2.
train_2011_q3_index = _quarter_index(train_df_0, 2011, 3)
train_2011_q4_index = _quarter_index(train_df_0, 2011, 4)
train_2012_q1_index = _quarter_index(train_df_0, 2012, 1)
train_2012_q2_index = _quarter_index(train_df_0, 2012, 2)
train_2012_q3_index = _quarter_index(train_df_0, 2012, 3)
train_2012_q4_index = _quarter_index(train_df_0, 2012, 4)
train_2013_q1_index = _quarter_index(train_df_0, 2013, 1)
train_2013_q2_index = _quarter_index(train_df_0, 2013, 2)
train_2013_q3_index = _quarter_index(train_df_0, 2013, 3)
train_2013_q4_index = _quarter_index(train_df_0, 2013, 4)
train_2014_q1_index = _quarter_index(train_df_0, 2014, 1)
train_2014_q2_index = _quarter_index(train_df_0, 2014, 2)
train_2014_q3_index = _quarter_index(train_df_0, 2014, 3)
train_2014_q4_index = _quarter_index(train_df_0, 2014, 4)
train_2015_q1_index = _quarter_index(train_df_0, 2015, 1)
train_2015_q2_index = _quarter_index(train_df_0, 2015, 2)

In [17]:
# Quarter indexes in chronological order; the last entry (2015 Q2) is the reference quarter.
train_q_idx = [
    train_2011_q3_index, train_2011_q4_index,
    train_2012_q1_index, train_2012_q2_index, train_2012_q3_index, train_2012_q4_index,
    train_2013_q1_index, train_2013_q2_index, train_2013_q3_index, train_2013_q4_index,
    train_2014_q1_index, train_2014_q2_index, train_2014_q3_index, train_2014_q4_index,
    train_2015_q1_index, train_2015_q2_index,
]

In [19]:
# Sanity check: the quarter indexes together account for every row of train_df_0.
len(train_df_0) == sum(map(len, train_q_idx))

True

In [20]:
# 2015 Q2 is the reference quarter (its factor stays 1), so it is taken out of the
# list of quarters to rescale. The identity guard makes the cell safe to re-run:
# a bare pop() would silently remove one more quarter on every execution.
if train_q_idx and train_q_idx[-1] is train_2015_q2_index:
    train_q_idx.pop()
train_2015_q2_index

Int64Index([28765, 28771, 28772, 28774, 28775, 28777, 28778, 28781, 28783,
            28784,
            ...
            30455, 30457, 30460, 30463, 30464, 30466, 30469, 30470, 30472,
            30473],
           dtype='int64', name='id', length=716)

In [21]:
# Sanity check: the remaining quarters cover train_df_0 minus the 2015 Q2 rows.
len(train_df_0) - len(train_df.loc[train_2015_q2_index]) == sum(map(len, train_q_idx))

True

In [22]:
# Mean price per unit of full_sq for each quarter, oldest first.
for quarter_ids in train_q_idx:
    quarter_rows = train_df.loc[quarter_ids]
    price_per_sq = quarter_rows['price_doc'] / quarter_rows['full_sq']
    print(price_per_sq.mean())

146003.66978784843
147007.56866771728
154123.60472626315
156463.75911237008
157370.6808427691
158166.67377496415
160119.11724372697
159137.1708521332
163726.89049531604
158594.24925726495
161002.8972664781
165221.11492517497
165168.61430014294
169940.21707769358
173190.639734523


In [23]:
# Neutral scaling factor for every row. Created as float because the per-quarter
# factors written later are floats; starting from an integer column would force an
# implicit dtype upcast on assignment (deprecated in recent pandas).
train_df['average_q_price'] = 1.0

In [24]:
# The reference quarter (2015 Q2) keeps a factor of 1.
train_df.loc[train_2015_q2_index, 'average_q_price'] = 1

# Mean price per unit of full_sq in the reference quarter.
reference_rows = train_df.loc[train_2015_q2_index]
base_price = (reference_rows['price_doc'] / reference_rows['full_sq']).mean()

# Every other quarter gets the ratio reference mean / own mean as its factor.
for quarter_ids in train_q_idx:
    quarter_rows = train_df.loc[quarter_ids]
    quarter_price = (quarter_rows['price_doc'] / quarter_rows['full_sq']).mean()
    train_df.loc[quarter_ids, 'average_q_price'] = base_price / quarter_price


## 6.2 Normalizing prices (`product_type == 'OwnerOccupier'`)

In [34]:
# Independent copy of the rows whose encoded product_type is 1; used below only
# to collect row ids per quarter for this product type.
train_df_1 = train_df[train_df['product_type'] == 1].copy()

In [35]:
# Row ids of train_df_1 per (year, quarter). One combined boolean mask per quarter
# is applied to the full frame, so mask and frame always share the same index
# (no chained .loc with masks built from the unfiltered frame) and row order is kept.
_owner_year = train_df_1['timestamp_year']
_owner_month = train_df_1['timestamp_month']
_owner_quarter_index = {
    (year, quarter): train_df_1.loc[
        (_owner_year == year)
        & (_owner_month >= 3 * quarter - 2)   # first month of the quarter
        & (_owner_month < 3 * quarter + 1)    # first month of the next quarter
    ].index
    for year in range(2011, 2016)
    for quarter in range(1, 5)
}

# Named indexes from 2011 Q3 through 2015 Q2.
train_2011_q3_index = _owner_quarter_index[(2011, 3)]
train_2011_q4_index = _owner_quarter_index[(2011, 4)]
train_2012_q1_index = _owner_quarter_index[(2012, 1)]
train_2012_q2_index = _owner_quarter_index[(2012, 2)]
train_2012_q3_index = _owner_quarter_index[(2012, 3)]
train_2012_q4_index = _owner_quarter_index[(2012, 4)]
train_2013_q1_index = _owner_quarter_index[(2013, 1)]
train_2013_q2_index = _owner_quarter_index[(2013, 2)]
train_2013_q3_index = _owner_quarter_index[(2013, 3)]
train_2013_q4_index = _owner_quarter_index[(2013, 4)]
train_2014_q1_index = _owner_quarter_index[(2014, 1)]
train_2014_q2_index = _owner_quarter_index[(2014, 2)]
train_2014_q3_index = _owner_quarter_index[(2014, 3)]
train_2014_q4_index = _owner_quarter_index[(2014, 4)]
train_2015_q1_index = _owner_quarter_index[(2015, 1)]
train_2015_q2_index = _owner_quarter_index[(2015, 2)]

In [36]:
# Chronological list of the quarter indexes built from train_df_1; 2015 Q2 comes last.
train_q_idx = [
    train_2011_q3_index,
    train_2011_q4_index,
    train_2012_q1_index,
    train_2012_q2_index,
    train_2012_q3_index,
    train_2012_q4_index,
    train_2013_q1_index,
    train_2013_q2_index,
    train_2013_q3_index,
    train_2013_q4_index,
    train_2014_q1_index,
    train_2014_q2_index,
    train_2014_q3_index,
    train_2014_q4_index,
    train_2015_q1_index,
    train_2015_q2_index,
]

In [37]:
# Sanity check: the quarter indexes together account for every row of train_df_1.
quarter_row_total = sum(len(quarter_ids) for quarter_ids in train_q_idx)
quarter_row_total == len(train_df_1)

True

In [38]:
# Take the reference quarter (2015 Q2, factor 1) out of the quarters to rescale.
# Guarded by identity so that re-running the cell cannot remove a further quarter,
# which a bare pop() would do on every execution.
if train_q_idx and train_q_idx[-1] is train_2015_q2_index:
    train_q_idx.pop()
train_2015_q2_index

Int64Index([28763, 28764, 28766, 28767, 28768, 28770, 28773, 28776, 28779,
            28780,
            ...
            30453, 30454, 30456, 30458, 30461, 30462, 30465, 30467, 30468,
            30471],
           dtype='int64', name='id', length=896)

In [39]:
# Sanity check: the remaining quarters cover train_df_1 minus the 2015 Q2 rows.
quarter_row_total = sum(len(quarter_ids) for quarter_ids in train_q_idx)
quarter_row_total == len(train_df_1) - len(train_df.loc[train_2015_q2_index])

True

In [40]:
# Mean price per unit of full_sq for each remaining quarter, oldest first.
for quarter_ids in train_q_idx:
    rows_in_quarter = train_df.loc[quarter_ids]
    print(rows_in_quarter['price_doc'].div(rows_in_quarter['full_sq']).mean())

113073.97260273973
136145.3975765336
147778.72739137296
138993.9483576403
96237.5923084805
90742.56271618714
98639.39428290667
99585.48477661972
104655.732082085
107174.24678825888
106913.52490299725
111861.94168330009
115122.04022919902
118082.13624021263
120242.83070880704


In [41]:
# Reference quarter (2015 Q2) keeps a factor of 1.
train_df.loc[train_2015_q2_index, 'average_q_price'] = 1

# Mean price per unit of full_sq over the reference quarter.
reference_rows = train_df.loc[train_2015_q2_index]
base_price = reference_rows['price_doc'].div(reference_rows['full_sq']).mean()

# Factor of a quarter = reference mean / mean of that quarter.
for quarter_ids in train_q_idx:
    rows_in_quarter = train_df.loc[quarter_ids]
    quarter_mean = rows_in_quarter['price_doc'].div(rows_in_quarter['full_sq']).mean()
    train_df.loc[quarter_ids, 'average_q_price'] = base_price / quarter_mean


In [42]:
# Scale every price by its quarter factor. This overwrites price_doc in place,
# so running the cell a second time applies the factor twice.
train_df['price_doc'] = train_df['price_doc'].mul(train_df['average_q_price'])

## 7. Modeling

### `product_type == 'Investment'`

In [43]:
# Rows with encoded product_type 0 from both splits.
train_df_0 = train_df.loc[train_df['product_type'] == 0]
test_df_0 = test_df.loc[test_df['product_type'] == 0]

# Features: everything except the raw district name, the target and the scaling factor.
non_feature_columns = ['sub_area', 'price_doc', 'average_q_price']
X = train_df_0.drop(columns=non_feature_columns).copy()
# Target is modelled on the log1p scale.
y = np.log1p(train_df_0['price_doc'])
X_test_0 = test_df_0.drop(columns=['sub_area']).copy()

In [44]:
# Disabled alternative: build X / y / X_test_0 from the macro-merged frames instead.
# NOTE(review): train_macro_df is created before the quarterly price normalisation,
# so the price_doc used here would be the un-normalised one — confirm before re-enabling.
# train_macro_df_0 = train_macro_df[train_macro_df['product_type'] == 0]
# test_macro_df_0 = test_macro_df[test_macro_df['product_type'] == 0]

# X = train_macro_df_0.drop(['sub_area', 'price_doc'], axis=1).copy()
# y = np.log1p(train_macro_df_0['price_doc'])
# X_test_0 = test_macro_df_0.drop(['sub_area'], axis=1).copy()

In [45]:
# Random 80/20 hold-out split with a fixed seed; the shapes are shown as the cell output.
holdout_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = holdout_parts
(X_train.shape, X_val.shape)

((13416, 369), (3354, 369))

In [47]:
# Hyper-parameters of the model for product_type 0.
params = dict(
    objective='reg:squarederror',
    tree_method='gpu_hist',
    booster='gbtree',
    n_estimators=10000,      # upper bound; early stopping decides the actual number of rounds
    base_score=5,
    learning_rate=0.05,
    max_depth=4,
    min_child_weight=7,
    subsample=1,
    colsample_bytree=0.9,
    reg_lambda=5,
    reg_alpha=1,
    eval_metric='rmse',
    seed=42,
    nthread=-1,
)

model_0 = XGBRegressor(**params)

# The last eval set (validation) drives early stopping; the train set is logged alongside it.
eval_sets_0 = [(X_train, y_train), (X_val, y_val)]
model_0.fit(X_train, y_train, eval_set=eval_sets_0, early_stopping_rounds=50)

[0]	validation_0-rmse:10.30390	validation_1-rmse:10.29722
[1]	validation_0-rmse:9.78959	validation_1-rmse:9.78336
[2]	validation_0-rmse:9.30100	validation_1-rmse:9.29508
[3]	validation_0-rmse:8.83695	validation_1-rmse:8.83106
[4]	validation_0-rmse:8.39600	validation_1-rmse:8.39050
[5]	validation_0-rmse:7.97714	validation_1-rmse:7.97177
[6]	validation_0-rmse:7.57924	validation_1-rmse:7.57399
[7]	validation_0-rmse:7.20121	validation_1-rmse:7.19584
[8]	validation_0-rmse:6.84209	validation_1-rmse:6.83699
[9]	validation_0-rmse:6.50095	validation_1-rmse:6.49574
[10]	validation_0-rmse:6.17688	validation_1-rmse:6.17190
[11]	validation_0-rmse:5.86903	validation_1-rmse:5.86409
[12]	validation_0-rmse:5.57659	validation_1-rmse:5.57188
[13]	validation_0-rmse:5.29884	validation_1-rmse:5.29421
[14]	validation_0-rmse:5.03497	validation_1-rmse:5.03037
[15]	validation_0-rmse:4.78432	validation_1-rmse:4.77973
[16]	validation_0-rmse:4.54620	validation_1-rmse:4.54164
[17]	validation_0-rmse:4.31999	validati

XGBRegressor(base_score=5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.9, eval_metric='rmse',
             gamma=0, gpu_id=0, importance_type='gain',
             interaction_constraints='', learning_rate=0.05, max_delta_step=0,
             max_depth=4, min_child_weight=7, missing=nan,
             monotone_constraints='()', n_estimators=10000, n_jobs=8,
             nthread=-1, num_parallel_tree=1, random_state=42, reg_alpha=1,
             reg_lambda=5, scale_pos_weight=1, seed=42, subsample=1,
             tree_method='gpu_hist', validate_parameters=1, verbosity=None)

In [48]:
# Predictions on the log1p scale; abs() keeps them non-negative for the log-error metric.
y_pred_train = np.abs(model_0.predict(X_train))
y_pred_val = np.abs(model_0.predict(X_val))

# Convert targets and predictions back to the price scale once.
price_train, price_val = np.expm1(y_train), np.expm1(y_val)
price_pred_train, price_pred_val = np.expm1(y_pred_train), np.expm1(y_pred_val)

separator = '___________________________________'
print(separator)
print('      RMSLE\n')
print('      TRAIN_0:', np.sqrt(mean_squared_log_error(price_train, price_pred_train)))
print(' VALIDATION_0:', np.sqrt(mean_squared_log_error(price_val, price_pred_val)))
print(separator)
print('      MAE\n')
print('      TRAIN_0:', mean_absolute_error(price_train, price_pred_train))
print(' VALIDATION_0:', mean_absolute_error(price_val, price_pred_val))

___________________________________
      RMSLE

      TRAIN_0: 0.09898269550506784
 VALIDATION_0: 0.14076501291145632
___________________________________
      MAE

      TRAIN_0: 545341.6485819172
 VALIDATION_0: 824636.6058810375


### `product_type == 'OwnerOccupier'`

In [49]:
# Rows with encoded product_type 1 from both splits.
owner_train_rows = train_df['product_type'] == 1
owner_test_rows = test_df['product_type'] == 1
train_df_1 = train_df[owner_train_rows]
test_df_1 = test_df[owner_test_rows]

# Features exclude the raw district name, the target and the scaling factor;
# the target is modelled on the log1p scale.
X = train_df_1.drop(columns=['sub_area', 'price_doc', 'average_q_price']).copy()
y = np.log1p(train_df_1['price_doc'])
X_test_1 = test_df_1.drop(columns=['sub_area']).copy()

In [50]:
# Random 80/20 hold-out split with a fixed seed; the shapes are shown as the cell output.
holdout_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = holdout_parts
(X_train.shape, X_val.shape)

((8793, 369), (2199, 369))

In [52]:
# Hyper-parameters of the model for product_type 1.
params = dict(
    objective='reg:squarederror',
    n_estimators=10000,      # upper bound; early stopping decides the actual number of rounds
    tree_method='gpu_hist',
    booster='gbtree',
    base_score=7,
    learning_rate=0.05,
    max_depth=4,
    min_child_weight=7,
    subsample=1,
    colsample_bytree=0.9,
    reg_lambda=15,
    reg_alpha=1,
    eval_metric='rmse',
    seed=42,
    nthread=-1,
)

model_1 = XGBRegressor(**params)

# The last eval set (validation) drives early stopping; the train set is logged alongside it.
eval_sets_1 = [(X_train, y_train), (X_val, y_val)]
model_1.fit(X_train, y_train, eval_set=eval_sets_1, early_stopping_rounds=50)

[0]	validation_0-rmse:8.29537	validation_1-rmse:8.27935
[1]	validation_0-rmse:7.88250	validation_1-rmse:7.86642
[2]	validation_0-rmse:7.49029	validation_1-rmse:7.47416
[3]	validation_0-rmse:7.11773	validation_1-rmse:7.10154
[4]	validation_0-rmse:6.76383	validation_1-rmse:6.74758
[5]	validation_0-rmse:6.42767	validation_1-rmse:6.41135
[6]	validation_0-rmse:6.10836	validation_1-rmse:6.09198
[7]	validation_0-rmse:5.80501	validation_1-rmse:5.78890
[8]	validation_0-rmse:5.51682	validation_1-rmse:5.50086
[9]	validation_0-rmse:5.24306	validation_1-rmse:5.22734
[10]	validation_0-rmse:4.98296	validation_1-rmse:4.96705
[11]	validation_0-rmse:4.73587	validation_1-rmse:4.72008
[12]	validation_0-rmse:4.50111	validation_1-rmse:4.48514
[13]	validation_0-rmse:4.27810	validation_1-rmse:4.26206
[14]	validation_0-rmse:4.06621	validation_1-rmse:4.05039
[15]	validation_0-rmse:3.86494	validation_1-rmse:3.84906
[16]	validation_0-rmse:3.67371	validation_1-rmse:3.65840
[17]	validation_0-rmse:3.49205	validation

XGBRegressor(base_score=7, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.9, eval_metric='rmse',
             gamma=0, gpu_id=0, importance_type='gain',
             interaction_constraints='', learning_rate=0.05, max_delta_step=0,
             max_depth=4, min_child_weight=7, missing=nan,
             monotone_constraints='()', n_estimators=10000, n_jobs=8,
             nthread=-1, num_parallel_tree=1, random_state=42, reg_alpha=1,
             reg_lambda=15, scale_pos_weight=1, seed=42, subsample=1,
             tree_method='gpu_hist', validate_parameters=1, verbosity=None)

In [53]:
# Predictions on the log1p scale; abs() keeps them non-negative for the log-error metric.
y_pred_train = np.abs(model_1.predict(X_train))
y_pred_val = np.abs(model_1.predict(X_val))

# (heading, scorer) pairs; each scorer converts from the log1p scale back to prices first.
report_1 = (
    ('      RMSLE\n',
     lambda actual, predicted: np.sqrt(mean_squared_log_error(np.expm1(actual), np.expm1(predicted)))),
    ('      MAE\n',
     lambda actual, predicted: mean_absolute_error(np.expm1(actual), np.expm1(predicted))),
)
for heading, score in report_1:
    print('___________________________________')
    print(heading)
    print('      TRAIN_1:', score(y_train, y_pred_train))
    print(' VALIDATION_1:', score(y_val, y_pred_val))

___________________________________
      RMSLE

      TRAIN_1: 0.05212436285230924
 VALIDATION_1: 0.08872533089650055
___________________________________
      MAE

      TRAIN_1: 286834.67579061724
 VALIDATION_1: 450419.1588347689


In [54]:
# Test rows whose product_type is missing belong to neither group; collect them separately.
missing_product_type = test_df['product_type'].isna()
test_df_nan = test_df.loc[missing_product_type]
X_test_nan = test_df_nan.drop(columns=['sub_area']).copy()

In [55]:
submission = pd.read_csv('data/submits/sample_submission.csv', index_col='id')

# Predict on the log1p scale and convert back to prices.
# Rows with a missing product_type are scored with model_1.
pred_0 = np.expm1(model_0.predict(X_test_0))
pred_1 = np.expm1(model_1.predict(X_test_1))
pred_nan = np.expm1(model_1.predict(X_test_nan))

# A negative value here means the model produced a negative log-scale prediction.
if any((predictions < 0).any() for predictions in (pred_0, pred_1, pred_nan)):
    print('WARNING: NEGATIVE PREDICTIONS')

In [70]:
# Hand-tuned multipliers applied to the predictions of each group.
# The trailing values in the comments are kept from the original cell
# (presumably settings tried earlier — TODO confirm).
INVESTMENT_SCALE = 0.88      # 0.87
OWNER_OCCUPIER_SCALE = 0.90  # 0.93
UNKNOWN_TYPE_SCALE = 1.0     # 0.95

submission.loc[X_test_0.index, 'price_doc'] = INVESTMENT_SCALE * pred_0
submission.loc[X_test_1.index, 'price_doc'] = OWNER_OCCUPIER_SCALE * pred_1
submission.loc[X_test_nan.index, 'price_doc'] = UNKNOWN_TYPE_SCALE * pred_nan

# `index` is a boolean flag; the column header for the index belongs in `index_label`.
# (The original `index='id'` only worked because a non-empty string is truthy.)
submission.to_csv('data/submits/submission.csv', index=True, index_label='id')

In [71]:
# Shell escape: uploads data/submits/submission.csv to the sberbank-russian-housing-market
# competition through the kaggle command-line tool. Every run of this cell makes a new submission.
!kaggle competitions submit -c sberbank-russian-housing-market -f "data/submits/submission.csv" -m "XGBRegressor normalized and splitted"

100%|████████████████████████████████████████| 121k/121k [00:01<00:00, 63.0kB/s]
Successfully submitted to Sberbank Russian Housing Market