In [1]:
import pandas as pd
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)
import seaborn as sns
import pickle
import numpy as np
import math

In [2]:
FACTORS = ["area", "final_rooms_encoded", "series_name_encoded", "building_type_str_encoded", 
               "metro_foot_id_encoded", "metro_foot_time",
               "metro_transport_id_encoded", "metro_transport_time",
               "renovation", "kitchen_area", "year", 
               "ponds_time", "ponds_count", "area_segment",
           'building_min', 'building_max', 'building_mean', 'building_std', 'building_median',
'building_min_total', 'building_max_total', 'building_mean_total', 'building_median_total']

In [3]:
HOUSES_DB_PATH = '../../price-estimator-rest-api/data/all.houses.with.sell.price.stat.tsv'

In [4]:
MODEL_PATH = '../../price-estimator-rest-api/model/regions_sell_best_model.dat'

In [5]:
FRAUD_PATH = 'fraud.data.tsv'

In [6]:
ACTIVE_VOS_PATH = 'active.data.tsv'
ACTIVE_VOS_R3_OFFERS_PATH = 'active.data.r3.offers.tsv'

In [7]:
houses_df = pd.read_table(HOUSES_DB_PATH)

In [8]:
model = pickle.load(open(MODEL_PATH, "rb"))

In [9]:
# houses_df.head()

In [10]:
# ls ../../price-estimator-rest-api/model/

In [11]:
fraud_df = pd.read_table(FRAUD_PATH)

In [65]:
def clean_msc_piter(df, address_key = 'address'):
    df = df[~df[address_key].str.contains('Россия, Москва')]
    df = df[~df[address_key].str.contains('Россия, Санкт-Петербург')]
    df = df[~df[address_key].str.contains('Россия, Ленинградская область')]
    df = df[~df[address_key].str.contains('Россия, Московская область')]
    return df
    

In [13]:
print(len(fraud_df))

582


In [14]:
fraud_df = clean_msc_piter(fraud_df)

In [15]:
print(len(fraud_df))

546


In [16]:
final_df = pd.merge(fraud_df, houses_df, left_on = 'address', right_on = 'unified_address')

In [17]:
print(len(fraud_df))

546


In [18]:
print(len(final_df))

347


In [19]:
def encode_rooms(df):
    df['final_rooms_encoded'] = df.rooms
    df.loc[((df.final_rooms_encoded.isnull()) | (df.final_rooms_encoded == 0)) & (df.studio == True), 'final_rooms_encoded'] = 0
    df.loc[((df.final_rooms_encoded.isnull()) | (df.final_rooms_encoded == 0)) & (df.open_plan == True), 'final_rooms_encoded'] = 0


In [20]:
def addTotalPricesBasedOnArea(df):
    df['building_min_total'] = df.building_min * df.area
    df['building_max_total'] = df.building_max * df.area
    df['building_mean_total'] = df.building_mean * df.area
    df['building_median_total'] = df.building_median * df.area
    df['building_var'] = df.building_std ** 2

In [21]:
def add_area_segment(df):
    area_bins = [10,30,50,70,100, 1000]
    df['area_segment'] = pd.cut(df['area'], area_bins, labels=False) + 1

In [22]:
RENOVATION_TO_CODE = {
    'UNKNOWN': 0,
    'DESIGNER_RENOVATION': 1,
    'NEEDS_RENOVATION': 2,
    'NORMAL': 3,
    'PARTIAL_RENOVATION': 4,
    'PRIME_RENOVATION': 5,
    'RENOVATED': 6,
    'COSMETIC_DONE': 7,
    'COSMETIC_REQUIRED': 8,
    'EURO': 10,
    'GOOD': 11,

    'BEFORE_CLEAN': 12,
    'CLEAN': 13,
    'TURNKEY': 14
}
def encode_renovation(df):
    f = lambda x: RENOVATION_TO_CODE.get(x, 0)
    df['renovation'] = df['renovation'].map(f)

In [23]:
encode_renovation(final_df)

In [24]:
final_df.head()

Unnamed: 0,user_id,offer_id,address,offer_type,category,price,renovation,area,kitchen_area,rooms,open_plan,studio,siteId,building_id,year,porches,type,hasGas,isGuarded,flats,floors,heatingType,ceilingHeight,lat,hasRubbishChute,unified_address,expectDemolition,hasSecurity,isBanned,hasLift,seriesId,lon,verba_code,series_name,building_type_str,metro_foot_time,metro_foot_id,metro_transport_time,metro_transport_id,exp_metro_foot_time,exp_metro_foot_id,exp_metro_transport_time,exp_metro_transport_id,ponds_time,ponds_count,center_time,building_mean,building_median,building_min,building_max,building_std,series_name_encoded,building_type_str_encoded,metro_foot_id_encoded,metro_transport_id_encoded,exp_metro_foot_id_encoded,exp_metro_transport_id_encoded,reconstructionYear
0,411394380,1522673411449262337,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",SELL,APARTMENT,2000000,10,68.0,12.0,2,0,0,,755303642207757829,2011.0,,2.0,,,,10.0,,280.0,46.946804,0.0,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",,,,1.0,1564812.0,142.73792,1564812.0,Индивидуальный проект,MONOLIT,1000,0_metro_foot,1000,0_metro_transport,1000,0_exp_metro_foot,1000,0_exp_metro_transport,1000,0,-1,120040.833333,118750.0,102632.0,140351.0,15352.984289,25,4,0,0,0,0,
1,654848724,174899699551081985,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",SELL,APARTMENT,2000000,10,68.0,12.0,2,0,0,,755303642207757829,2011.0,,2.0,,,,10.0,,280.0,46.946804,0.0,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",,,,1.0,1564812.0,142.73792,1564812.0,Индивидуальный проект,MONOLIT,1000,0_metro_foot,1000,0_metro_transport,1000,0_exp_metro_foot,1000,0_exp_metro_transport,1000,0,-1,120040.833333,118750.0,102632.0,140351.0,15352.984289,25,4,0,0,0,0,
2,654838904,7757424879022759680,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",SELL,APARTMENT,2000000,10,68.0,12.0,2,0,0,,755303642207757829,2011.0,,2.0,,,,10.0,,280.0,46.946804,0.0,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",,,,1.0,1564812.0,142.73792,1564812.0,Индивидуальный проект,MONOLIT,1000,0_metro_foot,1000,0_metro_transport,1000,0_exp_metro_foot,1000,0_exp_metro_transport,1000,0,-1,120040.833333,118750.0,102632.0,140351.0,15352.984289,25,4,0,0,0,0,
3,411394380,3077404908959915521,"Россия, Республика Дагестан, Махачкала, Редукторный посёлок, улица Лаптиева, 53",SELL,APARTMENT,2200000,1,89.0,13.0,3,0,0,,5369892064100816816,2008.0,,1.0,,,,14.0,,280.0,42.96692,0.0,"Россия, Республика Дагестан, Махачкала, Редукторный посёлок, улица Лаптиева, 53",,,,1.0,,47.54604,,UNKNOWN,BRICK,1000,0_metro_foot,1000,0_metro_transport,1000,0_exp_metro_foot,1000,0_exp_metro_transport,1000,0,-1,50222.166667,47727.5,44444.0,61017.0,6612.233977,18,1,0,0,0,0,
4,654848724,1376701971863086849,"Россия, Республика Дагестан, Махачкала, Редукторный посёлок, улица Лаптиева, 53",SELL,APARTMENT,1800000,1,79.0,13.0,2,0,0,,5369892064100816816,2008.0,,1.0,,,,14.0,,280.0,42.96692,0.0,"Россия, Республика Дагестан, Махачкала, Редукторный посёлок, улица Лаптиева, 53",,,,1.0,,47.54604,,UNKNOWN,BRICK,1000,0_metro_foot,1000,0_metro_transport,1000,0_exp_metro_foot,1000,0_exp_metro_transport,1000,0,-1,50222.166667,47727.5,44444.0,61017.0,6612.233977,18,1,0,0,0,0,


In [25]:
addTotalPricesBasedOnArea(final_df)

In [26]:
add_area_segment(final_df)

In [27]:
encode_rooms(final_df)

In [28]:
pred = model.predict(final_df[FACTORS])

In [29]:
round_pred = list(map(lambda price: int((price + 500) / 1000) * 1000, pred))

In [49]:
def get_df_for_analysis(df, pred_df, df_address_key = 'address'):
    df = df[['offer_id', df_address_key, 'area', 'price', 'renovation']]
    round_pred = list(map(lambda price: int((price + 500) / 1000) * 1000, pred_df))
    df['pred_price'] = round_pred
    df['price_pred_diff'] = (df['pred_price'] - df['price'])/df['price']
    return df


In [31]:
df_pred = get_df_for_analysis(final_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [32]:
# list(df_pred)

In [33]:
# list(final_df)

In [34]:
df_pred.head()

Unnamed: 0,offer_id,address,area,price,renovation,pred_price,price_pred_diff
0,1522673411449262337,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",68.0,2000000,10,4847000,1.4235
1,174899699551081985,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",68.0,2000000,10,4847000,1.4235
2,7757424879022759680,"Россия, Сахалинская область, Южно-Сахалинск, проспект Победы, 63Г",68.0,2000000,10,4847000,1.4235
3,3077404908959915521,"Россия, Республика Дагестан, Махачкала, Редукторный посёлок, улица Лаптиева, 53",89.0,2200000,1,4897000,1.225909
4,1376701971863086849,"Россия, Республика Дагестан, Махачкала, Редукторный посёлок, улица Лаптиева, 53",79.0,1800000,1,4468000,1.482222


In [35]:
len(df_pred)

347

In [36]:
len(df_pred[df_pred.price_pred_diff > 0.35])

345

In [37]:
df_pred[df_pred.price_pred_diff < 0.35]

Unnamed: 0,offer_id,address,area,price,renovation,pred_price,price_pred_diff
70,2520810321828934912,"Россия, Тула, улица Софьи Перовской, 3",60.0,1600000,10,1851000,0.156875
345,2713771764945546496,"Россия, Томск, Московский тракт, 68",94.0,2000000,1,2635000,0.3175


In [38]:
df_pred[df_pred.price_pred_diff < 0.50]

Unnamed: 0,offer_id,address,area,price,renovation,pred_price,price_pred_diff
70,2520810321828934912,"Россия, Тула, улица Софьи Перовской, 3",60.0,1600000,10,1851000,0.156875
345,2713771764945546496,"Россия, Томск, Московский тракт, 68",94.0,2000000,1,2635000,0.3175


In [39]:
def encode_renovation_float(renovation_float):
    if (renovation_float is np.NaN) or math.isnan(renovation_float):
        renovation_encoded = 0
    else:
        renovation_encoded = int(renovation_float)
    return renovation_encoded


In [40]:
def encode_renovation_df_float(df):
    df['renovation'] = df['renovation'].map(encode_renovation_float)

In [50]:
def get_pred_df(df, houses_df, renovation_is_float = False, df_address_key = 'address'):
    final_df = pd.merge(df, houses_df, left_on = df_address_key, right_on = 'unified_address')
    if renovation_is_float:
        encode_renovation_df_float(final_df)
    else:
        encode_renovation(final_df)
    addTotalPricesBasedOnArea(final_df)
    add_area_segment(final_df)
    encode_rooms(final_df)
    pred = model.predict(final_df[FACTORS])
    df_pred = get_df_for_analysis(final_df, pred, df_address_key = df_address_key)
    return df_pred
    

In [42]:
# active_vos_df = pd.read_table(ACTIVE_VOS_PATH)
active_vos_df = pd.read_table(ACTIVE_VOS_R3_OFFERS_PATH)


In [43]:
active_vos_df.sample(20)

Unnamed: 0,offer_id,unified_address,type,category,price,renovation,area,kitchen_area,rooms,open_plan,studio,creation_date
598,527780084104930560,"Россия, Московская область, Королёв, микрорайон Первомайский, улица Кирова, 48А",1,2,5650000,11.0,76.0,10.0,4,0.0,0.0,2018-06-27 17:15:14
269,4258003757611758848,"Россия, Самара, улица 22 Партсъезда, 7",1,2,1850000,11.0,43.2,6.3,3,0.0,0.0,2018-06-27 17:23:10
572,1364874262769163264,"Россия, Челябинск, улица Университетская Набережная, 64",1,2,1690000,,37.0,,1,0.0,0.0,2018-06-27 17:29:58
1131,1025073525993865728,"Россия, Ленинградская область, Всеволожский район, посёлок Мурино, жилой комплекс Муринский Посад, 3",1,2,1761000,3.0,25.0,,0,,1.0,2018-06-27 00:11:16
1825,909110996875129601,"Россия, Свердловская область, Екатеринбург, Мурзинская улица, 30",1,2,1660000,10.0,31.0,6.0,1,0.0,0.0,2018-06-27 11:03:41
2044,8628304072914580737,"Россия, Пенза, проспект Строителей, 68",1,2,1600000,11.0,35.35,6.7,1,0.0,0.0,2018-06-27 10:52:34
291,2019729305302670592,"Россия, Ростов-на-Дону, поселок Орджоникидзе, улица ТУПОЛЕВА",1,2,1900000,,42.0,,2,0.0,0.0,2018-06-27 17:50:27
2008,7075082489853891328,"Россия, Краснодар, жилой массив Пашковский, Краевая улица, 1/3",1,2,2160000,3.0,46.0,10.0,1,0.0,0.0,2018-06-27 11:43:32
1786,2368223848067644672,"Россия, Московская область, Сергиево-Посадский район, поселок Реммаш, Институтская улица, 14",1,2,2300000,3.0,54.9,6.0,3,0.0,0.0,2018-06-27 10:10:01
1111,7967718448370051585,"Россия, Новосибирск, улица Петухова, 99/1",1,2,1265000,,23.0,,1,0.0,0.0,2018-06-27 06:08:28


In [44]:
# active_vos_df['address_backup'] = active_vos_df['address']

In [45]:

def get_unified_address(row):
    result = row.address_backup
    if not 'Россия' in row.address_backup:
        result = 'Россия, ' + row.address_backup
    return result
        
    


In [46]:
# active_vos_df['address'] = active_vos_df.apply(get_unified_address, )

In [51]:
df_pred_fraud = get_pred_df(fraud_df, houses_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [52]:
df_pred_active = get_pred_df(active_vos_df, houses_df, 
                             renovation_is_float = True, df_address_key = 'unified_address')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [53]:
df_pred_active.sample(30)

Unnamed: 0,offer_id,unified_address,area,price,renovation,pred_price,price_pred_diff
84,3494707465446537985,"Россия, Краснодар, микрорайон Гидростроителей, улица Автолюбителей",56.0,2360000,10,2518000,0.066949
632,3112417087454755584,"Россия, Томск, проспект Мира, 5",36.0,1900000,0,1689000,-0.111053
919,7679693335191045121,"Россия, Киров, улица Героя Николая Рожнева, 4",96.5,4352000,2,3886000,-0.107077
1008,230724849296926977,"Россия, Краснодар, Вологодская улица, 20",77.3,4250000,0,3985000,-0.062353
616,948905888572135937,"Россия, Киров, Современная улица, 13",34.0,1060000,2,1182000,0.115094
441,2310827049709739776,"Россия, Омск, улица Туполева, 5Г",34.2,1400000,7,1535000,0.096429
822,8436905926757550593,"Россия, Московская область, Воскресенск, улица Калинина, 54",45.0,1400000,0,1727000,0.233571
904,325199143467508225,"Россия, Самара, улица Советской Армии, 144",31.0,1950000,0,1765000,-0.094872
134,5761762765792526336,"Россия, Самарская область, Тольятти, улица Толстого, 13",27.0,1200000,0,1037000,-0.135833
93,2926009032464534273,"Россия, Московская область, Раменское, Северное шоссе, к21",39.5,1900000,2,2056000,0.082105


In [58]:
print(len(df_pred_fraud))
print(len(df_pred_fraud[df_pred.price_pred_diff < 0.45]))

347
2


In [60]:
print(len(df_pred_active))
print(len(df_pred_active[df_pred_active.price_pred_diff < 0.45]))

1396
1332


In [61]:
df_pred_active[df_pred_active.price_pred_diff > 0.35]

Unnamed: 0,offer_id,unified_address,area,price,renovation,pred_price,price_pred_diff
50,3844768928362857985,"Россия, Республика Крым, Керчь, улица Будённого, 22",63.00,2750000,0,4498000,0.635636
68,1848602320425510144,"Россия, Красноярск, микрорайон Николаевка, улица Чкалова, 41",139.00,5800000,0,8169000,0.408448
97,6403420819074515969,"Россия, Республика Крым, Ялта, улица Свердлова, 48А",108.00,13027000,0,22503000,0.727412
125,249243374149704448,"Россия, Курган, Половинская улица, 8А",18.00,650000,11,898000,0.381538
130,7512430842049283840,"Россия, Московская область, Серпухов, улица Крюкова, 14",75.70,2850000,0,4169000,0.462807
142,5226386676868258560,"Россия, Вологодская область, Кириллов, улица Урицкого, 12",48.00,850000,3,1229000,0.445882
148,5544479556618407680,"Россия, Пенза, 3-й Подгорный проезд, 6",30.00,750000,0,1242000,0.656000
161,8323669894199319552,"Россия, Краснодар, микрорайон Камвольно-суконный Комбинат, Магистральная улица, 11",64.00,2100000,0,2887000,0.374762
163,4606513874759808768,"Россия, Краснодарский край, Сочи, Курортный проспект, 105",80.00,7000000,0,13141000,0.877286
215,9039260518891515136,"Россия, Республика Дагестан, Махачкала, улица Ирчи Казака, 37А",46.00,736000,0,1173000,0.593750


In [66]:
df_pred_active_cleaned = clean_msc_piter(df_pred_active, address_key='unified_address')

In [68]:
df_pred_active_cleaned[df_pred_active_cleaned.price_pred_diff > 0.35]

Unnamed: 0,offer_id,unified_address,area,price,renovation,pred_price,price_pred_diff
50,3844768928362857985,"Россия, Республика Крым, Керчь, улица Будённого, 22",63.00,2750000,0,4498000,0.635636
68,1848602320425510144,"Россия, Красноярск, микрорайон Николаевка, улица Чкалова, 41",139.00,5800000,0,8169000,0.408448
97,6403420819074515969,"Россия, Республика Крым, Ялта, улица Свердлова, 48А",108.00,13027000,0,22503000,0.727412
125,249243374149704448,"Россия, Курган, Половинская улица, 8А",18.00,650000,11,898000,0.381538
142,5226386676868258560,"Россия, Вологодская область, Кириллов, улица Урицкого, 12",48.00,850000,3,1229000,0.445882
148,5544479556618407680,"Россия, Пенза, 3-й Подгорный проезд, 6",30.00,750000,0,1242000,0.656000
161,8323669894199319552,"Россия, Краснодар, микрорайон Камвольно-суконный Комбинат, Магистральная улица, 11",64.00,2100000,0,2887000,0.374762
163,4606513874759808768,"Россия, Краснодарский край, Сочи, Курортный проспект, 105",80.00,7000000,0,13141000,0.877286
215,9039260518891515136,"Россия, Республика Дагестан, Махачкала, улица Ирчи Казака, 37А",46.00,736000,0,1173000,0.593750
239,1782667675631917057,"Россия, Республика Башкортостан, Уфа, улица Кузнецовский Затон, 20Б",19.75,889000,0,1211000,0.362205


In [69]:
df_pred_active_cleaned[df_pred_active_cleaned.price_pred_diff > 0.35].to_csv('price_less_more_than_35_percent.tsv', index=False, sep='\t')

In [70]:
active_vos_all_df = pd.read_table('active.vos.sell.regions.tsv')

In [71]:
df_pred_active = get_pred_df(active_vos_all_df, houses_df, 
                             renovation_is_float = True, df_address_key = 'unified_address')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [72]:
df_pred_active.sample(20)

Unnamed: 0,offer_id,unified_address,area,price,renovation,pred_price,price_pred_diff
38722,7414422296374227713,"Россия, Рязань, Московский административный округ, район Приокский, Октябрьская улица, 18",68.0,2300000,3,2175000,-0.054348
59758,2995926543921211137,"Россия, Челябинск, Солнечная улица, 22",63.0,1950000,0,2578000,0.322051
38381,858239851266051584,"Россия, Республика Марий Эл, Йошкар-Ола, Вознесенская улица, 34",73.0,3000000,0,2712000,-0.096
15537,8890010347956342272,"Россия, Краснодар, микрорайон Молодёжный, Душистая улица, 79к1",40.0,1850000,7,1738000,-0.060541
59606,5554039335578767361,"Россия, Новгородская область, Малая Вишера, улица Заводской Домострой, 1А",28.0,699000,3,759000,0.085837
43903,3465944150189009665,"Россия, Липецк, улица Максима Горького, 1",51.0,2180000,0,2309000,0.059174
56767,2801119509064502272,"Россия, Владимирская область, Александровский район, Струнино, Заречная улица, 12",50.0,1550000,3,1495000,-0.035484
4022,607737349959106561,"Россия, Забайкальский край, Чита, 9-й микрорайон, 1",68.0,2100000,2,2111000,0.005238
75346,2721627740964669696,"Россия, Самара, Революционная улица, 3",74.0,5300000,10,5974000,0.12717
62550,4911064871050557184,"Россия, Чувашская Республика, Чебоксары, проспект Максима Горького, 41",38.0,1580000,0,1937000,0.225949


In [76]:
print(len(df_pred_active))
print(len(df_pred_active[df_pred_active.price_pred_diff < 0.45]))
print(len(df_pred_active[df_pred_active.price_pred_diff < 0.50]))
print(len(df_pred_active[df_pred_active.price_pred_diff < 0.80]))
print(len(df_pred_active[df_pred_active.price_pred_diff < 0.99]))

93726
90219
90873
92532
92866


In [78]:
print(len(df_pred_active[df_pred_active.price_pred_diff >= 1]))

852


In [80]:
df_pred_active[df_pred_active.price_pred_diff >= 1].to_csv('price_less_more_than_100_percent.tsv', index=False, sep='\t')

In [81]:
df_pred_active[df_pred_active.price_pred_diff >= 1].head()

Unnamed: 0,offer_id,unified_address,area,price,renovation,pred_price,price_pred_diff
71,3149986549106730753,"Россия, Краснодар, Музыкальный микрорайон, улица имени Сергея Есенина",38.0,699000,0,2002000,1.864092
124,5971584411243704064,"Россия, Краснодарский край, Сочи, микрорайон Мамайка, Волжская улица",30.0,900000,3,3345000,2.716667
125,1439373001459637249,"Россия, Краснодарский край, Сочи, микрорайон Мамайка, Волжская улица",29.0,1200000,3,3011000,1.509167
126,2485168474424530176,"Россия, Краснодарский край, Сочи, микрорайон Мамайка, Волжская улица",28.0,1100000,3,3138000,1.852727
130,2979002486036103936,"Россия, Краснодарский край, Сочи, микрорайон Мамайка, Волжская улица",27.0,1161000,3,2984000,1.570198


In [None]:
print(len(df_pred_active[df_pred_active.price_pred_diff >= 1]))