In [13]:
import sys
import pandas as pd
from tqdm import tqdm_notebook
import time
import numpy as np

sys.path.append('proto.out')
import price_prediction_pb2
import common_pb2
import subprocess

# Host name (no port) that is presumably the testing deployment of the price-estimator
# service -- TODO confirm. It is not referenced by the code visible in this notebook export.
TESTING_HOST = 'realty-price-estimator.vrts-slb.test.vertis.yandex.net'
# host:port used as the default `hostname` argument of test_one_request_api.
LOCALHOST = 'localhost:8895'

def get_price_request(row, is_corrupted):
    """Build a ``PricePredictionRequest`` protobuf message from one offer record.

    Args:
        row: Object exposing the attributes read below (``offer_id``,
            ``offer_type``, ``rooms``, ``area``, ``kitchen_area``,
            ``first_price``, ``renovation``, ``subject_federation_id``,
            ``unified_address``, ``lat``, ``lon``, ``locality_name``).
            In this notebook it is a row yielded by ``DataFrame.itertuples()``.
        is_corrupted: Multiplier applied to the literal suffix ``'temp'`` that
            is appended to the address. ``False`` leaves the address intact,
            ``True`` appends ``'temp'`` once.

    Returns:
        The populated ``price_prediction_pb2.PricePredictionRequest``.
    """
    # str * bool: 'temp' * False == '' and 'temp' * True == 'temp'.
    # The same (possibly corrupted) address is sent in both address fields.
    address = row.unified_address + ('temp' * is_corrupted)
    subject_federation_id = int(row.subject_federation_id)

    request = price_prediction_pb2.PricePredictionRequest()
    request.offer_id = str(row.offer_id)
    request.offer_type = row.offer_type
    # Every request is sent with the APARTMENT category, whatever the row holds.
    request.offer_category = common_pb2.APARTMENT
    request.rooms_total = row.rooms
    request.area = row.area
    request.kitchen_space = row.kitchen_area
    request.price = int(row.first_price)
    # Placeholder values: the input row provides neither field.
    request.building_id = ""
    request.description = '''temp description'''
    request.renovation = int(row.renovation)
    request.subject_federation_id = subject_federation_id
    request.geocoder_address = address

    geo_info = request.geo_info
    geo_info.latitude = row.lat
    geo_info.longitude = row.lon
    geo_info.geocoder_id = 0
    geo_info.locality_name = row.locality_name
    geo_info.subject_federation_id = subject_federation_id
    geo_info.geocoder_address = address
    return request


def write_request_proto_to_file(pred_request, output_file='proto.request.example.binary'):
    """Serialize a protobuf message and write the bytes to ``output_file``.

    Args:
        pred_request: Object with a ``SerializeToString()`` method returning
            bytes (here a ``PricePredictionRequest``).
        output_file: Path of the file to create or truncate.

    Side effects:
        Besides writing the file, the serialized bytes are stored in the
        module-level name ``result``. That is kept only so code relying on it
        keeps working -- NOTE(review): looks like a debugging leftover,
        consider removing it.
    """
    global result
    result = pred_request.SerializeToString()
    # The context manager closes the handle even if the write raises.
    with open(output_file, 'wb') as request_file:
        request_file.write(result)

# print(common_pb2.RENOVATION_TURNKEY)
# print(common_pb2.OfferType.Name(common_pb2.SELL))


def test_one_request_api(price_pred_request,
                         request_filepath = 'proto.request.rent.example.binary',
                         response_filepath = 'proto.response.rent.example.binary',
                         hostname = LOCALHOST):
    """POST one serialized request to ``/api/v2/get_price`` and parse the reply.

    The request is written to ``request_filepath``, sent with ``curl``, and the
    raw response body is stored in ``response_filepath``.

    Args:
        price_pred_request: Protobuf request message to send.
        request_filepath: File that receives the serialized request.
        response_filepath: File that receives the raw response body.
        hostname: ``host[:port]`` of the service.

    Returns:
        The ``price_prediction_pb2.PricePredictionResponse`` parsed from the
        response body. Earlier versions returned ``None``, so callers that
        ignore the return value are unaffected.

    Raises:
        subprocess.CalledProcessError: if ``curl`` exits with a non-zero status.
    """
    write_request_proto_to_file(price_pred_request, output_file=request_filepath)

    # An argument list avoids the shell entirely, so paths containing spaces or
    # shell metacharacters cannot break or alter the command.
    curl_command = [
        'curl', '-s',
        '--header', 'Content-Type: application/protobuf',
        '--request', 'POST',
        '--data-binary', '@' + request_filepath,
        'http://' + hostname + '/api/v2/get_price',
    ]
    # Redirecting stdout from Python mirrors the former `> file` redirection:
    # the response file is created/truncated even when the body is empty.
    with open(response_filepath, 'wb') as response_file:
        subprocess.check_call(curl_command, stdout=response_file)

    price_resp = price_prediction_pb2.PricePredictionResponse()
    with open(response_filepath, 'rb') as response_file:
        price_resp.ParseFromString(response_file.read())
    return price_resp
    
# Offer-type codes compared against `row.offer_type`.
# NOTE(review): presumably these mirror the common_pb2.OfferType numbering -- confirm.
RENT = 2
SELL = 1

# NOTE(review): the names suggest Moscow / Moscow region and
# St. Petersburg / Leningrad region -- confirm against the data dictionary.
MSK_MO_SUBJECT_FEDERATION_ID = 1
PITER_LO_SUBJECT_FEDERATION_ID = 10174

# Fixed seed so the shuffled order (and therefore the set of offers sent) is reproducible.
SHUFFLE_SEED = 42
# The loop stops once this many offers have been sent; 2001 matches the
# previous `i > 2000` stop condition exactly.
MAX_OFFERS_SENT = 2001

df = pd.read_csv('../price-estimator-rest-api/data/eval_df.csv',sep='\t')
df = df[df['locality_name'].notnull()]
df = df.sample(frac=1, random_state=SHUFFLE_SEED)

i = 0
# `total` lets the progress bar show real progress; itertuples() has no length.
for row in tqdm_notebook(df.itertuples(), total=len(df)):
    # Offers from the two listed subject federations are always sent;
    # offers from anywhere else are sent only when they are SELL offers.
    is_msk_or_piter = row.subject_federation_id in (
        MSK_MO_SUBJECT_FEDERATION_ID,
        PITER_LO_SUBJECT_FEDERATION_ID,
    )
    if not (is_msk_or_piter or row.offer_type == SELL):
        continue

    # Each offer is sent twice: with the original address ...
    price_prediction_request = get_price_request(row, False)
    test_one_request_api(price_prediction_request, request_filepath ='./proto/proto.request{}.example.binary'.format(i),
                        response_filepath = './proto/proto.response{}.example.binary'.format(i))

    # ... and with the address deliberately corrupted.
    price_prediction_request = get_price_request(row, True)
    test_one_request_api(price_prediction_request,
                        request_filepath ='./proto/proto.request_corrupred{}.example.corrupted.binary'.format(i),
                        response_filepath = './proto/proto.response_corrupred{}.example.corrupted.binary'.format(i))
    i += 1
    if i >= MAX_OFFERS_SENT:
        break

  interactivity=interactivity, compiler=compiler, result=result)


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

KeyboardInterrupt: 

In [51]:
# Read the captured log, skipping the first 7 and the last 2 lines.
# NOTE(review): presumably those lines are start-up / shutdown noise -- confirm.
with open('b.out') as log_file:
    logs = log_file.readlines()[7:-2]

# Offer ids: the text after the first ': ' with its final character dropped
# (the trailing newline, assuming the id is the last thing on the line).
result_id = [
    int(log_line.split(': ')[1][:-1])
    for log_line in logs
    if 'processing offer: ' in log_line
]

# Second-to-last space-separated token of every 'Predict exec time: ' line.
# NOTE(review): the marker mentions an execution time, yet later cells treat
# these numbers as predicted prices -- confirm the log format.
result_pred = [
    int(log_line.split(' ')[-2])
    for log_line in logs
    if 'Predict exec time: ' in log_line
]
        

In [57]:
# Parsed values alternate: even positions feed the 'model' column, odd positions
# feed the 'grid' column; the frame is indexed by the offer ids at odd positions.
# NOTE(review): presumably even = request with the intact address and
# odd = request with the corrupted address -- confirm against the request loop.
predicted = pd.DataFrame(
    data=result_pred[1::2],
    index=result_id[1::2],
    columns=['grid'],
)
predicted['model'] = result_pred[::2]

# Re-index the source frame by offer id (in place) so prices can be looked up by id.
df.index = df['offer_id']
predicted['true'] = df.loc[predicted.index, 'first_price']

In [62]:
def error_percentage(y_test, y_pred, threshold=0.15):
    """Return the share of predictions whose relative error is below ``threshold``.

    Despite the name, the result is a fraction between 0 and 1 (not a
    percentage), and it counts the *accurate* predictions, i.e. those with
    ``|y_pred / y_test - 1| < threshold``.

    Args:
        y_test: pandas Series of reference values.
        y_pred: pandas Series of predicted values.
        threshold: Maximum relative deviation (exclusive) for a prediction to
            count as accurate. Defaults to 0.15, the previously hard-coded value.

    Returns:
        float: number of accurate predictions divided by ``y_pred.shape[0]``.

    Raises:
        ZeroDivisionError: if ``y_pred`` is empty.
    """
    relative_deviation = (np.divide(y_pred, y_test) - 1).abs()
    within_tolerance = relative_deviation < threshold
    # int(...) keeps plain-Python division semantics for the final ratio.
    return int(within_tolerance.sum()) / y_pred.shape[0]

def mean_absolute_percentage_error(y_true, y_pred):
    """Compute the mean absolute percentage error (MAPE), in percent.

    Both inputs are converted to NumPy arrays, so lists, arrays and pandas
    Series are accepted and compared position by position.

    Args:
        y_true: Reference values; used as the denominator.
        y_pred: Predicted values.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100``.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    relative_error = (actual - forecast) / actual
    return np.mean(np.abs(relative_error)) * 100

In [64]:
# MAPE (in percent) of the 'grid' column against the 'true' prices.
mean_absolute_percentage_error(predicted['true'], predicted['grid'])

20.191464978166415

In [65]:
# MAPE (in percent) of the 'model' column against the 'true' prices.
mean_absolute_percentage_error(predicted['true'], predicted['model'])

15.983706091517833