In [1]:
import json
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

from rating.validator import union_addresses, extract_addresses
from rating.ya_maps_client import YandexApiClient, Request, Response
from rating.address import Address
from rating.utils import get_city_



# Shared plotting constants for figures in this notebook.
fontsize, figsize = 18, (15, 10)
plot_alpha, linewidth = 0.3, 4

# Let pandas display up to 500 columns and 500 rows before truncating output.
max_display_items = 500
pd.set_option('display.max_columns', max_display_items)
pd.set_option('display.max_rows', max_display_items)

In [2]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values with the same shape as ``y_true``.

    Returns:
        float: Square root of the mean of the squared differences.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Implemented with numpy (imported at the top of the notebook): neither
    # `sqrt` nor `mean_squared_error` is imported here, so the previous body
    # raised NameError as soon as the function was called.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    if actual.shape != predicted.shape:
        # Fail loudly instead of letting numpy broadcasting hide a length mismatch.
        raise ValueError(
            'y_true and y_pred must have the same shape, got %s and %s'
            % (actual.shape, predicted.shape)
        )
    return float(np.sqrt(np.mean(np.square(actual - predicted))))

In [3]:
# Load both splits, using the first CSV column as the DataFrame index.
csv_options = {'index_col': 0}
train = pd.read_csv('data/train.csv', **csv_options)
test = pd.read_csv('data/test.csv', **csv_options)

In [4]:
# Apply the same coordinate-column mapping to both splits.
# NOTE(review): as written, every key maps to itself, so this rename leaves the
# column names unchanged — confirm whether the source headers were meant to differ.
coord_column_names = {'lat': 'lat', 'long': 'long'}
test = test.rename(columns=coord_column_names)
train = train.rename(columns=coord_column_names)

### Создаём общие данные train-test

In [5]:
# Stack train and test into one frame so later steps run over both splits at once.
# `pd.concat` is used because `DataFrame.append` was removed in pandas 2.0.
# Original index labels are kept; `sort=False` preserves the existing column order.
data_all = pd.concat([train, test], sort=False)

### Фиксируем невалидные адреса

In [6]:
# Flag rows that have no Russian address: 1 where `address_rus` is missing, 0 otherwise.
# The flag is built directly from the null mask; `.at` is a scalar (single row/column)
# accessor and must not be given a boolean mask.
data_all['address_fail'] = data_all['address_rus'].isnull().astype('int64')

### Геокодинг

In [7]:
# Select the raw `address` value of every row flagged as failed, keeping the
# row's index label next to it.
failed_addresses = data_all[data_all['address_fail'] == 1]['address']
inds = failed_addresses.index
addrs = failed_addresses.values

# One geocoding request per failed row: Request(address, index label).
reqs = [Request(addr, ind) for ind, addr in zip(inds, addrs)]

In [8]:
# Read the Yandex Maps API token from the YANDEX_MAPS_API_TOKEN environment variable
# so a real credential never has to be pasted into (and committed with) the notebook.
# The original placeholder remains the fallback when the variable is not set.
token = os.environ.get('YANDEX_MAPS_API_TOKEN', 'YOUR-YANDEX-MAPS-API-TOKEN')
ya_map_client = YandexApiClient(token)

In [9]:
# Run every request in `reqs` through both geocoding methods of the client.
# Each pass is slow: the recorded progress output shows 420 items taking
# roughly 1m44s per call.
# NOTE(review): presumably `translit_geocode` geocodes a transliterated form of
# the address — confirm in rating.ya_maps_client.
resps_en = ya_map_client.geocode(reqs)
resps_ru = ya_map_client.translit_geocode(reqs)

100%|██████████| 420/420 [01:44<00:00,  4.10it/s]
100%|██████████| 420/420 [01:43<00:00,  3.58it/s]


### Если были ошибки сервера, то здесь можно их поправить, указав порядковый номер

In [10]:
# index = [i for i, item in enumerate(resps_en) if item.index == 5087][0]  # 5087 -- порядковый номер
# obj = data_all[data_all['address_fail'].astype(bool)]['address'].values[index]
# resps_en[index] = Response(ya_map_client.geocode_i(obj), index)
# resps_ru[index] = Response(ya_map_client.translit_geocode_i(obj), index)

### Валидация адресов по домам и совпадениям

In [10]:
# Split the two response lists into validated and rejected addresses.
# The save step further down zips the two results and raises unless exactly
# one entry of each pair is non-None, so they are consumed as parallel lists.
# NOTE(review): the validation criteria live in rating.validator.extract_addresses
# and are not visible here.
valid_addresses, invalid_addresses = extract_addresses(resps_en, resps_ru)

Valid: 171. Invalid: 249.


### Сохранение данных

In [16]:
# Build (index, coords) pairs: validated addresses contribute their best
# coordinates, rejected ones get a zero placeholder.
res = []
for i, (valid_address, invalid_address) in enumerate(zip(valid_addresses, invalid_addresses)):
    # Exactly one of the pair must be set; both None or both set is an error.
    if (valid_address is None) == (invalid_address is None):
        raise Exception('Fail %d' % i)
    if valid_address is not None:
        entry = (valid_address.index, valid_address.get_best_coords())
    else:
        entry = (invalid_address.index, {'long': 0, 'lat': 0})
    res.append(entry)
    

In [17]:
# Write the collected (index, coords) pairs to disk as JSON.
with open('data/missed_coords.json', 'w') as coords_file:
    json.dump(res, coords_file)