In [20]:
import pandas as pd
import numpy as np
import json

In [71]:
# Read the training table from the data directory (path is relative to the notebook).
train = pd.read_csv(filepath_or_buffer='../data/train.csv')

In [72]:
# Preview the first five rows of the freshly loaded frame.
train.head(n=5)

Unnamed: 0.1,Unnamed: 0,id,atm_group,address,address_rus,lat,long,target
0,0,8526.0,32.0,"EMELYANOVA,34 Y-SAKHALINSK","улица А.О. Емельянова, 34, Южно-Сахалинск, Сах...",46.940995,142.738319,0.0115
1,1,8532.0,32.0,"KOMSOMOLSKAYA,259B Y.SAKHALINSK","Комсомольская улица, 259, Южно-Сахалинск, Саха...",46.937353,142.753348,0.02971
2,2,8533.0,32.0,"KOMMUN. PR., 32 YUZHNO SAKHAL","Коммунистический проспект, Южно-Сахалинск, Сах...",46.959413,142.741113,0.00954
3,3,8684.0,32.0,"LENINGRADSKIY PR.,76A MOSCOW","Ленинградский проспект, 76А, Москва, Россия, 1...",55.805827,37.515146,-0.094035
4,4,37.0,32.0,"GVARDEYSKAYA PL., 2 NORILSK","Гвардейская площадь, 2, Норильск, Красноярский...",69.343541,88.211228,0.079277


In [73]:
# Give the 'Unnamed: 0' column a meaningful name, `atm_id`.
# Reassigning the result (instead of inplace=True) keeps the statement a plain
# expression: it is chainable and does not mutate an object that earlier cells
# already displayed. Re-running the cell is harmless because rename() ignores
# labels that are no longer present.
train = train.rename(columns={'Unnamed: 0': 'atm_id'})

In [74]:
# Add radian versions of the degree-valued coordinate columns, one column at a
# time so the source -> target pairing is explicit.
train['lat_rad'] = np.radians(train['lat'])
train['long_rad'] = np.radians(train['long'])

In [75]:
# Constant column: every row gets key == 0. Merging on 'key' against another
# frame that carries the same constant pairs every row with every row
# (a cross join).
train.loc[:, 'key'] = 0

In [76]:
# Preview the first five rows after the rename and the added helper columns.
train.head(n=5)

Unnamed: 0,atm_id,id,atm_group,address,address_rus,lat,long,target,lat_rad,long_rad,key
0,0,8526.0,32.0,"EMELYANOVA,34 Y-SAKHALINSK","улица А.О. Емельянова, 34, Южно-Сахалинск, Сах...",46.940995,142.738319,0.0115,0.819275,2.491254,0
1,1,8532.0,32.0,"KOMSOMOLSKAYA,259B Y.SAKHALINSK","Комсомольская улица, 259, Южно-Сахалинск, Саха...",46.937353,142.753348,0.02971,0.819211,2.491516,0
2,2,8533.0,32.0,"KOMMUN. PR., 32 YUZHNO SAKHAL","Коммунистический проспект, Южно-Сахалинск, Сах...",46.959413,142.741113,0.00954,0.819596,2.491302,0
3,3,8684.0,32.0,"LENINGRADSKIY PR.,76A MOSCOW","Ленинградский проспект, 76А, Москва, Россия, 1...",55.805827,37.515146,-0.094035,0.973995,0.654763,0
4,4,37.0,32.0,"GVARDEYSKAYA PL., 2 NORILSK","Гвардейская площадь, 2, Норильск, Красноярский...",69.343541,88.211228,0.079277,1.210273,1.539576,0


In [77]:
# Count, for every ATM row in `train`, how many OpenStreetMap nodes of each
# category lie strictly within NEARBY_RADIUS_KM, and store the result in a
# column named n_<category>.
#
# Distances are computed with chunked NumPy broadcasting, so no ATM x POI
# cross-join frame is materialized (the constant `key` column on `train` is not
# used here). Columns are assigned directly, so re-running the cell overwrites
# the n_<category> columns instead of producing `_x` / `_y` duplicates.

EARTH_RADIUS_KM = 6378.137       # WGS-84 equatorial radius, in kilometres
NEARBY_RADIUS_KM = 0.3           # a node counts as nearby when distance < this
MAX_PAIRS_PER_CHUNK = 2_000_000  # upper bound on ATM x POI pairs held in memory at once

OSM_CATEGORIES = [
    'mall', 'bank', 'department_store', 'station', 'alcohol',
    'police', 'university', 'railway_station', 'aeroway_terminal',
]


def load_osm_nodes(category):
    """Read ``../osm_node_<category>.json`` and return its node coordinates.

    Only entries of ``json_data['elements']`` whose ``'type'`` is ``'node'``
    are kept.

    Returns:
        numpy.ndarray of shape (m, 2) holding ``[lat, lon]`` exactly as stored
        in the file; shape (0, 2) when the file contains no nodes.
    """
    with open(f'../osm_node_{category}.json', encoding='utf8') as f:
        elements = json.load(f)['elements']
    coords = [[elem['lat'], elem['lon']] for elem in elements if elem['type'] == 'node']
    # reshape keeps the result two-dimensional even when `coords` is empty.
    return np.asarray(coords, dtype=float).reshape(-1, 2)


def haversine_km(lat1_rad, long1_rad, lat2_rad, long2_rad):
    """Great-circle distance in kilometres between points given in radians.

    Arguments may be scalars or arrays; they are combined with NumPy
    broadcasting. NaN inputs yield NaN distances.
    """
    lat_diff = lat2_rad - lat1_rad
    long_diff = long2_rad - long1_rad
    a = (np.sin(lat_diff / 2.0) ** 2
         + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(long_diff / 2.0) ** 2)
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt/arcsin return NaN; clip leaves NaN as NaN.
    a = np.clip(a, 0.0, 1.0)
    return EARTH_RADIUS_KM * 2 * np.arcsin(np.sqrt(a))


def count_nearby(lat_rad, long_rad, poi_lat_rad, poi_long_rad,
                 radius_km=NEARBY_RADIUS_KM, max_pairs=MAX_PAIRS_PER_CHUNK):
    """Count, for each query point, the POIs strictly closer than ``radius_km``.

    Args:
        lat_rad, long_rad: 1-D arrays of query coordinates in radians.
        poi_lat_rad, poi_long_rad: 1-D arrays of POI coordinates in radians.
        radius_km: distance threshold in kilometres (strict ``<`` comparison).
        max_pairs: maximum number of query x POI pairs evaluated per chunk.

    Returns:
        numpy.ndarray of int64 with one count per query point. Query points
        with NaN coordinates get a count of 0, because NaN distances never
        satisfy the comparison.
    """
    lat_rad = np.asarray(lat_rad, dtype=float)
    long_rad = np.asarray(long_rad, dtype=float)
    poi_lat_rad = np.asarray(poi_lat_rad, dtype=float)
    poi_long_rad = np.asarray(poi_long_rad, dtype=float)

    counts = np.zeros(lat_rad.shape[0], dtype=np.int64)
    if poi_lat_rad.size == 0:
        return counts

    rows_per_chunk = max(1, max_pairs // poi_lat_rad.size)
    for start in range(0, lat_rad.shape[0], rows_per_chunk):
        stop = start + rows_per_chunk
        # Column vector of ATMs against row vector of POIs -> (rows, m) matrix.
        distance = haversine_km(
            lat_rad[start:stop, None], long_rad[start:stop, None],
            poi_lat_rad[None, :], poi_long_rad[None, :],
        )
        # NaN distances compare as False; silence the warning some NumPy
        # versions emit for that comparison.
        with np.errstate(invalid='ignore'):
            counts[start:stop] = (distance < radius_km).sum(axis=1)
    return counts


atm_lat_rad = np.asarray(train['lat_rad'], dtype=float)
atm_long_rad = np.asarray(train['long_rad'], dtype=float)

for category in OSM_CATEGORIES:
    poi_rad = np.radians(load_osm_nodes(category))
    train[f'n_{category}'] = count_nearby(
        atm_lat_rad, atm_long_rad, poi_rad[:, 0], poi_rad[:, 1]
    )

In [78]:
# Preview the first five rows, now including the n_<category> count columns.
train.head(n=5)

Unnamed: 0,atm_id,id,atm_group,address,address_rus,lat,long,target,lat_rad,long_rad,key,n_mall,n_bank,n_department_store,n_station,n_alcohol,n_police,n_university,n_railway_station,n_aeroway_terminal
0,0,8526.0,32.0,"EMELYANOVA,34 Y-SAKHALINSK","улица А.О. Емельянова, 34, Южно-Сахалинск, Сах...",46.940995,142.738319,0.0115,0.819275,2.491254,0,0,0,0,0,2,0,0,0,0
1,1,8532.0,32.0,"KOMSOMOLSKAYA,259B Y.SAKHALINSK","Комсомольская улица, 259, Южно-Сахалинск, Саха...",46.937353,142.753348,0.02971,0.819211,2.491516,0,0,2,0,0,1,0,0,0,0
2,2,8533.0,32.0,"KOMMUN. PR., 32 YUZHNO SAKHAL","Коммунистический проспект, Южно-Сахалинск, Сах...",46.959413,142.741113,0.00954,0.819596,2.491302,0,0,2,0,0,2,0,0,0,0
3,3,8684.0,32.0,"LENINGRADSKIY PR.,76A MOSCOW","Ленинградский проспект, 76А, Москва, Россия, 1...",55.805827,37.515146,-0.094035,0.973995,0.654763,0,0,8,0,1,3,2,0,0,0
4,4,37.0,32.0,"GVARDEYSKAYA PL., 2 NORILSK","Гвардейская площадь, 2, Норильск, Красноярский...",69.343541,88.211228,0.079277,1.210273,1.539576,0,0,0,1,0,0,0,0,0,0
