In [None]:
import pandas as pd
import ast
# Load the full tenancies table; its 'expenses' column is used below to
# derive the 'rent' and 'deposit' columns.
tens = pd.read_csv('tenancies_full.csv')

def rent_from_expenses(expenses_str):
    """Total every non-deposit amount in a stringified list of expenses.

    `expenses_str` is parsed with `ast.literal_eval` and is expected to
    produce an iterable of dicts with 'definition' and 'amount' keys.
    Entries whose definition equals 'deposit' or 'indskud' (compared
    case-insensitively) are left out. Returns 0 when nothing remains.
    """
    deposit_labels = ('deposit', 'indskud')
    return sum(
        entry['amount']
        for entry in ast.literal_eval(expenses_str)
        if entry['definition'].lower() not in deposit_labels
    )

def deposit_from_expenses(expenses_str):
    """Total only the deposit amounts in a stringified list of expenses.

    `expenses_str` is parsed with `ast.literal_eval` and is expected to
    produce an iterable of dicts with 'definition' and 'amount' keys.
    Only entries whose definition equals 'deposit' or 'indskud' (compared
    case-insensitively) are counted. Returns 0 when there are none.
    """
    deposit_labels = ('deposit', 'indskud')
    return sum(
        entry['amount']
        for entry in ast.literal_eval(expenses_str)
        if entry['definition'].lower() in deposit_labels
    )

# Derive 'deposit' and 'rent' from the stringified expense lists, then drop
# the source column together with the other columns that are not kept.
tens = tens.assign(
    deposit=tens['expenses'].map(deposit_from_expenses),
    rent=tens['expenses'].map(rent_from_expenses),
).drop(columns=[
    'expenses', 'legacy_id', 'termination_date', 'modernisation',
    'rent_includes_heating', 'rent_includes_water', 'rent_includes_electricity',
    'rent_includes_gas', 'takeover_condition', 'parent', 'children_allowed',
])

# These columns are moved to the front; every other column keeps its
# relative order behind them.
cols_to_order = [
    'town', 'no_rooms', 'area', 'rent', 'deposit', 'own_kitchen',
    'refrigerator', 'own_shower', 'own_toilet', 'balcony_terrace',
]
new_cols = cols_to_order + tens.columns.drop(cols_to_order).tolist()
tens = tens.loc[:, new_cols]

tens.to_csv('tenancies.csv', index=False)

In [None]:
import pandas as pd

# Bounding box corners, taken from https://boundingbox.klokantech.com/
lonmin, latmin = 12.3174815767, 55.5514318794
lonmax, latmax = 12.6862102144, 55.7731694293

df = pd.read_csv('tenancies.csv')

# Keep only rows that lie strictly inside the bounding box.
df = df[
    df['latitude'].gt(latmin) & df['latitude'].lt(latmax)
    & df['longitude'].gt(lonmin) & df['longitude'].lt(lonmax)
]

# Keep only rows with area above 45 that have their own kitchen and shower.
df = df[df['area'].gt(45) & df['own_kitchen'].eq(True) & df['own_shower'].eq(True)]

In [None]:
import googlemaps
from datetime import datetime
import os

# Prefer a key supplied through the GMAPS_API_KEY environment variable so the
# real key never has to be written into the notebook; the placeholder is kept
# as the fallback, so behavior is unchanged when the variable is not set.
GMAPS_API_KEY = os.environ.get('GMAPS_API_KEY', '<REPLACE_WITH_GMAPS_API_KEY>')
gmaps = googlemaps.Client(key=GMAPS_API_KEY)

In [None]:
import numpy as np
from unidecode import unidecode
from functools import partial
from datetime import datetime

# NOTE(review): this repeats the client construction from the previous cell
# with the same key; it looks redundant unless this cell is run on its own.
gmaps = googlemaps.Client(key=GMAPS_API_KEY)

def directions(origin, destination, arrive_time, name, mode, df,
               travel_date=datetime(2019, 6, 12)):
    """Attach one Google directions lookup to every row of a building's frame.

    Parameters
    ----------
    origin, destination : str or None
        Route endpoints. Whichever one is None is replaced by the home
        address built from the first row's 'town', 'street' and 'number'.
    arrive_time : str
        Desired arrival time of day in 'HH:MM' format.
    name : str
        Name of the column that receives the raw directions result.
    mode : str
        Travel mode forwarded to `gmaps.directions`.
    df : pandas.DataFrame
        Rows of a single group; only the first row's address is used.
    travel_date : datetime.date or datetime.datetime, optional
        Calendar day combined with `arrive_time`. Defaults to 2019-06-12.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with column `name` holding the same directions list in
        every row. The input frame is left untouched, because functions passed
        to `groupby.apply` should not mutate the group they receive.
    """
    arrival_time = datetime.combine(
        travel_date,
        datetime.strptime(arrive_time, '%H:%M').time()
    )

    home_address = '{}, {}, {}'.format(
        df['town'].values[0].strip(),
        df['street'].values[0].strip(),
        df['number'].values[0].strip()
    )

    if origin is None:
        origin = home_address
    if destination is None:
        destination = home_address

    # Named `routes` so the local does not shadow this function's own name.
    routes = gmaps.directions(
        origin, destination, mode=mode, arrival_time=arrival_time
    )
    # Progress trace; a length of 0 means no route was returned.
    print(name, home_address, len(routes))

    annotated = df.copy()
    annotated[name] = [routes] * len(annotated)
    return annotated

cbs_address = 'Solbjerg Pl. 3, 2000 Frederiksberg'
norreport_address = 'Nørreport'
itu_address = 'Rued Langgaards Vej 7, 2300 København'

# Each annotator is `directions` with everything except the per-building frame
# pre-bound: origin None (the home address is used), destination, arrival
# time, output column name and travel mode.
annotate_norreport_night = partial(directions, None, norreport_address, '20:00', 'to_norreport_night_raw', 'transit')
annotate_norreport_bike = partial(directions, None, norreport_address, '20:00', 'to_norreport_bike_raw', 'bicycling')
annotate_cbs = partial(directions, None, cbs_address, '09:00', 'to_cbs_raw', 'transit')
annotate_itu = partial(directions, None, itu_address, '09:00', 'to_itu_raw', 'transit')

# One lookup per building and destination. group_keys=False keeps the original
# row index: with the pandas >= 2.0 default, 'building_pk' would be added as an
# index level while still being a column, and the next groupby on
# 'building_pk' would then be ambiguous.
for annotate in (annotate_norreport_night, annotate_norreport_bike, annotate_cbs, annotate_itu):
    df = df.groupby(['building_pk'], group_keys=False).apply(annotate)
df.to_csv('tenancies_annotated_travel.csv', index=False)

In [None]:
import geopy.distance

# lat, lon = df['latitude'].iloc[0], df['longitude'].iloc[0]
# address = '{}, {}, {}'.format(df['town'].iloc[0].strip(), df['street'].iloc[0].strip(), df['number'].iloc[0])

def closest_places(home_coord, places_raw):
    """Return (distance_km, name) pairs for the places in a search result.

    `places_raw['results']` is iterated; each entry must carry a 'name' and a
    'geometry' -> 'location' dict with 'lat' and 'lng'. The distance from
    `home_coord` is measured with `geopy.distance.distance(...).km`. Names
    containing 'fakta' are collapsed to 'Fakta', and names containing 'fotex'
    or 'føtex' to 'Føtex' (case-insensitive). The list is sorted by distance
    only, so equally distant places keep their input order.
    """
    ranked = []
    for place in places_raw['results']:
        location = place['geometry']['location']
        distance_km = geopy.distance.distance(
            home_coord, (location['lat'], location['lng'])
        ).km

        label = place['name']
        lowered = label.lower()
        if 'fakta' in lowered:
            label = 'Fakta'
        elif 'fotex' in lowered or 'føtex' in lowered:
            label = 'Føtex'

        ranked.append((distance_km, label))

    ranked.sort(key=lambda pair: pair[0])
    return ranked

def annotate_supermarkets(df):
    """Attach the nearby-supermarket list to every row of a building's frame.

    The first row's 'latitude'/'longitude' are used as the search location for
    `gmaps.places_nearby` (type 'supermarket', ranked by distance). The result
    is reduced by `closest_places` to (distance, name) pairs.

    Returns a copy of `df` with a 'supermarkets_raw' column holding that same
    list in every row. The input frame is left untouched, because functions
    passed to `groupby.apply` should not mutate the group they receive.
    """
    lat, lon = df['latitude'].values[0], df['longitude'].values[0]

    raw_result = gmaps.places_nearby(location=(lat, lon), type='supermarket', rank_by='distance')
    vals = closest_places((lat, lon), raw_result)

    annotated = df.copy()
    annotated['supermarkets_raw'] = [vals] * len(annotated)
    return annotated

# One places lookup per building. group_keys=False keeps the original row
# index: with the pandas >= 2.0 default, 'building_pk' would be added as an
# index level while still being a column.
df = df.groupby(['building_pk'], group_keys=False).apply(annotate_supermarkets)
df.to_csv('tenancies_travel_supermarkets_annotated.csv', index=False)

In [None]:
# Reload the annotated table from disk. After the CSV round trip the
# list-valued columns come back as strings, which is why the code below
# parses them with ast.literal_eval.
df = pd.read_csv('tenancies_travel_supermarkets_annotated.csv')
def trans_market(market):
    """Map a free-form shop name to a canonical chain label.

    The lower-cased name is tested against the substrings below in the order
    listed; the label of the first substring found is returned. Names that
    match none of them yield 'OTHER'.
    """
    lowered = market.lower()
    chain_by_substring = {
        'fakta': 'Fakta',
        'lidl': 'Lidl',
        'netto': 'Netto',
        'kvickly': 'Kvickly',
        'rema': 'REMA',
        'superbrugsen': 'Superbrugsen',
        'super brugsen': 'Superbrugsen',
        'irma': 'Irma',
        'aldi': 'Aldi',
        'meny': 'Meny',
        'købmand': 'MIN KØBMAND',
    }
    matches = (
        label
        for needle, label in chain_by_substring.items()
        if needle in lowered
    )
    return next(matches, 'OTHER')

import ast
def closest_known_dist(markets):
    """Return the smallest distance among entries not named 'OTHER'.

    `markets` is a string that `ast.literal_eval` turns into an iterable of
    (distance, name) pairs. Returns -1 when no pair qualifies.
    """
    # NOTE(review): nothing visible in this notebook maps names to 'OTHER'
    # before this point (`trans_market` is defined but never applied), so this
    # filter may currently let every place through. Confirm the intent.
    known_dists = (
        dist
        for dist, name in ast.literal_eval(markets)
        if name != 'OTHER'
    )
    return min(known_dists, default=-1)
# Distance to the nearest qualifying supermarket per row (-1 when there is none).
df['supermarket_dist'] = df['supermarkets_raw'].apply(closest_known_dist)

In [None]:
def travel_time(directions_raw):
    """Extract the duration of the first route from a stringified directions result.

    `directions_raw` is parsed with `ast.literal_eval` into a list of routes.
    The value returned is `['legs'][0]['duration']['value']` of the first
    route (seconds, per the Google Directions API response format).

    Returns NaN when the list is empty, i.e. no route was found, instead of
    raising IndexError and aborting the whole column computation.
    """
    routes = ast.literal_eval(directions_raw)
    if not routes:
        return float('nan')
    return routes[0]['legs'][0]['duration']['value']

# Derive a travel-time column from each raw directions column; the new
# column is named after the raw one with the '_raw' suffix removed.
for name in ['to_norreport_night_raw', 'to_norreport_bike_raw', 'to_cbs_raw', 'to_itu_raw']:
    df[name.replace('_raw', '')] = df[name].apply(travel_time)
# NOTE(review): a bare `df` renders the whole frame as this cell's output.
df

In [None]:
# NOTE(review): bare list expression with no effect other than being shown as
# the cell output; it appears to be a reference listing of the columns of `df`.
['town', 'no_rooms', 'area', 'rent', 'deposit', 'own_kitchen',
'refrigerator', 'own_shower', 'own_toilet', 'balcony_terrace', 'url',
'rent_determined', 'advance_rent_months', 'stove', 'freezer',
'cooker_hood', 'dishwasher', 'microwave', 'tumble_dryer',
'washing_machine', 'storage_room', 'integrated_closets', 'furnished',
'disability_housing', 'commonid_ptr', 'pk_property', 'pk',
'tenancy_group_pk', 'building_pk', 'tenancy_no', 'properties',
'tenancy_no_suffix', 'country', 'street', 'number', 'floor', 'door',
'zipcode', 'latitude', 'longitude', 'geo_location_timestamp',
'to_norreport_night_raw', 'to_norreport_bike_raw', 'to_cbs_raw',
'to_itu_raw', 'supermarkets_raw', 'supermarket_dist',
'to_norreport_night', 'to_norreport_bike', 'to_cbs', 'to_itu']

In [None]:
# Write the table without the raw directions/supermarket columns to 'manual.csv'.
# NOTE(review): unlike the other to_csv calls in this notebook, index=False is
# not passed here, so the row index is written as an extra first column.
# Confirm that this is intended.
df.drop(columns=['to_norreport_night_raw', 'to_norreport_bike_raw', 'to_cbs_raw', 'to_itu_raw', 'supermarkets_raw']).to_csv('manual.csv')