In [2]:
from helpers import *
from time import time
import pandas as pd
from tqdm import tqdm
import os

In [3]:
points_df_path = "./points_df_025.csv"
point_set_df = pd.read_csv(points_df_path, header=0, index_col=0)
point_set_df

Unnamed: 0,Latitude,Longitude
0,51.737184,-0.620643
1,51.737184,-0.617012
2,51.737184,-0.613382
3,51.737183,-0.609751
4,51.737183,-0.606120
...,...,...
58243,51.238843,0.312049
58244,51.238815,0.315640
58245,51.238786,0.319231
58246,51.238757,0.322822


In [4]:
API_key = os.getenv('OS_DATA_HUB_API_KEY_0')

# Simple solution

- 1 api key
- Make 6 calls
- Wait a minute
- Repeat

In [5]:

def get_land_types_for_points_in_csv(csv_path, save_path, start_point_index, end_point_index, diameter_resolution, API_key):
  points_df = pd.read_csv(csv_path)
  subset_points_df = points_df.loc[start_point_index:end_point_index]
  
  print('Number of points to be processed:', len(subset_points_df))
  print('Start/End index (inclusive):', start_point_index, end_point_index)
  print('Start point:', subset_points_df.iloc[0]['Latitude'], ',', points_df.iloc[0]['Longitude'])
  print('End point:', points_df.iloc[-1]['Latitude'], ',', points_df.iloc[-1]['Longitude'])
  
  
  land_type_list = []
  t0 = time()
  for i in tqdm(range(len(subset_points_df))):
    # Save every 10
    if i % 10 == 0 and i != 0:
      # Create or append to a csv while waiting
      df = pd.DataFrame(dtype='object', index=subset_points_df.index[:len(land_type_list)])
      df['Land_Type'] = land_type_list
      df.to_csv(save_path)

      # Wait 2 seconds to repeat
      while(time() - t0 < 2):
        continue
      t0 = time() # reset the timer
    
    land_type = get_land_type(subset_points_df.iloc[i]['Latitude'],
                              subset_points_df.iloc[i]['Longitude'],
                              diameter_resolution,
                              API_key)
    land_type_list.append(land_type)
  
  print('Saving...')
  df = pd.DataFrame(dtype='object', index=subset_points_df.index)
  df['Land_Type'] = land_type_list
  df.to_csv(save_path)
  print('Done.')



get_land_types_for_points_in_csv('./points_df_025.csv', './land_types_23428-30000.csv', 23428, 30000, 0.25, API_key)
pd.read_csv('./land_types_23428-30000.csv', header=0, index_col=0)

  0%|          | 0/6573 [00:00<?, ?it/s]

Number of points to be processed: 6573
Start/End index (inclusive): 23428 30000
Start point: 51.53694625538829 , -0.6206426520917254
End point: 51.238728303226935 , 0.3264124829835313


100%|██████████| 6573/6573 [2:57:38<00:00,  1.62s/it]  

Saving...
Done.





Unnamed: 0,Land_Type
23428,"Building, Green_Space, Urban_Area"
23429,"Building, Green_Space, Urban_Area"
23430,"Building, Green_Space, Urban_Area"
23431,"Building, Green_Space, Urban_Area"
23432,"Water, Building, Green_Space, Urban_Area"
...,...
29996,"Water, Green_Space"
29997,"Water, Building, Green_Space, Urban_Area"
29998,"Water, Building, Green_Space, Urban_Area"
29999,"Water, Building, Green_Space, Urban_Area"


## Alternate solution:
Make better use of time. Twice as fast but still quite slow.

In [None]:

def manage_throttle_limit_timing(history, transactions_per_point, save_path, subset_points_df, land_type_list):
  
  # get transactions in last minute
  for i in range(0, len(history)):
    index = len(history) -1 - i
    within_minute = time() - history[index] < 60
    if not within_minute:
      break
    last_transaction_within_minute_index = index
  
  transactions_within_minute = (len(history) - last_transaction_within_minute_index - 1) * transactions_per_point
  
  if transactions_within_minute > 600 - transactions_per_point:
    # you must wait for the last transaction to clear this time window
    df = pd.DataFrame(dtype='object', index=subset_points_df.index[:len(land_type_list)])
    df['Land_Type'] = land_type_list
    df.to_csv(save_path)
      
    while time() - history[last_transaction_within_minute_index+1] < 60:
      continue


def get_land_types_for_points_in_csv(csv_path, save_path, start_point_index, end_point_index, diameter_resolution, API_key):
  # Open CSV
  points_df = pd.read_csv(csv_path)
  # Get correct set of points
  subset_points_df = points_df.loc[start_point_index:end_point_index]
  
  print('Number of points to be processed:', len(subset_points_df))
  print('Start/End index (inclusive):', start_point_index, end_point_index)
  print('Start point:', subset_points_df.iloc[0]['Latitude'], ',', points_df.iloc[0]['Longitude'])
  print('End point:', points_df.iloc[-1]['Latitude'], ',', points_df.iloc[-1]['Longitude'])
  
  
  land_type_list = []
  # Get land type of those set 6 points
  transactions_per_point = 9
  
  history = []
  for i in tqdm(range(len(subset_points_df))):
    
    if len(history) > 6:
      manage_throttle_limit_timing(history, 9, save_path, subset_points_df, land_type_list)
      # save and wait
    
    
    history += [time()] # Make time stamp
    land_type = get_land_type(subset_points_df.iloc[i]['Latitude'],
                              subset_points_df.iloc[i]['Longitude'],
                              diameter_resolution,
                              API_key)
    land_type_list.append(land_type)
  
  print('Saving...')
  df = pd.DataFrame(dtype='object', index=subset_points_df.index)
  df['Land_Type'] = land_type_list
  df.to_csv(save_path)
  print('Done.')



get_land_types_for_points_in_csv('./points_df_025.csv', './land_types_53001-58247.csv', 53001, 58247, 0.25, API_key)
pd.read_csv('./land_types_53001-58247.csv', header=0, index_col=0)

Number of points to be processed: 5247
Start/End index (inclusive): 53001 58247
Start point: 51.285120919964 , -0.6206426520917254
End point: 51.238728303226935 , 0.3264124829835313


  1%|          | 51/5247 [02:23<3:55:23,  2.72s/it]

## More complicated Solution

- Maintain a dictionary of keys and time of last use
- Grab a usable key from the dictionary each time