# Building the sample: Retrieving coordinates of settlements and images 

### 0. Imports and relevant paths

In [1]:
import csv
import http.client, urllib.parse
import io
import json
import os
import random
import re
import time
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests as req
import seaborn as sns
from bs4 import BeautifulSoup as bs
from PIL import Image
from webdriver_manager.chrome import ChromeDriverManager

warnings.simplefilter('ignore')

In [2]:
# Define/modify these paths and credentials before running the notebook

# geocoding_storage_path = 'INSERT_STORAGE_PATH'
# scrap_session_id = 'INSERT_STORAGE_SESSION_ID'
# image_storage_path = 'INSERT_STORAGE_PATH'

# Directory that receives the CSV with the raw geocoding API responses.
geocoding_storage_path = 'data/ignore'
# Identifier of the scraping session.
scrap_session_id = ''
image_storage_path = 'data/images_750_800_12' # Note custom path for the particular image size and zoom

# API credentials referenced by the geocoding and image-retrieval cells.
# They are read from the environment so that no secret is stored in the notebook;
# the placeholder fallback keeps the names defined on a fresh kernel
# (API calls will simply be rejected until real keys are provided).
Positionstack_api_key = os.environ.get('POSITIONSTACK_API_KEY', 'INSERT_POSITIONSTACK_API_KEY')
Mapbox_api_key = os.environ.get('MAPBOX_API_KEY', 'INSERT_MAPBOX_API_KEY')

### 1. Geocoding: from settlement name to coordinates

Loading reference DataFrame with all considered settlements for iteration

In [3]:
# Reference table of the settlements considered; read from the filtered CSV.
all_cities_filter = pd.read_csv(
    filepath_or_buffer="data/all_cities_filtered.csv",
)

Obtaining the coordinates of the settlements included in the sample using [positionstack's API](https://positionstack.com/).

In [None]:
# Raw API response bodies (bytes), one entry per settlement in iteration order,
# stored for later use.
api_responses = []

# Town codes of the settlements for which the geocoding API yielded no coordinate pair.
not_retrieved_towncodes = []

# Signed or unsigned decimal numbers; the first two found in a response body
# are taken as the coordinate pair.
geocoding_number_pattern = re.compile(r'-?\d+\.\d+')

# Iterating over the filtered sample dataframe containing all settlements considered.
settlement_rows = all_cities_filter[['town_code', 'cityname', 'country']].itertuples(index=False, name=None)
for town_code, town, country in settlement_rows:

    # Wait one second between consecutive requests
    # (presumably to respect the API rate limit - confirm against the plan in use).
    time.sleep(1)

    params = urllib.parse.urlencode({
        'access_key': f'{Positionstack_api_key}',
        'query': f'{town}, {country}',
        'limit': 1,
        })

    # Calling the API.
    # NOTE(review): plain HTTP sends the access key unencrypted; consider
    # http.client.HTTPSConnection if the subscription plan supports it.
    conn = http.client.HTTPConnection('api.positionstack.com', timeout=30)
    try:
        conn.request('GET', '/v1/forward?{}'.format(params))
        data = conn.getresponse().read()
    except (http.client.HTTPException, OSError):
        # A network failure must not abort the whole run: store an empty body so that
        # api_responses keeps exactly one entry per settlement, in row order.
        data = b''
    finally:
        # Always release the socket, whether or not the request succeeded.
        conn.close()

    api_responses.append(data)

    coordinates = geocoding_number_pattern.findall(data.decode('utf-8', errors='replace'))
    print(coordinates)

    # A valid result needs at least two numbers (a coordinate pair); otherwise
    # the town code is stored in "not_retrieved_towncodes".
    if len(coordinates) < 2:
        not_retrieved_towncodes.append(town_code)
    
# Saving api_responses as csv

# Attach each raw response to its settlement; this relies on api_responses holding
# exactly one entry per row of all_cities_filter, in row order.
all_cities_filter['api_response'] = api_responses

# csv.writer expects one sequence per row, so every response becomes a one-column row.
# Non-string values are written through str(), so bytes objects are stored in their b'...' form.
api_responses_to_csv = [[response] for response in api_responses]

csv_rows = api_responses_to_csv

# Make sure the target directory exists before opening the file for writing.
if geocoding_storage_path:
    os.makedirs(geocoding_storage_path, exist_ok=True)

# newline='' lets the csv module control line endings (avoids blank rows on Windows).
with open(f'{geocoding_storage_path}/api_responses_{scrap_session_id}.csv', 'w', newline='') as f:

    write = csv.writer(f)
    write.writerows(csv_rows)

### 2. Retrieving images

See the [Mapbox developer playground](https://docs.mapbox.com/playground/static/) to experiment with different image sizes and zoom levels before serially calling the API.

In [None]:
# Load old geocoding API responses

# The file is read without a header row; it is expected at the location configured
# through geocoding_storage_path and scrap_session_id at the top of the notebook.
api_responses = pd.read_csv(
    f"{geocoding_storage_path}/api_responses_{scrap_session_id}.csv",
    sep=",",
    header=None,
)

In [None]:
# Resolution and zoom

image_heigth = 800
image_width = 750
zoom = 12

# Row positions (in api_responses) for which an image could not be retrieved or saved
failed = []

# Decimal numbers with up to 3 integer digits and up to 6 decimals, optionally negative;
# the first two found in a stored response are used as latitude and longitude.
stored_coordinate_pattern = re.compile(r'-?\d{1,3}\.\d{1,6}')

# Make sure the output directory exists; otherwise every save would fail.
if image_storage_path:
    os.makedirs(image_storage_path, exist_ok=True)

# Iterating over the (old) stored geocoding API results (which contain all urban settlements in the sample)
for i in range(len(api_responses)):

    try:
        # retrieve coordinates from the stored results of geocoding api
        coordinates = stored_coordinate_pattern.findall(str(api_responses.iloc[i, 0]))
        lat = coordinates[0]
        lng = coordinates[1]

        # call API considering the previous defined information
        # (the URL takes longitude first, then latitude, zoom and a bearing of 0)
        api_response = req.get(
            f"https://api.mapbox.com/styles/v1/mapbox/satellite-v9/static/{lng},{lat},{zoom},0/{image_width}x{image_heigth}",
            params={'access_token': Mapbox_api_key},
            timeout=30,
        )
        # An error status carries an error payload rather than an image.
        api_response.raise_for_status()

        # save image
        with Image.open(io.BytesIO(api_response.content)) as image:
            image.save(f'{image_storage_path}/{i}.png')

    except (IndexError, req.RequestException, OSError, ValueError):
        # IndexError: fewer than two coordinates in the stored response;
        # RequestException: network problem or HTTP error status;
        # OSError / ValueError: payload is not a readable image or it cannot be written.
        failed.append(i)

print(f' A total of: {len(failed)} could not be retrieved')