In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from copy import copy
import datetime
import pickle
from geopy.geocoders import Nominatim

In [2]:
#Sample date

date_list = [22, 11, 2019] #November 22, 2019

year = int(date_list[2])
month = int(date_list[1])
day = int(date_list[0])

my_date = datetime.date(year, month, day)

In [3]:
#Sample test locations

test_locations = {'L1': (5.5506785,-0.2452914),#solar, 
                  'L2': (5.5480404,-0.1918183),#blck sq
                  'L3': (5.5996372,-0.191487),#tan
                  'L4': (5.55455776,-0.2047379),#mvpk
                  'L5': (5.5544974,-0.199873),#kmpsk
                  'L6': (5.6431396,-0.1726696),#ica
                  #'L7': (5.5996372,-0.191487),#tan
                  #'L8': (5.6298385,-0.2593555),
                  #'L9': (5.6275224,-0.2591407),
                  #'L10': (5.6275224,-0.2591407),
                  #'L11': (5.6461705,-0.2622377)
             }

In [4]:
geolocator = Nominatim()
addresses = []

for key in test_locations:
    location = geolocator.reverse(test_locations[key])
    addresses.append(location.address)

  """Entry point for launching an IPython kernel.


In [5]:
addresses

['Lartebiokorshie, Accra, Accra Metropolitan, Greater Accra Region, GA5857449, Ghana',
 'Black Star Square, 28th February Road, Ministries, Accra, Accra Metropolitan, Greater Accra Region, 000000, Ghana',
 'South Ridge Street, Roman Ridge, Accra, Accra Metropolitan, Greater Accra Region, 12345, Ghana',
 'TUC, Mayor Road, West Ridge, Accra, Accra Metropolitan, Greater Accra Region, 000000, Ghana',
 'Park View Cafe, Liberia Avenue, Ministries, Accra, Accra Metropolitan, Greater Accra Region, 000000, Ghana',
 'East Legon, Accra, Accra Metropolitan, Greater Accra Region, BOX LG25, Ghana']

In [6]:
test_addresses = {'L1': 'Laterbiokorshie,Accra',
                  'L2': 'Independence Square,Accra',
                  'L3': 'South Ridge,Roman Ridge Accra',
                  'L4': 'Mayor Road,West Ridge,Accra',
                  'L5': 'Park View Cafe,Liberia Avenue,Accra',
                  'L6': 'East Legon,Accra',
                  #'L7': '159 West 25th Street NY',
                  #'L8': '486 Brooklyn Avenue NY',
                  #'L9': '70-38 67th Place NY',
                  #'L10': '194 Devoe Street NY',
                  #'L11': '158-46 76th Avenue NY'
             }

In [7]:
def create_guess(points):
    """
    Creates a possible path between all points, returning to the original.
    Input: List of point IDs
    """
    guess = copy(points)
    np.random.shuffle(guess)
    guess.append(guess[0])
    return list(guess)

create_guess(list(test_locations.keys()))

['L3', 'L4', 'L6', 'L2', 'L5', 'L1', 'L3']

In [8]:
def create_generation(points, population=100):
    """
    Makes a list of guessed point orders given a list of point IDs.
    Input:
    points: list of point ids
    population: how many guesses to make
    """
    generation = [create_guess(points) for _ in range(population)]
    return generation

test_generation = create_generation(list(test_locations.keys()), population=12)
print(test_generation)

[['L5', 'L6', 'L4', 'L3', 'L2', 'L1', 'L5'], ['L5', 'L2', 'L3', 'L4', 'L6', 'L1', 'L5'], ['L6', 'L2', 'L4', 'L3', 'L5', 'L1', 'L6'], ['L6', 'L4', 'L1', 'L3', 'L5', 'L2', 'L6'], ['L5', 'L3', 'L2', 'L6', 'L1', 'L4', 'L5'], ['L5', 'L6', 'L3', 'L4', 'L1', 'L2', 'L5'], ['L3', 'L6', 'L4', 'L2', 'L1', 'L5', 'L3'], ['L5', 'L4', 'L2', 'L1', 'L3', 'L6', 'L5'], ['L2', 'L6', 'L3', 'L5', 'L4', 'L1', 'L2'], ['L6', 'L5', 'L4', 'L3', 'L1', 'L2', 'L6'], ['L4', 'L2', 'L6', 'L3', 'L1', 'L5', 'L4'], ['L1', 'L6', 'L3', 'L5', 'L2', 'L4', 'L1']]


In [9]:
filename = "xgb_model.sav"

In [10]:

loaded_model = pickle.load(open(filename, 'rb'))



In [11]:
def travel_time_between_points(point1_id, point2_id):
    """
    Given two points, this calculates travel between them based on a XGBoost predictive model
    """
    
    model_data = {'pickup_longitude' : point1_id[1],
                  'pickup_latitude' : point1_id[0],
                  'dropoff_longitude' : point2_id[1],
                  'dropoff_latitude' : point2_id[0],
                  'latitude_difference' : point2_id[0] - point1_id[0],
                  'longitude_difference' : point2_id[1] - point1_id[1],
                  'trip_distance' : 0.621371 * 6371 * (abs(2 * np.arctan2(np.sqrt(np.square(np.sin((abs(point2_id[0] - point1_id[0]) * np.pi / 180) / 2))), 
                                  np.sqrt(1-(np.square(np.sin((abs(point2_id[0] - point1_id[0]) * np.pi / 180) / 2)))))) + \
                                     abs(2 * np.arctan2(np.sqrt(np.square(np.sin((abs(point2_id[1] - point1_id[1]) * np.pi / 180) / 2))), 
                                  np.sqrt(1-(np.square(np.sin((abs(point2_id[1] - point1_id[1]) * np.pi / 180) / 2)))))))
                 }

    df = pd.DataFrame([model_data], columns=model_data.keys())
    
    pred = np.exp(loaded_model.predict(xgb.DMatrix(df))) - 1
    
    return pred[0]

In [12]:
coordinates = test_locations

In [13]:
def fitness_score(guess):
    """
    Loops through the points in the guesses order and calculates
    how much distance the path would take to complete a loop.
    Lower is better.
    """
    score = 0
    for ix, point_id in enumerate(guess[:-1]):
        score += travel_time_between_points(coordinates[point_id], coordinates[guess[ix+1]])
    return score

def check_fitness(guesses):
    """
    Goes through every guess and calculates the fitness score. 
    Returns a list of tuples: (guess, fitness_score)
    """
    fitness_indicator = []
    for guess in guesses:
        fitness_indicator.append((guess, fitness_score(guess)))
    return fitness_indicator

print(check_fitness(test_generation))

ValueError: feature_names mismatch: ['pickup_latitude', 'pickup_longitude', 'dropoff_latitude', 'dropoff_longitude', 'latitude_difference', 'longitude_difference', 'trip_distance'] ['pickup_longitude', 'pickup_latitude', 'dropoff_longitude', 'dropoff_latitude', 'latitude_difference', 'longitude_difference', 'trip_distance']

In [14]:
X_test = X_test.reindex_axis(sorted(X_test.columns), axis=1)

NameError: name 'X_test' is not defined

In [None]:
check_fitness(test_generation)

In [None]:
test_generation

In [None]:
check_fitness

In [None]:
print