In [1]:
import pandas as pd
import numpy as np
from random import randint
from random import seed

from math import radians, sin, cos, tan, asin, sqrt
from timeit import default_timer as timer

In [2]:
def haversine_new(lat1, lon1):
    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    #reshape for broadcasting
    lat1 = lat1.reshape(len(lat1),1)
    lon1 = lon1.reshape(len(lat1),1)
    lat2 = lat1.reshape(1,len(lat1))
    lon2 = lon1.reshape(1,len(lat1))
    
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    d = np.sin(dlat * 0.5)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon * 0.5)**2
    c = 2.0 * 6371.0
    return c * np.arcsin(np.sqrt(d))

In [3]:
def weighted_trip_length_new(this_trip,hvs_matrix):
    #setup variable
    index = this_trip.index
    sleigh_weight = 10
    prev_dest = 0
    wrw = 0.0
    prev_weight = np.sum(this_trip.Weight) + sleigh_weight
    
    for i in index:
        next_dest = i
        wrw += hvs_matrix[prev_dest][next_dest] * prev_weight  #distance * weights#   
        prev_dest = next_dest
        prev_weight -= this_trip.loc[next_dest,'Weight']
    return wrw

In [4]:
def weighted_reindeer_weariness_new(all_trips, weight_limit = 1000):
    #check total weight for each trip
    if any(all_trips.groupby('TripId').Weight.sum() > weight_limit):
        raise Exception("One of the sleighs over weight limit!")
        
    lat1 = np.array(all_trips['Latitude'].tolist())
    lon1 = np.array(all_trips['Longitude'].tolist())
    hvs_matrix = haversine_new(lat1,lon1)    
    #calculate the wrw for each unique trip
    wrw = 0.0
    uniq_trips = all_trips.TripId.unique()
    for t in uniq_trips:
        this_trip = all_trips[all_trips.TripId==t]
        wrw += weighted_trip_length_new(this_trip,hvs_matrix)
    return wrw

In [17]:
def random_search(N):
    # 1000 * N solutions = max 
    N_eval = 1 * N
    df = pd.read_csv('gifts.csv')
    # Initial conditions 
    loop_1 = 0 #
    loop_2 = 0 
    seed_count = 0 
    best_wrw_list = []
    # Final result is the optimal WRW obtained. 
    RS_stat = pd.DataFrame(columns = ['Max', 'Min', 'Mean', 'Std']) 
    start_time_1 = timer()
  
    # loop 1 
    for i in range(30):
        seed = randint(1, 10000)
        best_wrw = None
        loop_1 += 1
        seed_count += 1
        RS_stat = pd.DataFrame(columns = ['Max', 'Min', 'Mean', 'Std']) 
    # Loop2  * N times
        for j in range(N_eval):
            start_time_2 = timer() # calc time of inner loop
            loop_2 += 1
            
            # Extract 10 random sample from gifts.csv
            df_10 = df.sample(n = 10, random_state = seed) #seed changes every loop, 30 times, 30 seeds
            # Converts GiftId column into list
            giftid = df_10['GiftId'].tolist()
            # Create 10 TripId in a vector
            tripid = np.array(range(11)) # 
            # Permutate the 10 TripId
            tripid_permutate = np.random.randint(10, size = 10) # size = shape size 
            # Match each GiftId with permutated TripId and put in dictionary
            giftid_tripid = dict(zip(giftid, tripid_permutate)) # key = gift, value = trip_id 
            # print(giftid_tripid)
            # Convert dict to dataframe
            giftid_tripid_df = pd.DataFrame(list(giftid_tripid.items()), 
                                            columns = ['GiftId', 'TripId'])
            #print(giftid_tripid_df)
            # Merge dataframe (GiftId & TripId) with 10 random sampled gifts.csv (df_10)
            all_trips = giftid_tripid_df.merge(df_10, on = 'GiftId')
            #print(all_trips)
            # Calculate wrw of that dataset
            new_wrw = weighted_reindeer_weariness_new(all_trips)
            #print(wrw)
            if best_wrw is None:
               best_wrw = new_wrw
            # Replace result with new wrw only if the new wrw is larger than the initial or current result
            elif new_wrw > best_wrw:
                 best_wrw = new_wrw

            best_wrw_list.append(best_wrw)
            best_wrw_array = np.asarray(best_wrw_list)
            MEAN = np.mean(best_wrw_array)
            MAX = np.max(best_wrw_array)
            MIN = np.min(best_wrw_array)
            STD = np.std(best_wrw_array)
            
            end_time_2 = timer()
            # Within loop (tables)
            #print("WRW = {:.0f}  (Time: {:.2f} seconds)".format(wrw, end_time_2 - start_time_2))
        
        end_time_1 = timer()

        print('Seed', seed_count, seed, 'WRW:', best_wrw)

    print('Total runs:',loop_1)
    print('\n')
    print('Total evaluations:',loop_2)
    print('Total time: {:.2f} seconds)'.format(end_time_1 - start_time_1))

    print('Mean WRW:',np.mean(best_wrw_array))
    print('Max WRW:',np.max(best_wrw_array))
    print('Min WRW:', np.min(best_wrw_array))
    print('Standard deviation', np.std(best_wrw_array))
    # Print total time spend to get entire result
    print('Time: {:.2f} seconds)'.format(end_time_1 - start_time_1))
    RS_stat = RS_stat.append({'Max': MAX, 'Min': MIN, 'Mean': MEAN, 'Std':STD},
                            ignore_index = True)


In [18]:
# Run random_search with 10 sample data  
random_search(5)

Seed 1 2559 WRW: 2959869.912977961
Seed 2 3362 WRW: 1555091.4706480952
Seed 3 3182 WRW: 2697024.885001139
Seed 4 9176 WRW: 1418794.030472269
Seed 5 4233 WRW: 2025291.435165956
Seed 6 4134 WRW: 1928757.2351238115
Seed 7 1049 WRW: 3406994.2562604025
Seed 8 4048 WRW: 3046752.8337740744
Seed 9 2546 WRW: 932826.1546058416
Seed 10 435 WRW: 2498559.572727573
Seed 11 9648 WRW: 3121102.427787754
Seed 12 4677 WRW: 2748265.8036284363
Seed 13 5231 WRW: 2480451.308717524
Seed 14 5219 WRW: 1542529.735816947
Seed 15 9811 WRW: 4106624.2063189317
Seed 16 5080 WRW: 4101039.512737943
Seed 17 1609 WRW: 2302977.528781377
Seed 18 4209 WRW: 2120465.74401257
Seed 19 6684 WRW: 2325451.7746517183
Seed 20 9345 WRW: 4598390.25426976
Seed 21 8290 WRW: 3125610.535115262
Seed 22 2837 WRW: 3358570.155960256
Seed 23 923 WRW: 4334216.280841593
Seed 24 6458 WRW: 2465429.1041849107
Seed 25 3623 WRW: 5450080.693567447
Seed 26 50 WRW: 2028307.1287773477
Seed 27 8388 WRW: 3589022.1960675665
Seed 28 6582 WRW: 3586777.7023737

In [19]:
RS_stat

NameError: name 'RS_stat' is not defined