In [1]:
import pandas as pd
import numpy as np
from haversine import haversine as hs
from haversine import Unit
import pickle

In [2]:
###----------------------FILEPATHS----------------------###
secondary_facility_path = '../Data/Secondary_Facility_500.csv'
demand_path = '../ResearchData/1.Demand_Distribution.csv'

# result_path = '../Results/Agglomerative_clustering_rq_200.pkl'
result_path = '../Results/Agglomerative_clustering_rq_50.pkl'

customer_path = '../Results/50_customer_coordinates.npy'
# customer_path = '../Results/200_customer_coordinates.npy'

# optimized_demand_path = '../Results/200_customer_demands.npy'
optimized_demand_path = '../Results/50_customer_demands.npy'

###----------------------PARAMETERS----------------------###
# Rq = 200
Rq = 50
stdev = 100

In [3]:
###----------------------READ ALL INPUT DATAFILES----------------------###
q_df = pd.read_csv(secondary_facility_path)
demand_df = pd.read_csv(demand_path)
demand_df['Co-ordinates'] = demand_df.apply(lambda x : np.array([x['Latitude'], x['Longitude']]), axis=1)

In [4]:
# Function to compute similarity matrix using demand, coordinates, distance type
def get_similarity_matrix(W, demand, stdev, distance_type='haversine'):
    if distance_type=='euclidean':
        ## Distance matrix using Euclidean distance
        distance_matrix = np.linalg.norm(W[:, np.newaxis, :] - W[np.newaxis, :, :], axis=-1)
        n = len(W)
        similarity_matrix = np.zeros((n, n))

        for i in range(n):
            for j in range(n):
                if i == j:
                    similarity_matrix[i, j] = 0
                else:
                    similarity_matrix[i, j] = np.mean([demand[i], demand[j]]) * np.exp(-distance_matrix[i, j]/stdev**2)

        ## Putting lower triangle to zero
        n = similarity_matrix.shape[0]
        similarity_matrix[np.tril_indices(n)] = 0

    else:
        ## Distance matrix using Haversine distance
        n = len(W)
        distance_matrix = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                distance_matrix[i, j] = hs(tuple(W[i]), tuple(W[j]), unit='mi')

        similarity_matrix = np.zeros((n, n))

        for i in range(n):
            for j in range(n):
                if i == j:
                    similarity_matrix[i, j] = 0
                else:
                    similarity_matrix[i, j] = np.mean([demand[i], demand[j]]) * np.exp(-distance_matrix[i, j]/stdev**2)

        n = similarity_matrix.shape[0]
        similarity_matrix[np.tril_indices(n)] = 0

    return similarity_matrix

# Function to find and merge nodes with maximum similarity
def find_max_similarity_nodes(similarity_matrix):
    max_similarity = 0
    max_indices = (0, 0)

    # Iterate through the upper triangular part of the similarity matrix
    for i in range(len(similarity_matrix)):
        for j in range(i + 1, len(similarity_matrix)):
            if similarity_matrix[i, j] > max_similarity:
                max_similarity = similarity_matrix[i, j]
                max_indices = (i, j)

    return max_indices


In [5]:
### Initialization 

W = np.array(demand_df['Co-ordinates'].tolist())
demand = np.array(demand_df['Demand'].tolist())

In [6]:
W.shape

(371, 2)

In [7]:
%%time
###----------------------AGGLOMERATIVE CLUSTERING TILL W = RQ----------------------###

while len(W) > Rq:
    ###----------------------GET MAX SIMILARITY INDICES ----------------------###
    similarity_matrix = get_similarity_matrix(W, demand, 100, 'euclidean')
    max_indices = find_max_similarity_nodes(similarity_matrix)
    print(max_indices)
    print(similarity_matrix[max_indices[0], max_indices[1]])

    ###----------------------COMPUTE W_new, demand_new AND THEN UPDATE W AND demand ACCORDINGLY----------------------###
    W_new = (demand[max_indices[0]]*W[max_indices[0]] + demand[max_indices[1]]*W[max_indices[1]]) / (demand[max_indices[0]] + demand[max_indices[1]])
    demand_new = demand[max_indices[0]] + demand[max_indices[1]]

    indices_to_remove = list(max_indices)
    mask = np.ones(len(W), dtype=bool)
    mask_demand = np.ones(len(demand), dtype=bool)
    mask[indices_to_remove] = False
    mask_demand[indices_to_remove] = False

    ###----------------------UPDATED COORDINATES (Wi MATRIX)----------------------###
    W = W[mask]
    W = list(W)
    W.append(W_new)
    W = np.array(W)

    ###----------------------UPDATED DEMAND (Di MATRIX)----------------------###
    demand = demand[mask]
    demand = np.append(demand, demand_new)
    
    

(0, 1)
86612.39044829548
(0, 369)
119753.08667914508
(0, 368)
137706.02092774332
(2, 367)
154756.8461782249
(4, 366)
170780.31643488686
(2, 365)
186386.5186625184
(1, 364)
201893.34994295094
(0, 363)
217033.53683539986
(0, 362)
231630.33884430275
(0, 361)
245665.13389405925
(0, 360)
256834.8243761205
(1, 359)
267014.1184223986
(1, 358)
276617.2779738287
(0, 357)
284748.6411212949
(0, 356)
292965.1382481024
(0, 355)
300821.4002638547
(4, 354)
308466.6504531374
(1, 353)
316062.01687024103
(4, 352)
323438.3062157701
(1, 351)
330607.028354128
(0, 350)
337765.2297635366
(0, 349)
345118.7962821367
(0, 348)
351936.7493365852
(1, 347)
358807.99927600945
(2, 346)
365180.0467468404
(1, 345)
371314.1591145304
(3, 344)
377462.1665107456
(1, 343)
383542.18211535824
(0, 342)
389660.01834437926
(0, 341)
395973.6343134182
(0, 340)
401698.6723913531
(1, 339)
406883.76046775345
(0, 338)
411964.10671528865
(2, 337)
416577.9776462826
(4, 336)
421141.575737134
(0, 335)
425135.66635740985
(0, 334)
429551.32

In [8]:
W.shape

(50, 2)

In [9]:
### NOW THAT WE HAVE CLUBBED THE CUSTOMERS TOGETHER -> COMPUTE VQ~ (which is |VQ~| = Rq) SUCH THAT EVERY CUSTOMER HAS A MINIMUM DISTANCE VQ~

In [10]:
# q_df['Coordinates'] = q_df.apply(lambda x : [x['Latitude'], x['Longitude']], axis=1)

In [11]:
import numpy as np
from scipy.spatial import distance

vq_coordinates = q_df[['Latitude', 'Longitude']].values 
w_coordinates = W

# Compute distances
distances = distance.cdist(vq_coordinates, w_coordinates)

# Find the minimum distances for each set of points
min_distances_vq = np.min(distances, axis=1)
min_distances_w = np.min(distances, axis=0)

# Sort the indices based on the minimum distances
indices_vq = np.argsort(min_distances_vq)[:200]
indices_w = np.argsort(min_distances_w)[:200]

# Extract the points at minimum distances
vq_minimum_points = vq_coordinates[indices_vq]
w_minimum_points = w_coordinates[indices_w]


In [12]:
matching_indices_dict = {f'VQ_{vq_idx}': f'K_{w_idx}' for vq_idx, w_idx in zip(indices_vq, indices_w)}

In [13]:
### SERVING BETWEEN SECONDARY FACILITY TO CUSTOMERS 
matching_indices_dict

{'VQ_65': 'K_17',
 'VQ_440': 'K_15',
 'VQ_338': 'K_48',
 'VQ_51': 'K_46',
 'VQ_161': 'K_28',
 'VQ_317': 'K_26',
 'VQ_50': 'K_32',
 'VQ_237': 'K_11',
 'VQ_31': 'K_9',
 'VQ_332': 'K_19',
 'VQ_126': 'K_49',
 'VQ_385': 'K_5',
 'VQ_435': 'K_22',
 'VQ_12': 'K_31',
 'VQ_143': 'K_2',
 'VQ_287': 'K_24',
 'VQ_89': 'K_1',
 'VQ_80': 'K_21',
 'VQ_348': 'K_45',
 'VQ_397': 'K_38',
 'VQ_45': 'K_36',
 'VQ_445': 'K_25',
 'VQ_113': 'K_40',
 'VQ_319': 'K_29',
 'VQ_241': 'K_12',
 'VQ_400': 'K_39',
 'VQ_450': 'K_47',
 'VQ_223': 'K_41',
 'VQ_189': 'K_44',
 'VQ_178': 'K_42',
 'VQ_247': 'K_23',
 'VQ_378': 'K_6',
 'VQ_454': 'K_13',
 'VQ_281': 'K_37',
 'VQ_314': 'K_34',
 'VQ_121': 'K_30',
 'VQ_171': 'K_27',
 'VQ_174': 'K_33',
 'VQ_225': 'K_3',
 'VQ_211': 'K_20',
 'VQ_268': 'K_4',
 'VQ_100': 'K_7',
 'VQ_491': 'K_35',
 'VQ_459': 'K_14',
 'VQ_263': 'K_43',
 'VQ_77': 'K_8',
 'VQ_330': 'K_0',
 'VQ_484': 'K_10',
 'VQ_381': 'K_16',
 'VQ_182': 'K_18'}

In [14]:
## Save the Vq to K dictionary to a pickle file
with open(result_path, 'wb') as f:
    pickle.dump(matching_indices_dict, f)

## Save the optimized customers and respective demands to .npy 
np.save(customer_path, W)
np.save(optimized_demand_path, demand)