import thu vien

In [341]:
%matplotlib inline
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations 
from sklearn.cluster import KMeans
import folium
# Load data
data = pd.read_csv('dis.csv',
                   delimiter='\t',)

In [342]:
data.head()

Unnamed: 0,Location Name,Latitude,Longtitude,Volume,Location Type
0,Hải Phòng,20.8,106.6667,67149540.0,Supply
1,TP. Hồ Chí Minh,10.8167,106.6333,326650000.0,Supply
2,Hà Nội,21.0245,105.8412,238705800.0,Demand
3,Bắc Ninh,21.1861,106.0763,26209980.0,Demand
4,Vĩnh Phúc,21.31,105.5967,26426590.0,Demand


In [343]:
# Color options
color_options = {'demand': 'red',
                 'supply': 'green',
                 'flow': 'black',
                 'cog': 'blue',
                 'candidate': 'black',
                 'other': 'gray'}
# Instantiate map
m = folium.Map(location=data[['Latitude', 'Longtitude']].mean(),
               fit_bounds=[[data['Latitude'].min(),
                            data['Longtitude'].min()], 
                           [data['Latitude'].max(),
                            data['Longtitude'].max()]])
                            
# Add volume points
for _, row in data.iterrows():
    folium.CircleMarker(location=[row['Latitude'], 
                                  row['Longtitude']],
                        radius=(row['Volume']**0.5*0.001),
                        weight=1,
                        color=color_options.get(str(row['Location Type']).lower(), 'gray'),
                        tooltip=str(row['Location Name'])+' '+str(row['Volume'])).add_to(m)
                                #row['Longtitude']]).add_to(m)
        
# Zoom based on volume points
m.fit_bounds(data[['Latitude', 'Longtitude']].values.tolist())
# Show the map
m

  coords = (location[0], location[1])


In [344]:
# The outbound shipments cost twice as much as inbound shipments
IB_OB_ratio = 1
def loc_type_mult(x):
    """A function to get the volume multiplier based on the location type and the IB-OB ratio.
    x: The location type
    """
    if x.lower() == 'supply':
        # No need to divide since we are already multiplying the demand
        return 1
    elif x.lower() == 'demand':
        # Only apply multiplier to demand
        return IB_OB_ratio
    else:
        # If neither supply nor demand, remove entirely
        return 0
# Adjust volumes used in the computation based on IB-OB ratio
data['Calc_Vol'] = data['Location Type'].apply(str).apply(loc_type_mult)*data['Volume']

In [345]:
data.head()

Unnamed: 0,Location Name,Latitude,Longtitude,Volume,Location Type,Calc_Vol
0,Hải Phòng,20.8,106.6667,67149540.0,Supply,67149540.0
1,TP. Hồ Chí Minh,10.8167,106.6333,326650000.0,Supply,326650000.0
2,Hà Nội,21.0245,105.8412,238705800.0,Demand,238705800.0
3,Bắc Ninh,21.1861,106.0763,26209980.0,Demand,26209980.0
4,Vĩnh Phúc,21.31,105.5967,26426590.0,Demand,26426590.0


In [346]:
# Fit K-means for 2 centroids
kmeans = KMeans(n_clusters=2, 
                random_state=0).fit(data.loc[data['Calc_Vol']>0, ['Latitude', 
                                                                  'Longtitude']], 
                                    sample_weight=data.loc[data['Calc_Vol']>0, 
                                                           'Calc_Vol'])
# Get centers of gravity from K-means
cogs = kmeans.cluster_centers_
cogs = pd.DataFrame(cogs, columns=['Latitude',
                                   'Longtitude'])
# Get volume assigned to each cluster
data['Cluster'] = kmeans.predict(data[['Latitude', 'Longtitude']])
cogs = cogs.join(data.groupby('Cluster')['Volume'].sum())
# Include assigned COG coordinates in data by point 
data = data.join(cogs, on='Cluster', rsuffix='_COG')

In [347]:
data.head()

Unnamed: 0,Location Name,Latitude,Longtitude,Volume,Location Type,Calc_Vol,Cluster,Latitude_COG,Longtitude_COG,Volume_COG
0,Hải Phòng,20.8,106.6667,67149540.0,Supply,67149540.0,1,20.722094,106.080005,528965100.0
1,TP. Hồ Chí Minh,10.8167,106.6333,326650000.0,Supply,326650000.0,0,11.189729,106.718901,1135910000.0
2,Hà Nội,21.0245,105.8412,238705800.0,Demand,238705800.0,1,20.722094,106.080005,528965100.0
3,Bắc Ninh,21.1861,106.0763,26209980.0,Demand,26209980.0,1,20.722094,106.080005,528965100.0
4,Vĩnh Phúc,21.31,105.5967,26426590.0,Demand,26426590.0,1,20.722094,106.080005,528965100.0


In [348]:
# Add flow lines to centers of gravity to map
for _, row in data.iterrows():
    # Flow lines
    if str(row['Location Type']).lower() in (['demand', 'supply']):
        folium.PolyLine([(row['Latitude'],
                          row['Longtitude']),
                         (row['Latitude_COG'],
                          row['Longtitude_COG'])],
                        color=color_options['flow'],
                        weight=(row['Volume']**0.5*0.0001),
                        opacity=0.8).add_to(m)
                        
# Add centers of gravity to map
for _, row in cogs.iterrows():
    # New centers of gravity
    folium.CircleMarker(location=[row['Latitude'],
                                  row['Longtitude']],
                        radius=(row['Volume']**0.5**0.001),
                        color=color_options['cog'],
                        tooltip=row['Volume']).add_to(m)
    
# Show map
m

In [349]:
cogs

Unnamed: 0,Latitude,Longtitude,Volume
0,11.189729,106.718901,1135910000.0
1,20.722094,106.080005,528965100.0


# Itertools

In [350]:
%matplotlib inline
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations 
from tqdm import tqdm
from sklearn.cluster import KMeans
import folium

data = pd.read_csv('dis.csv',
                   delimiter='\t',)
                             
# Color options
color_options = {'demand': 'red',
                 'supply': 'yellow',
                 'flow': 'black',
                 'cog': 'blue',
                 'candidate': 'black',
                 'other': 'gray'}
# Instantiate map
m = folium.Map(location=data[['Latitude', 'Longtitude']].mean(),
               fit_bounds=[[data['Latitude'].min(),
                            data['Longtitude'].min()], 
                           [data['Latitude'].max(),
                            data['Longtitude'].max()]])
                            
# Add volume points
for _, row in data.iterrows():
    folium.CircleMarker(location=[row['Latitude'], 
                                  row['Longtitude']],
                        radius=(row['Volume']**0.5*0.001),
                        weight=1,
                        color=color_options.get(str(row['Location Type']).lower(), 'gray'),
                        tooltip=str(row['Location Name'])+' '+str(row['Volume'])).add_to(m)
                                #row['Longtitude']]).add_to(m)
        
# Zoom based on volume points
m.fit_bounds(data[['Latitude', 'Longtitude']].values.tolist())
# Show the map
m

  coords = (location[0], location[1])


In [351]:
# The outbound shipments cost twice as much as inbound shipments
IB_OB_ratio = 2
def loc_type_mult(x):
    """A function to get the volume multiplier based on the location type and the IB-OB ratio.
    x: The location type
    """
    if x.lower() == 'supply':
        # No need to divide since we are already multiplying the demand
        return 1
    elif x.lower() == 'demand':
        # Only apply multiplier to demand
        return IB_OB_ratio
    else:
        # If neither supply nor demand, remove entirely
        return 0
# Adjust volumes used in the computation based on IB-OB ratio
data['Calc_Vol'] = data['Location Type'].apply(str).apply(loc_type_mult)*data['Volume']

In [352]:
data.head()

Unnamed: 0,Location Name,Latitude,Longtitude,Volume,Location Type,Calc_Vol
0,Hải Phòng,20.8,106.6667,67149540.0,Supply,67149540.0
1,TP. Hồ Chí Minh,10.8167,106.6333,326650000.0,Supply,326650000.0
2,Hà Nội,21.0245,105.8412,238705800.0,Demand,477411600.0
3,Bắc Ninh,21.1861,106.0763,26209980.0,Demand,52419970.0
4,Vĩnh Phúc,21.31,105.5967,26426590.0,Demand,52853190.0


In [353]:
n = 2 # Number of warehouse
cands = data.loc[data['Location Type'].str.lower()=='candidate']
locs = data.loc[data['Calc_Vol']>0]
total_dist = np.inf
best_cogs = []
# Loop to find best combination of candidate sites
for i in tqdm(list(combinations(cands.index, n))):
    temp_cands = cands.loc[list(i)]
    locs['Cluster'] = 0
    locs['Distance_COG'] = np.inf
    for i_l, r_l in locs.iterrows():
        for i_c, r_c in temp_cands.iterrows():
            # Get distance
            dist = (r_l['Latitude']-r_c['Latitude'])**2
            dist += (r_l['Longtitude']-r_c['Longtitude'])**2
            dist **= 0.5
            # Save values if distance is shorter
            if dist < locs.loc[i_l, 'Distance_COG']:
                # Save distance
                locs.loc[i_l, 'Distance_COG'] = dist
                # Save index of nearest point
                locs.loc[i_l, 'Cluster'] = i_c
    # Weight distance by volume
    locs['Weighted_Distance_COG'] = locs['Distance_COG'] * locs['Calc_Vol']
    # Save scenario if total weighted distance is smaller
    if locs['Weighted_Distance_COG'].sum() < total_dist:
        total_dist = locs['Weighted_Distance_COG'].sum()
        best_cogs = list(list(i))
# Get centers of gravity
cogs = cands.loc[best_cogs, ['Latitude',
                             'Longtitude']]
# Reloop to get site assignment
locs['Cluster'] = 0
locs['Distance_COG'] = np.inf
for i_l, r_l in locs.iterrows():
    for i_c, r_c in cogs.iterrows():
        # Get distance
        dist = (r_l['Latitude']-r_c['Latitude'])**2
        dist += (r_l['Longtitude']-r_c['Longtitude'])**2
        dist **= 0.5
        # Save values if distance is shorter
        if dist < locs.loc[i_l, 'Distance_COG']:
            # Save distance
            locs.loc[i_l, 'Distance_COG'] = dist
            # Save index of nearest point
            locs.loc[i_l, 'Cluster'] = i_c
# Get volume assigned to each cog
cogs = cogs.join(locs.groupby('Cluster')['Volume'].sum())
# Include assigned COG coordinates in data by point 
data = data.join(locs['Cluster'])
data = data.join(cogs, on='Cluster', rsuffix='_COG')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  locs['Cluster'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  locs['Distance_COG'] = np.inf
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  locs['Weighted_Distance_COG'] = locs['Distance_COG'] * locs['Calc_Vol']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[r

In [354]:
# Add flow lines to centers of gravity to map
for _, row in data.iterrows():
    # Flow lines
    if str(row['Location Type']).lower() in (['demand', 'supply']):
        folium.PolyLine([(row['Latitude'],
                          row['Longtitude']),
                         (row['Latitude_COG'],
                          row['Longtitude_COG'])],
                        color=color_options['flow'],
                        weight=(row['Volume']**0.5*0.0001),
                        opacity=0.8).add_to(m)
                        
# Add centers of gravity to map
for _, row in cogs.iterrows():
    # New centers of gravity
    folium.CircleMarker(location=[row['Latitude'],
                                  row['Longtitude']],
                        radius=(row['Volume']**0.5*0.001),
                        weight=1.5,
                        color=color_options['cog'],
                        tooltip=row['Volume']).add_to(m)
    
# Show map
m