In [1]:
from dotenv import load_dotenv
import os
# Raw string: the backslashes of the Windows path stay literal instead of being
# parsed as (invalid) escape sequences such as "\D", "\P" and "\.".
# NOTE(review): this is an absolute, machine-specific path -- consider a relative or configurable one.
load_dotenv(r'C:\Data\PRACTICE\.env')

True

In [11]:
# Data preparation: read the source CSV into a pandas DataFrame.
import pandas as pd
# Placeholder value -- replace it with the location of the CSV file to load.
path = 'your_data_path_location'
# NOTE(review): presumably this table carries the longitude/latitude columns that
# geodataframe() is later pointed at -- confirm against the actual file.
df = pd.read_csv(path)

def geodataframe(df, col_long, col_lat):
    """
    Build a point GeoDataFrame from a table that stores longitude and latitude
    ----------
    df : pandas.DataFrame()
        source table, handed to geopandas.GeoDataFrame as the data
    col_long : string
        name of the column holding the longitudes (used as x)
    col_lat : string
        name of the column holding the latitudes (used as y)
    ----------
    Returns the GeoDataFrame, with one point per row and CRS 'epsg:4326'.
    """
    import geopandas as gpd

    longitudes = df[col_long]
    latitudes = df[col_lat]
    # x = longitude, y = latitude.
    points = gpd.points_from_xy(longitudes, latitudes)
    return gpd.GeoDataFrame(df, geometry=points, crs='epsg:4326')

def warehouse_location(name, longitude, latitude):
    """
    Describe a single warehouse as a one-row point GeoDataFrame
    ----------
    name : string
        name of the warehouse, stored in the 'unique_id' column
    longitude : float
        longitude of the warehouse, stored in the 'Longitude' column
    latitude : float
        latitude of the warehouse, stored in the 'Latitude' column
    ----------
    Returns the GeoDataFrame (columns unique_id, Latitude, Longitude plus the
    point geometry) with CRS 'epsg:4326'.
    """
    import pandas as pd
    import geopandas as gpd

    site = pd.DataFrame({'unique_id': [name],
                         'Latitude': [latitude],
                         'Longitude': [longitude]})
    # x = longitude, y = latitude.
    location = gpd.points_from_xy(site['Longitude'], site['Latitude'])
    return gpd.GeoDataFrame(site, geometry=location, crs='epsg:4326')


In [238]:
def _route_leg(directions_url, origin, destination, mode):
    """
    Ask the directions service for the path between two stops
    ----------
    directions_url : string
        full URL of the directions endpoint
    origin : tuple
        (longitude, latitude) of the leg start
    destination : tuple
        (longitude, latitude) of the leg end
    mode : string
        routing mode forwarded to the service
    ----------
    Returns a shapely LineString built from the coordinates of the first
    feature in the JSON response.
    """
    import requests
    from shapely.geometry import LineString

    # Coordinates are sent as "longitude,latitude" text.
    params = dict(destination=str(destination[0]) + ',' + str(destination[1]),
                  origin=str(origin[0]) + ',' + str(origin[1]),
                  mode=mode)
    response = requests.get(directions_url, params=params).json()
    return LineString(response['features'][0]['geometry']['coordinates'])


# Route optimization per salesman (pass a gdf that is already limited to one day, or to one cluster)
def route_opt(gdf, col_unique, gdf_col_long, gdf_col_lat,  gdf_ws):
    """
    Func for route optimization using mlrose analysis
    ----------
    gdf : geopandas.GeoDataFrame()
        gdf after selected column (days or week); must have a 'geometry' column
    col_unique : string
        unique column id of gdf
    gdf_col_long : string
        longitude column of gdf
    gdf_col_lat : string
        latitude column of gdf
    gdf_ws : geopandas.GeoDataFrame()
        warehouse geodataframe (columns unique_id, Longitude, Latitude, geometry)
    ----------
    Returns a GeoDataFrame with one row per stop in visiting order. Row i holds
    the leg from stop i to the next stop (the last row closes the tour back to
    the first stop): 'distance(meter)' is the length of the leg geometry after
    projection to EPSG:32748, 'time(second)' comes from the matrix service and
    'geom' (the active geometry, EPSG:4326) is the leg line.

    Reads the environment variables BASEURL, API_MATRIX and BODY_API_DIR and
    raises RuntimeError when one of them is not set. Raises ValueError when
    fewer than two locations (stops + warehouse) are supplied.

    NOTE(review): the tour order is optimised on straight-line lon/lat
    coordinates, exactly as before; the road matrix is only used to report
    travel times. Confirm whether the optimiser should use the matrix instead.
    """
    import json
    import os
    import sys

    import pandas as pd
    import geopandas as gpd
    import requests
    import six
    # The alias has to be registered BEFORE mlrose is imported, otherwise it
    # cannot help on scikit-learn versions without `sklearn.externals.six`.
    sys.modules['sklearn.externals.six'] = six
    import mlrose
    import pyproj
    from shapely.ops import transform

    # Lengths are measured in a metric CRS (EPSG:32748) instead of degrees.
    projectgeom = pyproj.Transformer.from_crs('epsg:4326', 'epsg:32748', always_xy=True).transform

    # Fail early, with a readable message, when the service configuration is missing.
    settings = {key: os.getenv(key) for key in ('BASEURL', 'API_MATRIX', 'BODY_API_DIR')}
    unset = [key for key, value in settings.items() if value is None]
    if unset:
        raise RuntimeError('Missing environment variable(s): ' + ', '.join(unset))
    directions_url = settings['BASEURL'] + settings['BODY_API_DIR']

    # Stops followed by the warehouse, on a clean 0..n-1 index so that the
    # positions returned by the optimiser map straight onto rows.
    gdf = gdf.reset_index(drop=True)
    data = pd.DataFrame()
    data['unique_id'] = gdf[col_unique]
    data['Longitude'] = gdf[gdf_col_long]
    data['Latitude'] = gdf[gdf_col_lat]
    data['geometry'] = gdf['geometry']
    merge = pd.concat([data, gdf_ws], ignore_index=True)

    n_stops = len(merge)
    if n_stops < 2:
        raise ValueError('route_opt needs at least two locations (stops + warehouse)')

    # Travel-time matrix between all locations.
    profile = "truck"
    coordinates = [list(pair) for pair in zip(merge['Longitude'], merge['Latitude'])]
    headers = {'Content-type': 'application/json', 'Accept': 'application/json'}
    payload = json.dumps(dict(profile=profile, points=coordinates))
    try:
        res = requests.post(settings['API_MATRIX'], data=payload, headers=headers).json()
    except (requests.RequestException, ValueError):
        # One retry for a failed request or an unreadable JSON body; a second
        # failure propagates to the caller.
        res = requests.post(settings['API_MATRIX'], data=payload, headers=headers).json()
    travel_time = res['time']

    # Route optimization using mlrose (fixed random_state keeps the tour reproducible).
    coords_list = list(zip(merge['Longitude'], merge['Latitude']))
    fitness_coords = mlrose.TravellingSales(coords=coords_list)
    problem_fit = mlrose.TSPOpt(length=n_stops, fitness_fn=fitness_coords, maximize=False)
    best_state, _best_fitness = mlrose.genetic_alg(problem_fit, mutation_prob=0.2,
                                                   max_attempts=100, random_state=2)
    order = [int(position) for position in best_state]

    # Locations in visiting order.
    table_ = merge.iloc[order].reset_index(drop=True)
    rute = table_[['unique_id', 'Longitude', 'Latitude']].copy()
    stops = list(zip(table_['Longitude'].values, table_['Latitude'].values))

    line = []
    list_distance = []
    list_time = []
    for position in range(n_stops):
        # The modulo makes the last stop connect back to the first one.
        following = (position + 1) % n_stops

        # Every leg, including the closing one, is requested with the same
        # routing mode as the matrix profile.
        leg = _route_leg(directions_url, stops[position], stops[following], mode=profile)
        line.append(leg)
        list_distance.append(round(transform(projectgeom, leg).length, 2))

        # NOTE(review): the original code added 1 to every matrix entry before
        # reading it; the offset is kept so reported values do not change --
        # confirm whether it is wanted.
        list_time.append(travel_time[order[position]][order[following]] + 1)

    #append data
    rute['distance(meter)'] = list_distance
    rute['time(second)'] = list_time
    rute['geom'] = line

    return gpd.GeoDataFrame(rute, geometry='geom', crs='epsg:4326')

In [181]:
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from scipy.optimize import linear_sum_assignment
import numpy as np


def get_even_clusters(X, cluster_size):
    """
    Label the rows of X so that no label is used more than `cluster_size` times
    ----------
    X : array-like of shape (rows, features)
        points to group
    cluster_size : int
        maximum number of rows that may share one label
    ----------
    Returns an integer array with one label per row of X.
    """
    group_count = int(np.ceil(len(X) / cluster_size))
    model = KMeans(group_count, init ='k-means++', random_state=42)
    model.fit(X)

    # Every k-means centre is offered `cluster_size` consecutive "slots".
    slots = np.repeat(model.cluster_centers_, cluster_size, axis=0)
    cost = cdist(X, slots)

    # Each row gets exactly one slot, minimising the total distance; the
    # centre a slot belongs to is recovered with the integer division.
    _, slot_of_row = linear_sum_assignment(cost)
    return slot_of_row // cluster_size

def centroid(x_coords,y_coords):
    """
    Average point of a set of coordinates
    ----------
    x_coords : list
        x values
    y_coords : list
        y values
    ----------
    Returns [sum(x) / n, sum(y) / n], where n is the number of x values.
    """
    count = len(x_coords)
    return [sum(axis) / count for axis in (x_coords, y_coords)]

def Cluster_opt(gdf, cluster_size):
    """
    Func for get cluster data based on cluster size
    ----------
    gdf : geopandas.GeoDataFrame
        sales geodataframe; must have 'Longitude' and 'Latitude' columns
    cluster_size : int
        number of clusters to be created, based on total sales. len(gdf) is
        divided by this value (rounded up) to get the capacity that is handed
        to get_even_clusters.
    ----------
    Adds a string column 'cluster' to `gdf` in place and returns the same object.
    """
    import numpy as np
    import pandas as pd
    import geopandas as gpd

    #clustering
    rows_per_cluster = int(np.ceil(len(gdf) / cluster_size))
    labels = get_even_clusters(gdf[['Longitude', 'Latitude']], rows_per_cluster)

    # Labels are stored as text.
    gdf['cluster'] = labels
    gdf['cluster'] = gdf['cluster'].astype(str)

    return gdf

In [246]:
#Map Visualization
def Map_Viz(gdf, col_class):
    """
    Func for Map visualization based on geodataframe and column classification
    ----------
    gdf : geopandas.GeoDataFrame
        geodataframe to draw on the map
    col_class : string
        column of the geodataframe used for classification (colouring)
    ----------
    Returns the map object produced by gdf.explore.
    """
    m = gdf.explore(
        tiles = 'Cartodb Positron',
        # Was `co_class`, an undefined name that made every call fail.
        column = col_class,
        legend = True,
        marker_kwds = dict(radius=3)
    )
    return m

def Map_Clustering(gdf_cluster):
    """
    Func for Map visualization based on cluster geodataframe
    ----------
    gdf_cluster : geopandas.GeoDataFrame
        cluster geodataframe to draw; must have 'cluster', 'Longitude' and
        'Latitude' columns
    ----------
    Returns the map with the clustered points plus one larger marker per
    cluster, placed at the mean longitude/latitude of that cluster.
    """
    import geopandas as gpd

    # Cluster centres: mean coordinate of every label that is actually present
    # in `gdf_cluster` (the argument, not a notebook-level variable).
    center = (
        gdf_cluster.groupby('cluster')[['Longitude', 'Latitude']]
        .mean()
        .reset_index()
        .rename(columns={'Longitude': 'lon', 'Latitude': 'lat'})
        .assign(cluster=lambda frame: frame['cluster'].astype(str))
        .loc[:, ['lat', 'lon', 'cluster']]
    )
    gdf_center = gpd.GeoDataFrame(center,
                                  geometry=gpd.points_from_xy(center['lon'], center['lat']),
                                  crs = 'epsg:4326')

    m = gdf_cluster.explore(
        tiles = 'Cartodb Positron',
        column = 'cluster',
        legend = False
    )
    # Centres are drawn on the same map; only this layer carries the legend.
    gdf_center.explore(
        m=m,
        column = 'cluster',
        marker_kwds = dict(radius = 7),
        legend = True
    )
    return m

def Route_Map(df_route, gdf_poi, gdf_ws):
    """
    Func for Route Map Visualization
    ----------
    df_route : geopandas.GeoDataFrame
        route dataframe with line geometry column, calculated by func route_opt
    gdf_poi : geopandas.GeoDataFrame
        poi of customer; matched to the route through its 'CustomerCode' column
    gdf_ws : geopandas.GeoDataFrame
        warehouse geodataframe; its first 'unique_id' marks the start of the route
    ----------
    Returns (m, route_plot): the map, and the route rotated so that it starts
    at the warehouse, with an 'arah' column holding the 1-based visiting
    position as text. `gdf_ws` is not modified.

    Raises IndexError when the warehouse id does not occur in df_route.
    """
    import pandas as pd

    # Rotate the closed tour so that the warehouse row comes first. Positions
    # (not index labels) are used, so any index on the inputs is fine.
    warehouse_id = gdf_ws['unique_id'].iloc[0]
    is_warehouse = (df_route['unique_id'] == warehouse_id).to_numpy()
    if not is_warehouse.any():
        raise IndexError('warehouse %r is not part of df_route' % (warehouse_id,))
    first = int(is_warehouse.argmax())
    route_plot = pd.concat([df_route.iloc[first:], df_route.iloc[:first]]).reset_index(drop=True)
    route_plot['arah'] = [str(step) for step in range(1, len(route_plot) + 1)]

    poi_plot = pd.merge(gdf_poi,
                        route_plot[['unique_id','arah']],
                        left_on = 'CustomerCode',
                        right_on = 'unique_id')
    # Work on a copy so the caller's warehouse frame does not grow an 'arah' column.
    warehouse = gdf_ws.assign(arah=str(1))
    poi = pd.concat([poi_plot[['unique_id','arah','geometry']],
                     warehouse[['unique_id','arah','geometry']]])

    #plot
    m = route_plot.explore(tiles = 'Cartodb Positron',
                           column = 'arah')
    warehouse.explore(m=m,
                      marker_kwds=dict(radius=10, fill=True, color='black'))
    poi.explore(m=m,
                column = 'arah',
                legend = False,
                marker_kwds=dict(radius=5, fill=True))
    return m, route_plot

def Chart(df_route, gdf_poi, gdf_ws, col_name='distance(meter)', title='Distance/Time Visualization'):
    """
    Func for distance and time visualization per leg, ordered by 'arah'
    ----------
    df_route : geopandas.GeoDataFrame
        route dataframe with line geometry column, calculated by func route_opt
    gdf_poi : geopandas.GeoDataFrame
        poi of customer (not used by this function)
    gdf_ws : geopandas.GeoDataFrame
        warehouse geodataframe; its first 'unique_id' marks the start of the route
    col_name : string
        column of df_route to plot. Default using distance(meter)
    title : string
        title of Chart visualization
    ----------
    Returns the line figure, with 'arah' (1-based visiting position as text,
    starting at the warehouse) on the x axis and `col_name` on the y axis.

    Raises IndexError when the warehouse id does not occur in df_route.
    """
    import pandas as pd
    import plotly.express as px

    # Rotate the closed tour so that the warehouse row comes first. Positions
    # (not index labels) are used, so any index on the inputs is fine.
    warehouse_id = gdf_ws['unique_id'].iloc[0]
    is_warehouse = (df_route['unique_id'] == warehouse_id).to_numpy()
    if not is_warehouse.any():
        raise IndexError('warehouse %r is not part of df_route' % (warehouse_id,))
    first = int(is_warehouse.argmax())
    route_plot = pd.concat([df_route.iloc[first:], df_route.iloc[:first]]).reset_index(drop=True)

    list_arah = [str(step) for step in range(1, len(route_plot) + 1)]
    route_plot['arah'] = list_arah

    #Chart Visualization
    fig = px.line(route_plot,
                 y=col_name,
                 x='arah',
                 text = col_name,
                 title=title)
    # The x values are text, so the category order is given as text as well.
    fig.update_xaxes(categoryorder='array',
                     categoryarray= list_arah)
    fig.update_layout(xaxis_title="Arah",
                      yaxis_title=col_name)
    fig.update_traces(textposition="bottom right")

    return fig