In [1]:
from math import radians, cos, sin, asin, sqrt
import pandas as pd
from google.colab import auth
import gspread
from google.auth import default
from urllib.parse import quote
from jinja2 import Environment, PackageLoader, Template
import folium
import json
import unicodedata
import requests
import time
from scipy.spatial.distance import cdist
import numpy as np
from sklearn.cluster import DBSCAN
from math import ceil
import base64
from math import acos, degrees, radians, cos, sin, sqrt


In [2]:
def haversine(p1, p2):
    """
    Calculate the great circle distance in kilometers between two points
    on the earth (specified in decimal degrees)
    """
    lat1, lon1 = p1
    lat2, lon2 = p2

    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles. Determines return value units.
    return c * r

def middle_point(latlon1, latlon2):
  return ((latlon1[0] + latlon2[0])/2, (latlon1[1] + latlon2[1])/2)

def dist_format(km):
  if km < 1:
    meter = km * 1000
    return "{:.0f}m".format(meter)
  return "{:.2f}km".format(km)

def dist_near(min_dist):
  if min_dist < 1:
    return min_dist * 1.8
  if min_dist < 3:
    return min_dist * 1.6
  if min_dist < 5:
    return min_dist * 1.3
  if min_dist < 15:
    return min_dist * 1.2
  else:
    return min_dist * 1.05

def get_max_connections(min_dist):
  if min_dist < 1:
    return 6
  if min_dist < 3:
    return 5
  if min_dist < 5:
    return 4
  if min_dist < 12:
    return
  else:
    return 2

def normalize_string(input_string):
    normalized_string = unicodedata.normalize('NFKD', input_string)
    return ''.join([c for c in normalized_string if not unicodedata.combining(c)]).strip().lower()

def text_to_base64(text):
    text_bytes = text.encode('utf-8')
    base64_bytes = base64.b64encode(text_bytes)
    base64_string = base64_bytes.decode('utf-8')
    return base64_string

def key(lat, lon, name):
    base = text_to_base64(f"{lat}|{lon}").replace('=', '')
    return normalize_string(f"{base}_{name}").replace('.', '').replace(' ', '_')

def interpolate_color(value, start_color, end_color):
    """Interpolate between two RGB colors based on a value from 0 to 1."""
    return (
        int(start_color[0] + (end_color[0] - start_color[0]) * value),
        int(start_color[1] + (end_color[1] - start_color[1]) * value),
        int(start_color[2] + (end_color[2] - start_color[2]) * value)
    )



In [3]:
def hex_to_rgba(hex_color, alpha=1.0):
    hex_color = hex_color.lstrip('#')
    if len(hex_color) == 6:
        r, g, b = bytes.fromhex(hex_color)
    elif len(hex_color) == 8:
        r, g, b, a = bytes.fromhex(hex_color)
        alpha = a / 255  # Use provided alpha if not specified
    else:
        raise ValueError("Invalid hex color format. Use #RRGGBB or #RRGGBBAA.")

    return f'rgba({r}, {g}, {b}, {alpha})'


def hex_brightness(hex_color):
    hex_color = hex_color.lstrip('#')
    if len(hex_color) != 6:
        raise ValueError("Invalid hex color format. Use #RRGGBB.")

    r, g, b = bytes.fromhex(hex_color)

    # Calculate brightness using the luminance formula
    brightness = (0.299 * r + 0.587 * g + 0.114 * b)

    return brightness

# Example usage
brightness_value = hex_brightness('#ff5733')
print(brightness_value)  # Output: Brightness value


def contrast(hex_color, alpha=1):
    brightness = hex_brightness(hex_color)
    return f'rgba(0,0,0,{alpha})' if brightness > 128 else f'rgba(255,255,255,{alpha})'

def calculate_angle(lat_X, lon_X, lat1, lon1, lat2, lon2):
    distance1 = haversine((lat_X, lon_X), (lat1, lon1))
    distance2 = haversine((lat_X, lon_X), (lat2, lon2))

    # Calculate the angle using the law of cosines
    angle = acos(
        (distance1**2 + distance2**2 - haversine((lat1, lon1), (lat2, lon2))**2) /
        (2 * distance1 * distance2)
    )
    return degrees(angle)

133.128


In [4]:
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

In [5]:
sheet_url = 'https://docs.google.com/spreadsheets/d/1wpSL4paOEUum2bl4whpG0yJ018P-9-j72OOYBsax_hc'

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Wait for slow formulas to resolve while loading
def check_loading(worksheet):
    data = worksheet.get_all_values()
    return any("Loading..." in row for row in data)

gsheets = gc.open_by_url(sheet_url)
worksheet = gsheets.worksheet('Places')

while check_loading(worksheet):
    print("Waiting for formulas to resolve...")
    time.sleep(1)

print('Sheets fully loaded, getting all values')
sheets = worksheet.get_all_values()
print('Done')

Waiting for formulas to resolve...
Sheets fully loaded, getting all values
Done


In [8]:
df = pd.DataFrame(sheets[1:], columns=sheets[0])
df = df.dropna(axis=0, subset='PlaceName')
df = df[df['PlaceName'].astype(bool)]

df['lat'] = pd.to_numeric(df['Latitude'],errors='coerce')
df['lon'] = pd.to_numeric(df['Longitude'],errors='coerce')

points = df[["lat", "lon", "PlaceName"]]
df = df.drop_duplicates(subset=['lat', 'lon']).reset_index(drop=True)


In [9]:
class CustomFoliumMap(folium.Map):

  _scripts = ""
  _template = Template(u"""
        {% macro header(this, kwargs) %}
            <meta name="viewport" content="width=device-width,
                initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
            <style>
                #{{ this.get_name() }} {
                    position: {{this.position}};
                    width: {{this.width[0]}}{{this.width[1]}};
                    height: {{this.height[0]}}{{this.height[1]}};
                    left: {{this.left[0]}}{{this.left[1]}};
                    top: {{this.top[0]}}{{this.top[1]}};
                }
                {{ this.get_custom_css() }}
            </style>
        {% endmacro %}

         {% macro html(this, kwargs) %}
         <div class="full reveal" id="exampleModal1" data-reveal>
         <button class="close-button" data-close aria-label="Close modal" type="button">
         <span aria-hidden="true">&times;</span>
         </button>
         <div id="modalContent"></div>
         </div>

        <div class="folium-map" id={{ this.get_name()|tojson }} ></div>
        <div id="visibleMarkersBar"></div>
        {% endmacro %}

        {% macro script(this, kwargs) %}
            var {{ this.get_name() }} = L.map(
                {{ this.get_name()|tojson }},
                {
                    center: {{ this.location|tojson }},
                    crs: L.CRS.{{ this.crs }},
                    {%- for key, value in this.options.items() %}
                    {{ key }}: {{ value|tojson }},
                    {%- endfor %}
                }
            );

            {%- if this.control_scale %}
            L.control.scale().addTo({{ this.get_name() }});
            {%- endif %}

            {% if this.objects_to_stay_in_front %}
            function objects_in_front() {
                {%- for obj in this.objects_to_stay_in_front %}
                    {{ obj.get_name() }}.bringToFront();
                {%- endfor %}
            };
            {{ this.get_name() }}.on("overlayadd", objects_in_front);
            $(document).ready(objects_in_front);
            {%- endif %}

            {{ this.get_custom_script() }}

        {% endmacro %}
        """)


  def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.default_js = [*super().default_js,
                       ("foundation", "https://cdn.jsdelivr.net/npm/foundation-sites@6.7.5/dist/js/foundation.min.js"),
                       ]

    self.default_css = [*super().default_css,
                 ("foundation_min", "https://cdn.jsdelivr.net/npm/foundation-sites@6.7.5/dist/css/foundation.min.css"),
                ]

  def get_name(self, *args, **kwargs):
    return "map"


  def get_custom_script(self, *args, **kwargs):
    return self._scripts + """

    $(document).foundation()

    function setCurrentDisplayKey(key){
      localStorage.setItem('displayPlaceKey', key);
    }
    """

  def get_custom_css(self, *args, **kwargs):
    return """
    #gallery > img {
      width: auto;
      height:100vh;
      margin: 5px;
      border: 3px solid whitesmoke;
    }

    #gallery {
      display: flex;
      flex-wrap: wrap;
      justify-content: space-around;
    }

    .chip {
      display: inline-block;
      padding: 0 10px;
      height: 1.5em;
      font-size: 1em;
      line-height: 1.5em;
      border-radius: 25px;
      background-color: #f1f1f1;
    }

    .dist {
      display: inline-block;
      padding: 0 10px;
      height: 1.5em;
      font-size: 1em;
      line-height: 1.5em;
      border-radius: 25px;
      background-color: rgba(0,0,0,.1);
      font-weight: bold;
      transition: all 0.3s ease;
    }

    .dist:hover {
      font-weight: bold;
      font-size: 1.5em;
      filter: brightness(85%);
    }

    #visibleMarkersBar {
      width: 100vw;
      min-height: 10px;
      position: sticky;
      bottom: 0;
      z-index: 2000;
      background-color: rgba(0,0,0,0.5);
      flex-direction: row;
      flex-wrap: wrap;
      justify-content: flex-start;
   }

  .markchip {
    display: inline-block;
    padding: 0 25px;
    height: 20px;
    font-size: 11px;
    line-height: 23px;
    border-radius: 25px;
    background-color: #f1f1f1;
    margin: 2px;
  }

  .markchip img {
    float: left;
    margin: 0 10px 0 -25px;
    height: 20px;
    width: 19px;
    border-radius: 50%;
  }
    """

  def add_script(self, text):
    self._scripts = f"""{self._scripts}
    {text}
    """

  @staticmethod
  def create():
    mp = CustomFoliumMap(
          location=[points.lat.mean(), points.lon.mean()], zoom_start=9, control_scale=True
      )

    mp._name = "map"
    mp._id = ""

    mp.add_script(requests.get("https://raw.githubusercontent.com/davetroy/geohash-js/master/geohash.js").text)

    # Specify the path to your file
    file_path = './drive/MyDrive/VIAGEM CHILE - 2024/scripts.js'

    # Read the file
    with open(file_path, 'r') as file:
        script = file.read()

    mp.add_script(script)

    return mp

In [10]:
def modal_html(row, key):
  return f"""<h1><img src="{row.IconUrl}" style="max-width: 1em">{row.PlaceName}</h1>
  <div style="max-width: 40em">
  <div><span style="font-weight: bold;">Address: </span><span>{row.Address}</span></div>

  <div class="chip"><span style="font-weight: bold;">Wikiname: </span><span>{row.WikiTitle}</span></div>

  <div class="chip">{row.lat},{row.lon}</div>
  <div class="chip">{key}</div>


  <p>{row.Snippet}...</p>
  </div>
  <hr/>
  <div>
  <button class="button success large expanded" data-open="exampleModal1" onClick="fill('{key}')">➕ More details</button>

  <a href="{row.MapsUrl}" target="_blank" class="button secondary">🌎 Google Maps</a>
  <a href="{row.CityMapperUrl}" target="_blank" class="button secondary">🟢 Citymapper</a>
  <a href="{row.WikipediaUrl}" target="_blank" class="button secondary">📚 Wikipedia</a>
  </div>
  """

In [11]:
def get_hex_color(distance):
    if distance < 0:
        distance = 0

    # Define color ranges and corresponding distance limits
    color_ranges = [
        ((0, 255, 0), (0, 0, 255), 5),    # Green to Blue
        ((0, 255, 255), (255, 255, 0), 10),  # Blue to Yellow
        ((255, 255, 0), (255, 165, 0), 30), # Yellow to Orange
        ((255, 165, 0), (255, 0, 0), 50),  # Orange to Red
        ((255, 0, 0),(100, 0, 0), 200) # Red to dark red
    ]

    # Find the appropriate range
    for i, (start_color, end_color, limit) in enumerate(color_ranges):
        if distance <= limit:
            if i == 0:
                value = distance / limit
            else:
                previous_limit = color_ranges[i - 1][2]
                value = (distance - previous_limit) / (limit - previous_limit)
            interpolated_color = interpolate_color(value, start_color, end_color)
            return f'#{interpolated_color[0]:02X}{interpolated_color[1]:02X}{interpolated_color[2]:02X}'

    return '#640000'

In [12]:
df[pd.isna(df['lat'])]

Unnamed: 0,PlaceName,Category,Type,Location,Country,MapsUrl,CityMapperUrl,GeoJSON,Latitude,Longitude,...,Image4,PlaintextTitle,PlaintextContent,Icon,IconUrl,IconImage,IconFillColor,iconBorderColor,lat,lon


In [13]:
X = df[['lat', 'lon']].values
dbscan = DBSCAN(eps=5/6371.0, min_samples=2, algorithm='ball_tree', metric='haversine').fit(np.radians(X))

df['cluster_id'] = dbscan.fit_predict(X)

df_centroid = df[df['cluster_id'] != -1].groupby('cluster_id').agg(
    lat=('lat', 'mean'),
    lon=('lon', 'mean'),
    count=('cluster_id', 'count')
).reset_index()

def calculate_radius(cluster_id):
    cluster_points = df[df['cluster_id'] == cluster_id][['lat', 'lon']].values
    centroid = df_centroid[df_centroid['cluster_id'] == cluster_id][['lat', 'lon']].values[0]

    distances = [haversine(centroid, point) * 1000 for point in cluster_points]  # Convert to meters
    radius = max(distances) if distances else 0  # Handle empty clusters
    return radius + 50

# Calculate radius for each cluster and add to df_centroid
df_centroid['radius'] = df_centroid['cluster_id'].apply(calculate_radius)

df_centroid['PlaceName'] = df_centroid['cluster_id'].apply(
    lambda x: ', '.join(df[df['cluster_id'] == x]['PlaceName'])
)
df_centroid

Unnamed: 0,cluster_id,lat,lon,count,radius,PlaceName
0,0,-33.437492,-70.651476,4,165.488248,"Metropolitan Cathedral of Santiago, Plaza de A..."
1,1,-33.44202,-70.653815,3,98.285407,"Palácio La Moneda, Monumento al Presidente Sal..."
2,2,-33.438844,-70.652753,2,88.769674,"Museu Chileno de Arte Pré-Colombiana, Palacio ..."
3,3,-33.438829,-70.640923,4,158.024011,"Barrio Lastarria, Centro Cultural Gabriela Mis..."
4,4,-33.440202,-70.643939,2,106.835205,"Cerro Santa Lucía, Santa Lucía estacíon"
5,5,-33.433391,-70.650438,2,112.184003,"Central Market of Santiago, Monumento a los Hé..."
6,6,-33.417741,-70.606481,2,55.061794,"Sky Costanera, Costanera Mall"
7,7,-33.436786,-70.634283,5,186.306756,"Plaza de la Dignidad, Estatua del Genio de la ..."
8,8,-33.040735,-71.627899,5,249.736399,"Cerro Concepción, Paseo Gervasoni, Ascensor El..."
9,9,-33.032126,-71.63089,2,88.911766,"Museo Maritimo Nacional Chile, Paseo 21 de May..."


In [14]:
def add_centroids(dforig):
  df = dforig.copy()
  df['ItemType'] ='UNKNOWN'

  df.loc[df.cluster_id == -1, 'ItemType'] = 'NO_CLUSTER'
  df.loc[df.cluster_id >= 0, 'ItemType'] = 'IN_CLUSTER'
  df_centroid['ItemType']='CENTROID'

  df = pd.concat([df, df_centroid], ignore_index=True)
  return df


df_clustered = add_centroids(df).drop_duplicates(subset=['PlaceName'], keep='last')

In [15]:
def dist_statistics(dforig):
    df = dforig.copy()

    data = []

    for i, row1 in df.iterrows():
        p1 = (row1['lat'], row1['lon'])
        distances = []

        for j, row2 in df.iterrows():
            if i != j:
                p2 = (row2['lat'], row2['lon'])
                dist = haversine(p1, p2)
                distances.append(dist)

        avg_dist = np.mean(distances)
        std_dev = np.std(distances)

        counts = {
            'min_distance': min(distances),
            'max_distance': max(distances),
            'conn_<=_30m': sum(d <= 0.03 for d in distances),
            'conn_30m_50m': sum(0.03 < d <= 0.05 for d in distances),
            'conn_50m_100m': sum(0.05 < d <= 0.1 for d in distances),
            'conn_100m_300m': sum(0.1 < d <= 0.3 for d in distances),
            'conn_300m_1km': sum(0.3 < d <= 1 for d in distances),
            'conn_1km_5km': sum(1 < d <= 5 for d in distances),
            'conn_5km_15km': sum(5 < d <= 15 for d in distances),
            'conn_15km_50km': sum(15 < d <= 50 for d in distances),
            'conn_50km_150km': sum(50 < d <= 150 for d in distances),
            'conn_150km_500km': sum(150 < d <= 500 for d in distances),
            'conn_gt_500km': sum(d > 500 for d in distances)
        }

        data.append({
            'PlaceName': row1['PlaceName'],
            'avg_distance_km': avg_dist,
            'std_dev_distance_km': std_dev,
            **counts,
            'close_avg': (counts['conn_30m_50m'] + counts['conn_100m_300m'] + counts['conn_300m_1km'])/3,
            'med_avg': (counts['conn_1km_5km'] + counts['conn_5km_15km'] + counts['conn_15km_50km'])/3,
            'far_avg': (counts['conn_50km_150km'] + counts['conn_150km_500km'])/2,
            'really_far_avg': counts['conn_gt_500km'],
        })

    result_df = pd.DataFrame(data)

    result_df['conncount'] = result_df.apply(
      lambda row: int(max(
          ceil(  ((row.close_avg * 0.6) + (row.med_avg * 0.20) + (row.far_avg * 0.10) + (row.really_far_avg * 0.01)) * 0.95  ), 1
      )),
      axis=1
    )
    return dforig.merge(result_df, on='PlaceName', how='left').drop_duplicates(subset=['PlaceName'], keep='first')

df_diststat = dist_statistics(df_clustered)
df_diststat[['PlaceName','ItemType', 'lat', 'lon']]
# df_diststat[df_diststat['ItemType'] == 'CENTROID']

Unnamed: 0,PlaceName,ItemType,lat,lon
0,Metropolitan Cathedral of Santiago,IN_CLUSTER,-33.437581,-70.651394
1,Plaza de Armas de Santiago,IN_CLUSTER,-33.437894,-70.651092
2,Palácio La Moneda,IN_CLUSTER,-33.442419,-70.653946
3,Museu Chileno de Arte Pré-Colombiana,IN_CLUSTER,-33.438668,-70.652392
4,Vinícola Concha y Toro,NO_CLUSTER,-33.445965,-70.665978
...,...,...,...,...
174,"Santuário Cerro San Cristobal, Virgen de la In...",CENTROID,-33.425660,-70.633323
175,"Bar Liguria Lastarria, Fabula Restaurant, The ...",CENTROID,-33.437303,-70.641249
176,"Express de Lider Merced, Café Mosqueto",CENTROID,-33.437659,-70.644570
177,"Los Héroes estacíon, Los Héroes L1 estacíon",CENTROID,-33.446326,-70.660760


In [16]:
def connect(df, itemType='CENTROID', distMultiplier=2, maxConn=3):
    # Filter the DataFrame for the specified item type
    centroids = df[df['ItemType'] == itemType]
    connections = []

    # Step 3: Iterate through centroids to calculate distances
    for i, rowA in centroids.iterrows():
        # Calculate distances from rowA to all other centroids
        distances = centroids.apply(
            lambda row: haversine((rowA['lat'], rowA['lon']), (row['lat'], row['lon'])),
            axis=1
        )
        shortest_dist = distances[distances > 0.3].min() if not distances[distances > 0.3].empty else None

        for j, rowB in centroids.iterrows():
            if i != j:  # Avoid self-connection
                dist = haversine((rowA['lat'], rowA['lon']), (rowB['lat'], rowB['lon']))

                # Check if the distance falls within the allowed range
                if shortest_dist is not None and dist <= distMultiplier * shortest_dist:
                    connections.append({
                        'PlaceNameA': rowA['PlaceName'],
                        'PlaceNameB': rowB['PlaceName'],
                        'latA': rowA['lat'],
                        'lonA': rowA['lon'],
                        'latB': rowB['lat'],
                        'lonB': rowB['lon'],
                        'color': get_hex_color(dist),
                        'dist': dist,
                        'id': text_to_base64(f"{(abs(rowA['lat']*10000) + abs(rowB['lat']*10000))*(abs(rowA['lon']*10000) + abs(rowB['lon']*10000))/dist*100}")
                    })

    # Create DataFrame from the connections and drop duplicates
    df_connections = pd.DataFrame(connections)
    df_connections = df_connections.drop_duplicates(subset=['id'])

    # Group by 'PlaceNameA' and keep only the maxConn shortest distances
    df_connections = df_connections.groupby('PlaceNameA').apply(lambda x: x.nsmallest(maxConn, 'dist')).reset_index(drop=True)

    return df_connections

centroids_dist = connect(df_diststat, 'CENTROID', 1.5)

  df_connections = df_connections.groupby('PlaceNameA').apply(lambda x: x.nsmallest(maxConn, 'dist')).reset_index(drop=True)


In [17]:

def calculate_direction(latA, lonA, latB, lonB):
    # Calculate the difference in latitudes and longitudes
    d_lat = latB - latA
    d_lon = lonB - lonA

    # Determine the direction based on the signs of the differences
    if d_lat > 0 and d_lon > 0:
        return 'NE'
    elif d_lat > 0 and d_lon < 0:
        return 'NW'
    elif d_lat < 0 and d_lon > 0:
        return 'SE'
    elif d_lat < 0 and d_lon < 0:
        return 'SW'
    elif d_lat > 0:
        return 'N'
    elif d_lat < 0:
        return 'S'
    elif d_lon > 0:
        return 'E'
    else:
        return 'W'

def connect_no_cluster(df, itemType='NO_CLUSTER', dist_min=0.3, dist_max=500, maxConn=3):
    # Filter the DataFrame for NO_CLUSTER items
    no_cluster_items = df[df['ItemType'] == itemType]

    # Filter possible connections (NO_CLUSTER, IN_CLUSTER, CENTROID)
    possible_connections = df[df['ItemType'].isin(['NO_CLUSTER', 'IN_CLUSTER', 'CENTROID'])]

    connections = []

    for i, rowA in no_cluster_items.iterrows():
        # Calculate distances from rowA to all other possible connections
        distances = possible_connections.apply(
            lambda row: haversine((rowA['lat'], rowA['lon']), (row['lat'], row['lon'])),
            axis=1
        )

        # Iterate over the potential connections
        for j, rowB in possible_connections.iterrows():
            if i != j:  # Avoid self-connection
                dist = haversine((rowA['lat'], rowA['lon']), (rowB['lat'], rowB['lon']))

                # Check if the distance falls within the allowed range
                if dist_min <= dist <= dist_max:
                    direction = calculate_direction(rowA['lat'], rowA['lon'], rowB['lat'], rowB['lon'])
                    connections.append({
                        'PlaceNameA': rowA['PlaceName'],
                        'PlaceNameB': rowB['PlaceName'],
                        'latA': rowA['lat'],
                        'lonA': rowA['lon'],
                        'latB': rowB['lat'],
                        'lonB': rowB['lon'],
                        'direction': direction,
                        'color': get_hex_color(dist),
                        'dist': dist,
                        'id': text_to_base64(f"{(abs(rowA['lat']*10000) + abs(rowB['lat']*10000))*(abs(rowA['lon']*10000) + abs(rowB['lon']*10000))/dist*100}")
                    })

    # Create DataFrame from the connections and drop duplicates
    df_connections = pd.DataFrame(connections)
    df_connections = df_connections.drop_duplicates(subset=['id'])

    # Group by 'PlaceNameA' and keep only the maxConn shortest distances
    df_connections = df_connections.groupby(['PlaceNameA', 'direction']).apply(lambda x: x.nsmallest(1, 'dist')).reset_index(drop=True)
    df_connections = df_connections.groupby('PlaceNameA').apply(lambda x: x.nsmallest(maxConn, 'dist')).reset_index(drop=True)
    df_connections = df_connections.groupby('PlaceNameB').apply(lambda x: x.nsmallest(maxConn, 'dist')).reset_index(drop=True)


    return df_connections

# Example usage:
no_cluster_connections = connect_no_cluster(df_diststat, 'NO_CLUSTER', 0.3, 200, 2)
no_cluster_connections

  df_connections = df_connections.groupby(['PlaceNameA', 'direction']).apply(lambda x: x.nsmallest(1, 'dist')).reset_index(drop=True)
  df_connections = df_connections.groupby('PlaceNameA').apply(lambda x: x.nsmallest(maxConn, 'dist')).reset_index(drop=True)
  df_connections = df_connections.groupby('PlaceNameB').apply(lambda x: x.nsmallest(maxConn, 'dist')).reset_index(drop=True)


Unnamed: 0,PlaceNameA,PlaceNameB,latA,lonA,latB,lonB,direction,color,dist,id
0,Plaza Sotomayor,Ascensor Concepcion,-33.038496,-71.628250,-33.040767,-71.626410,SE,#00EF0F,0.305283,MzEwMDc3ODM0MTYwMDIyLjI1
1,Reloj de Flores de Viña del Mar,Ascensor Concepcion,-33.023308,-71.567168,-33.040767,-71.626410,SW,#2BFFD3,5.853880,MTYxNjAxMzgwNTg0NjguMDc=
2,Castillo Hidalgo,"Bar Liguria Lastarria, Fabula Restaurant, The ...",-33.439423,-70.643325,-33.437303,-70.641249,NE,#00EF0F,0.304350,MzEwNDUzMTM4MzkyMDY4LjI=
3,Castillo Hidalgo,Bellas Artes estacíon,-33.439423,-70.643325,-33.436451,-70.644221,NW,#00ED11,0.340785,Mjc3MjYzNjU5Njg1MjcyLjg=
4,Fuente Alemana,Bora pro Chile,-33.425380,-70.611444,-33.428739,-70.621072,SW,#00CD31,0.968474,OTc0OTMzNTE4MTE4NzUuNTM=
...,...,...,...,...,...,...,...,...,...,...
147,Viña Undurraga,Viña Santa Rita,-33.646079,-70.885914,-33.724648,-70.680145,SE,#FFCD00,20.947478,NDU1MzAxMDI3NjI5My40Mzg=
148,Farellones,Viña Undurraga,-33.560451,-70.783635,-33.646079,-70.885914,SW,#FFEF00,13.430729,NzA4OTA1NTY3MjExMi4xOA==
149,Río Maipo,Viña Undurraga,-33.868336,-70.811185,-33.646079,-70.885914,NW,#FFB800,25.661278,MzcyODAyODE1MjkyOC4zMDU3
150,Parque de La Familia,Wooden Man,-33.425326,-70.679116,-33.422843,-70.682013,NW,#00EB13,0.385402,MjQ1MTkxODA0OTQzODU0LjM0


In [18]:
df_connections_combined = pd.concat([no_cluster_connections, centroids_dist], ignore_index=True)

mp = CustomFoliumMap.create()

distance_items = list()
marker_icons = list()
icon_circles = list()
cluster_icon = list()
cluster_circle = list()

# Distance Lines

for index, row in df_connections_combined.iterrows():
  p1 = (row.latA, row.lonA)
  p2 = (row.latB, row.lonB)

  dist_unit = dist_format(row.dist)

  bg=hex_to_rgba(row.color, alpha=.8)
  txt=contrast(row.color, alpha=.7)


  polyline = folium.PolyLine([p1,p2]
    , color=row.color
    , weight=3
    , opacity=.8
    , stroke=True
    , tooltip=f"({dist_unit}) {row.PlaceNameA} [TO] {row.PlaceNameB}"
  )

  distmarker = folium.Marker(middle_point(p1,p2),
                icon=folium.DivIcon(html=f"""
                <span class="dist" style="background-color: {bg}; color: {txt};">
                {dist_format(row.dist)}
                </span>""")
                )
  distance_items.append(polyline)
  distance_items.append(distmarker)


# Add markers
for index, row in df.iterrows():

  placeKey = key(row.lat, row.lon, row.PlaceName)

  popup = folium.Popup(folium.Html(modal_html(row, placeKey), script=True), max_width=1000)

  size = 'small' if row['Type'] in ['supermarket', 'restaurant', 'coffee', 'metro_station'] else 'default'
  circle_radius = 20 if size == 'small' else 40
  icon_size = 35 if size == 'small' else 40

  circle = folium.Circle(
    location=[row.lat, row.lon],
    radius=circle_radius,  # Radius in meters
    color=row.iconBorderColor,
    fill=True,
    fill_color=row.IconFillColor,
    fill_opacity=0.7,
    class_name='circle'
  )
  icon_circles.append(circle)

  marker=folium.Marker([row.lat, row.lon],
      popup=popup,
      tooltip=row.PlaceName,
      icon=folium.features.CustomIcon(row.IconUrl ,icon_size=(icon_size, icon_size)),
      iconKey=placeKey,
      iconName=row.PlaceName,
      iconUrl=row.IconUrl,
  )
  marker_icons.append(marker)

  endcoord = quote(f"{row.lat},{row.lon}")

pages = {}

for index, row in df.iterrows():
  placeKey = key(row.lat, row.lon, row.PlaceName)
  data = dict(
          title=quote(row.PlaceName),
          summary=row.PlaintextContent,
          images=[image for image in [row.Image1, row.Image2, row.Image3, row.Image4] if image is not None and not pd.isna(image) and not image == '#REF!' and not image == '#ERR!' and not image == '' and not image == 'Loading...'],
          near=[],
          lat=row.lon,
          lon=row.lat,
          maps=row.MapsUrl,
          citymapper=row.CityMapperUrl,
          icon=row.IconUrl,
  )
  pages[placeKey] = data

mp.add_script(f"""const placeMap = {json.dumps(pages)}""")

# Add Centroids
for index, row in df_centroid.iterrows():
  p = [row.lat, row.lon]

  circle = folium.Circle(
    location=p,
    tooltip=f"Radius of {row.radius:.2f} m",
    radius=row.radius,  # Radius in meters
    color='rgba(0,0,0,.2)',
    fill=True,
    fill_color='#fff',
    fill_opacity=0.2,
    class_name='circle',
  )
  cluster_circle.append(circle)

  marker=folium.Marker(p,
      icon=folium.features.CustomIcon('https://img.icons8.com/?size=200&id=6ZPBuF7As4Bz&format=png' ,icon_size=(35, 35)),
      iconUrl='https://img.icons8.com/?size=200&id=6ZPBuF7As4Bz&format=png',
  )
  cluster_icon.append(marker)

# Add items
items = [*cluster_circle, *icon_circles,*distance_items, *cluster_icon, *marker_icons]

for item in items:
  item.add_to(mp)


mp.save('map-chile.html')
mp

In [19]:
def tag_group(group):
    place_name = group['PlaceNameA'].iloc[0]  # Extract PlaceNameA from the group
    max_allowed = df_diststat.loc[df_diststat['PlaceName'] == place_name, 'conncount'].iloc[0]
    smallest = group.nsmallest(max_allowed, 'dist')

    group['delete'] = True  # Set all to True initially
    group.loc[smallest.index, 'delete'] = False  # Set the smallest to False

    return group

def calculate_distances(dforig, df_centroid, df_diststat):
    df = dforig.copy()
    df = df[df['cluster_id'] == -1]
    df = pd.concat([df, df_centroid], ignore_index=True)

    results = []
    for i, rowA in df.iterrows():
        for j, rowB in df.iterrows():
            dist = haversine((rowA['lat'], rowA['lon']), (rowB['lat'], rowB['lon']))

            results.append({
                'PlaceNameA': rowA['PlaceName'],
                'PlaceNameB': rowB['PlaceName'],
                'latA': rowA['lat'],
                'lonA': rowA['lon'],
                'latB': rowB['lat'],
                'lonB': rowB['lon'],
                'dist': dist,
                'color': get_hex_color(dist),
                'delete': False,
            })

    dfres = pd.DataFrame(results)  # Create dfres from results
    dfres.loc[dfres['dist'] <= 0.3, 'delete'] = True

    max_conn_mapping = df_diststat.set_index('PlaceName')['conncount'].to_dict()

    # Use include_groups=False to suppress the warning
    dfres = dfres.groupby('PlaceNameA', as_index=False, group_keys=False).apply(tag_group)
    dfres = dfres[dfres['delete'] == False]

    dfres = dfres.groupby('PlaceNameB', as_index=False, group_keys=False).apply(tag_group)
    dfres = dfres[dfres['delete'] == False]


    dfres['sorted_places'] = dfres.apply(lambda x: tuple(sorted([x['PlaceNameA'], x['PlaceNameB']])), axis=1)
    dfres = dfres.drop_duplicates(subset='sorted_places')

    dfres = dfres[dfres['PlaceNameA'] != dfres['PlaceNameB']]

    dfres.reset_index(drop=True, inplace=True)

    return dfres[['PlaceNameA', 'PlaceNameB', 'latA', 'latB', 'lonA', 'lonB', 'dist', 'color']]

dfdist = calculate_distances(df, df_centroid, df_diststat)


  dfres = dfres.groupby('PlaceNameA', as_index=False, group_keys=False).apply(tag_group)
  dfres = dfres.groupby('PlaceNameB', as_index=False, group_keys=False).apply(tag_group)
