In [1]:
import pandas as pd
import re
from sklearn.neighbors import NearestNeighbors

In [2]:
from sklearn.cluster import KMeans

In [3]:
data = pd.read_excel('Location of pirarucu collection.xlsx')

data

Unnamed: 0,Name,Type,lng_coordinates,lat_coordinates,"Production (2022, tons)",Revenue
0,Rio Solimões,Development Reserves,"3°14'12""S","64°46'40""W",102.94,105882.35
1,Rio Negro,Development Reserves,"0°25'23""S","64°39'22""W",102.94,105882.35
2,Rio Purus,Development Reserves,"7°30'25""S","66°18'13""W",102.94,105882.35
3,Rio Juruá,Development Reserves,"6°31'09""S","69°25'19""W",102.94,105882.35
4,Alvarães,City (town),"3°12'49""S","64°48'34""W",123.59,125534.12
5,Anori,City (town),"3°44'53""S","61°39'31""W",123.59,125534.12
6,Barcelos,City (town),"0°58'26""S","62°55'32""W",123.59,125534.12
7,Beruri,City (town),"3°53'54""S","61°22'26""W",123.59,125534.12
8,Carauari,City (town),"4°52'38""S","66°53'48""W",123.59,125534.12
9,Fonte Boa,City (town),"2°30'54""S","66°05'45""W",123.59,125534.12


In [4]:
# Function to convert DMS to decimal degrees
def dms_to_decimal(dms):
    parts = re.split('[°\'"]', dms)
    degrees = float(parts[0])
    minutes = float(parts[1])
    seconds = float(parts[2][:-1]) if parts[2][-1] in 'NSEW' else float(parts[2])
    direction = parts[2][-1]
    
    decimal = degrees + minutes / 60 + seconds / 3600
    if direction in 'S'or'W':
        decimal = -decimal
    
    return decimal

In [5]:
# Convert coordinates to decimal
data['lng_decimal'] = data['lng_coordinates'].apply(dms_to_decimal)
data['lat_decimal'] = data['lat_coordinates'].apply(dms_to_decimal)

In [6]:
# Separate city (town) and non-city locations
cities = data[data['Type'] == 'City (town)']
non_city_types_corrected = ['Development Reserves', 'Extractive Reserves', 'Indigenous Lands']
non_cities_filtered_corrected = data[data['Type'].isin(non_city_types_corrected)]

In [7]:
# Extract coordinates
city_coords = cities[['lng_decimal', 'lat_decimal']].to_numpy()
non_city_coords_filtered_corrected = non_cities_filtered_corrected[['lng_decimal', 'lat_decimal']].to_numpy()

In [8]:
# Use Nearest Neighbors to find the closest city for each non-city location
nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(city_coords)
distances_filtered_corrected, indices_filtered_corrected = nbrs.kneighbors(non_city_coords_filtered_corrected)

In [9]:
# Assign the nearest city to each filtered non-city location
non_cities_filtered_corrected['Nearest_City'] = indices_filtered_corrected.flatten()
non_cities_filtered_corrected['Nearest_City_Name'] = non_cities_filtered_corrected['Nearest_City'].apply(lambda x: cities.iloc[x]['Name'])

non_cities_filtered_corrected[['Name', 'Type', 'Nearest_City_Name']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_cities_filtered_corrected['Nearest_City'] = indices_filtered_corrected.flatten()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_cities_filtered_corrected['Nearest_City_Name'] = non_cities_filtered_corrected['Nearest_City'].apply(lambda x: cities.iloc[x]['Name'])


Unnamed: 0,Name,Type,Nearest_City_Name
0,Rio Solimões,Development Reserves,Alvarães
1,Rio Negro,Development Reserves,Maraã
2,Rio Purus,Development Reserves,Lábrea
3,Rio Juruá,Development Reserves,Itamarati
21,Reserva Extrativista Auati-Paraná,Extractive Reserves,Fonte Boa
22,Reserva Extrativista Baixo Juruá,Extractive Reserves,Juruá
23,Reserva Extrativista do Médio Juruá,Extractive Reserves,Carauari
24,Reserva Extrativista do Médio Purús,Extractive Reserves,Lábrea
25,Reserva Extrativista Ituxí,Extractive Reserves,Lábrea
26,Reserva Extrativista do Rio Jutaí,Extractive Reserves,Tonantins


In [10]:
pip install folium

Note: you may need to restart the kernel to use updated packages.


In [11]:
import folium
# Create a map object with the centre set to Brazil
mymap = folium.Map(location=[-3.4653, -62.2159], zoom_start=5)  

In [12]:
# Mapping city nodes
for _, row in cities.iterrows():
    folium.CircleMarker(
        location=[row['lng_decimal'], row['lat_decimal']],
        radius=5,
        color='blue',
        fill=True,
        fill_color='blue',
        popup=f"{row['Name']}",
    ).add_to(mymap)

# Mapping of non-city nodes and linkages
for _, row in non_cities_filtered_corrected.iterrows():
    folium.CircleMarker(
        location=[row['lng_decimal'], row['lat_decimal']],
        radius=5,
        color='red',
        fill=True,
        fill_color='red',
        popup=f"{row['Name']}",
    ).add_to(mymap)
    
    # the nearest city
    nearest_city_row = cities[cities['Name'] == row['Nearest_City_Name']].iloc[0]
    
    folium.PolyLine(
        locations=[
            [row['lng_decimal'], row['lat_decimal']],
            [nearest_city_row['lng_decimal'], nearest_city_row['lat_decimal']]
        ],
        color='black',
        weight=1
    ).add_to(mymap)

In [13]:
mymap.save("node_city_map_with_popup.html")  
mymap