In [4]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os 
import sys
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)
    import aup
from matplotlib.colors import ListedColormap
import folium
from folium.plugins import MarkerCluster
import leafmap
from minisom import MiniSom
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import silhouette_score
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler

In [2]:
schema = 'projects_research'
table = 'arroyovivo_residuos_hex'
query = f"SELECT * FROM {schema}.{table}"
residuos = aup.gdf_from_query(query, geometry_col='geometry')

print(residuos.shape)
residuos.head(1)

(562, 51)


Unnamed: 0,hex_id,res,geometry,aluminio,aluminio_quantity,baterias,baterias_quantity,carton,carton_quantity,cenizas,...,unicel,unicel_quantity,vidrio,vidrio_quantity,epi,epi_quantity,total_points,most_common_point,total_quantity,most_common_quantity
0,8a48a20296a7fff,10,"POLYGON ((-100.30269 25.63956, -100.30201 25.6...",13.0,24.0,0.0,0.0,36.0,44.0,0.0,...,207.0,259.0,89.0,137.0,1.0,1.0,1918.0,Plastico,2669.0,Plastico_quantity


In [3]:
residuos.columns

Index(['hex_id', 'res', 'geometry', 'aluminio', 'aluminio_quantity',
       'baterias', 'baterias_quantity', 'carton', 'carton_quantity', 'cenizas',
       'cenizas_quantity', 'ceramica', 'ceramica_quantity', 'electronica',
       'electronica_quantity', 'escombros', 'escombros_quantity', 'goma',
       'goma_quantity', 'llantas', 'llantas_quantity', 'madera',
       'madera_quantity', 'metal', 'metal_quantity', 'organico',
       'organico_quantity', 'otros', 'otros_quantity', 'papel',
       'papel_quantity', 'pesca', 'pesca_quantity', 'plastico',
       'plastico_quantity', 'salud e higiene', 'salud e higiene_quantity',
       'tabaco', 'tabaco_quantity', 'tela', 'tela_quantity', 'unicel',
       'unicel_quantity', 'vidrio', 'vidrio_quantity', 'epi', 'epi_quantity',
       'total_points', 'most_common_point', 'total_quantity',
       'most_common_quantity'],
      dtype='object')

## Estandarización de los datos

In [5]:
coldatos = ['aluminio', 'aluminio_quantity',
       'baterias', 'baterias_quantity', 'carton', 'carton_quantity', 'cenizas',
       'cenizas_quantity', 'ceramica', 'ceramica_quantity', 'electronica',
       'electronica_quantity', 'escombros', 'escombros_quantity', 'goma',
       'goma_quantity', 'llantas', 'llantas_quantity', 'madera',
       'madera_quantity', 'metal', 'metal_quantity', 'organico',
       'organico_quantity', 'otros', 'otros_quantity', 'papel',
       'papel_quantity', 'pesca', 'pesca_quantity', 'plastico',
       'plastico_quantity', 'salud e higiene', 'salud e higiene_quantity',
       'tabaco', 'tabaco_quantity', 'tela', 'tela_quantity', 'unicel',
       'unicel_quantity', 'vidrio', 'vidrio_quantity', 'epi', 'epi_quantity']

# Create a StandardScaler
scaler = StandardScaler()

# Standardize the selected columns
residuos[coldatos] = scaler.fit_transform(residuos[coldatos])
residuos.head()

Unnamed: 0,hex_id,res,geometry,aluminio,aluminio_quantity,baterias,baterias_quantity,carton,carton_quantity,cenizas,...,unicel,unicel_quantity,vidrio,vidrio_quantity,epi,epi_quantity,total_points,most_common_point,total_quantity,most_common_quantity
0,8a48a20296a7fff,10,"POLYGON ((-100.30269 25.63956, -100.30201 25.6...",11.669761,9.057943,-0.103882,-0.084667,13.63021,12.220525,-0.127573,...,18.570515,14.079036,14.658037,12.070278,4.546442,2.89347,1918.0,Plastico,2669.0,Plastico_quantity
1,8a48a202978ffff,10,"POLYGON ((-100.30331 25.63839, -100.30263 25.6...",8.900164,12.54463,-0.103882,-0.084667,4.675946,3.926608,7.838651,...,4.922867,12.915738,4.036645,4.656423,-0.167765,-0.151718,1122.0,Plastico,2473.0,Plastico_quantity
2,8a48a202baaffff,10,"POLYGON ((-100.29357 25.64128, -100.29289 25.6...",-0.331826,-0.239891,-0.103882,-0.084667,1.56142,7.072577,-0.127573,...,0.009714,1.892108,0.327587,0.497432,-0.167765,-0.151718,847.0,Plastico,2994.0,Plastico_quantity
3,8a48a202ba1ffff,10,"POLYGON ((-100.29499 25.64119, -100.29431 25.6...",2.437771,0.922338,-0.103882,-0.084667,0.004156,-0.077352,-0.127573,...,1.19251,1.227367,8.58867,4.475598,-0.167765,-0.151718,810.0,Plastico,1672.0,Plastico_quantity
4,8a48a202ba0ffff,10,"POLYGON ((-100.29641 25.64109, -100.29573 25.6...",0.591373,0.147519,-0.103882,-0.084667,0.393472,0.208646,-0.127573,...,0.919557,1.116576,2.85649,3.209818,-0.167765,-0.151718,708.0,Plastico,1820.0,Plastico_quantity


### Verificar estandarización

In [6]:
print("Media después de estandarización:")
print(residuos[coldatos].mean())

print("\nDesviación estándar después de estandarización:")
print(residuos[coldatos].std())

Media después de estandarización:
aluminio                   -5.057244e-17
aluminio_quantity          -2.528622e-17
baterias                    6.321555e-18
baterias_quantity          -1.264311e-17
carton                      0.000000e+00
carton_quantity             0.000000e+00
cenizas                    -6.321555e-18
cenizas_quantity            0.000000e+00
ceramica                    5.057244e-17
ceramica_quantity          -2.528622e-17
electronica                 0.000000e+00
electronica_quantity       -2.528622e-17
escombros                   2.528622e-17
escombros_quantity          0.000000e+00
goma                        2.528622e-17
goma_quantity               2.528622e-17
llantas                     0.000000e+00
llantas_quantity            0.000000e+00
madera                      2.528622e-17
madera_quantity             0.000000e+00
metal                       2.528622e-17
metal_quantity              0.000000e+00
organico                    2.528622e-17
organico_quantity      

# Dividir Dataset en Material y Material_quantity

## Material 

In [8]:
col_material = ['hex_id', 'res','geometry', 'aluminio', 'baterias', 'carton','cenizas','ceramica','electronica','escombros',
                'goma','llantas','madera','metal','organico','otros','papel','pesca','plastico','salud e higiene',
                'tabaco','tela','unicel','vidrio','epi','total_points', 'most_common_point'] 
data_material = residuos[col_material].copy()
print(data_material.shape)

(562, 27)


### Res 11

In [82]:
data_material_11 = data_material[data_material['res'] == 11].copy()
print(data_material_11.shape)
data_material_11.head(2)

(420, 27)


Unnamed: 0,hex_id,res,geometry,aluminio,baterias,carton,cenizas,ceramica,electronica,escombros,...,pesca,plastico,salud e higiene,tabaco,tela,unicel,vidrio,epi,total_points,most_common_point
6,8b48a202baacfff,11,"POLYGON ((-100.29357 25.64128, -100.29339 25.6...",-0.331826,-0.103882,1.56142,-0.127573,-0.32908,-0.220429,4.817522,...,-0.134595,2.010942,0.588397,-0.455406,12.467905,0.009714,-0.0096,-0.167765,656.0,Plastico
9,8b48a20296a6fff,11,"POLYGON ((-100.30220 25.63975, -100.30201 25.6...",3.36097,-0.103882,2.340051,-0.127573,0.665238,2.257194,-0.034706,...,-0.134595,3.213488,2.034515,-0.455406,2.349179,4.376961,4.879613,-0.167765,517.0,Plastico


### SOM

In [58]:
data_material_11.values.shape

(420, 27)

In [60]:
data = data_material_11.iloc[:, 3:-2].values
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data)
neurons_a = 15
neurons_b = 15
som = MiniSom(neurons_a, neurons_b, data_normalized.values.shape[1], random_seed=0, learning_rate=.1, sigma=1.5)
som.pca_weights_init(data_normalized.values)
som.train(data_normalized.values, 10000, verbose=True)

AttributeError: 'numpy.ndarray' object has no attribute 'values'

In [None]:
# Assuming 'gdf' is your GeoDataFrame
# Extract the relevant columns for clustering
#columns_for_clustering = data_material_11.iloc[, 3:-2]

# Select the data and normalize it
data = data_material_11.iloc[:, 3:-2].values
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data)

# Set the SOM parameters (you might need to tune these based on your data)
som_size = (2000, 2000)  # Adjust the grid size based on the expected number of clusters
learning_rate = 0.5
sigma = 1.0
epochs = 1000

# Initialize the SOM
som = MiniSom(som_size[0], som_size[1], len(columns_for_clustering), sigma=sigma, learning_rate=learning_rate)

# Train the SOM
som.train(data_normalized, 1000, random_order=True, verbose=True)

winning_nodes = np.array([som.winner(data_point) for data_point in data_normalized])

# Convert winning_nodes to tuples
winning_nodes_tuples = [tuple(node) for node in winning_nodes]

# Create a new DataFrame with 'cluster' information
cluster_df = pd.DataFrame({'cluster': som.labels_map(data_normalized, winning_nodes_tuples)})

# Reset the index of both DataFrames
data_material_11.reset_index(drop=True, inplace=True)
cluster_df.reset_index(drop=True, inplace=True)

# Concatenate the two DataFrames
data_material_11 = pd.concat([data_material_11, cluster_df], axis=1)

 [ 1000 / 1000 ] 100% - 0:00:04 left 

In [79]:
# Print or inspect the resulting GeoDataFrame
data_material_11.head()

Unnamed: 0,hex_id,res,geometry,aluminio,baterias,carton,cenizas,ceramica,electronica,escombros,...,plastico,salud e higiene,tabaco,tela,unicel,vidrio,epi,total_points,most_common_point,cluster
0,8b48a202baacfff,11,"POLYGON ((-100.29357 25.64128, -100.29339 25.6...",-0.331826,-0.103882,1.56142,-0.127573,-0.32908,-0.220429,4.817522,...,2.010942,0.588397,-0.455406,12.467905,0.009714,-0.0096,-0.167765,656.0,Plastico,"{(9, 25): 1}"
1,8b48a20296a6fff,11,"POLYGON ((-100.30220 25.63975, -100.30201 25.6...",3.36097,-0.103882,2.340051,-0.127573,0.665238,2.257194,-0.034706,...,3.213488,2.034515,-0.455406,2.349179,4.376961,4.879613,-0.167765,517.0,Plastico,"{(18, 3): 3}"
2,8b48a202ba0afff,11,"POLYGON ((-100.29585 25.64177, -100.29567 25.6...",-0.331826,-0.103882,0.393472,-0.127573,-0.32908,-0.220429,5.874443,...,2.508966,1.070436,-0.455406,3.513925,0.646604,1.676336,-0.167765,490.0,Plastico,"{(18, 4): 1}"
3,8b48a202baa5fff,11,"POLYGON ((-100.29320 25.64050, -100.29302 25.6...",-0.331826,-0.103882,-0.385159,-0.127573,0.665238,-0.220429,9.621708,...,1.792298,-0.375682,-0.455406,1.621214,0.009714,1.676336,-0.167765,452.0,Escombros,"{(43, 0): 1}"
4,8b48a20296a5fff,11,"POLYGON ((-100.30312 25.63985, -100.30294 25.6...",3.36097,-0.103882,2.729367,-0.127573,0.665238,-0.220429,0.061377,...,3.444279,2.034515,0.667129,0.529265,2.830228,0.833368,4.546442,430.0,Plastico,"{(43, 1): 3}"


In [80]:
data_material_11['cluster']

0      {(9, 25): 1}
1      {(18, 3): 3}
2      {(18, 4): 1}
3      {(43, 0): 1}
4      {(43, 1): 3}
           ...     
415             NaN
416             NaN
417             NaN
418             NaN
419             NaN
Name: cluster, Length: 420, dtype: object

In [81]:
pd.isna(data_material_11).sum( )

hex_id                 0
res                    0
geometry               0
aluminio               0
baterias               0
carton                 0
cenizas                0
ceramica               0
electronica            0
escombros              0
goma                   0
llantas                0
madera                 0
metal                  0
organico               0
otros                  0
papel                  0
pesca                  0
plastico               0
salud e higiene        0
tabaco                 0
tela                   0
unicel                 0
vidrio                 0
epi                    0
total_points           0
most_common_point      0
cluster              358
dtype: int64

### Res 10

In [32]:
data_material_10 = data_material[data_material['res'] == 10].copy()
print(data_material_10.shape)
data_material_10.head(2)

(142, 27)


Unnamed: 0,hex_id,res,geometry,aluminio,baterias,carton,cenizas,ceramica,electronica,escombros,...,pesca,plastico,salud e higiene,tabaco,tela,unicel,vidrio,epi,total_points,most_common_point
0,8a48a20296a7fff,10,"POLYGON ((-100.30269 25.63956, -100.30201 25.6...",11.669761,-0.103882,13.63021,-0.127573,9.614101,2.257194,0.782005,...,-0.134595,14.206453,13.603459,6.841071,5.843415,18.570515,14.658037,4.546442,1918.0,Plastico
1,8a48a202978ffff,10,"POLYGON ((-100.30331 25.63839, -100.30263 25.6...",8.900164,-0.103882,4.675946,7.838651,1.659556,4.734816,0.974173,...,-0.134595,8.837513,5.40879,3.473467,5.625026,4.922867,4.036645,-0.167765,1122.0,Plastico


## Material Quantity

### Res 11

### Res 10