In [2]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

import datetime
import gc
import pandas as pd

import torch
import torch.nn as nn


# Select the CUDA device when torch reports one is available; otherwise use the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import umap
import pacmap

import random
def _set_seed(seed: int):
    """
    Helper function for reproducible behavior: seeds ``random``, ``numpy`` and ``torch``
    (CPU generator and every CUDA device, when present).

    Args:
        seed (:obj:`int`): The seed to set.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Seed torch unconditionally. Guarding this behind torch.cuda.is_available()
    # left the torch generator unseeded on CPU-only machines.
    torch.manual_seed(seed)
    # Safe to call even if CUDA is not available (it is silently ignored then).
    torch.cuda.manual_seed_all(seed)
_set_seed(1024)

In [3]:
import numpy as np
import pandas as pd
import torch
import matplotlib.cm as cm
import pickle as pkl
import matplotlib.pyplot as plt
# Single methods
from sklearn.preprocessing import MinMaxScaler as Normalizer
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.neighbors import kneighbors_graph
from sklearn import metrics
from sklearn.metrics import silhouette_samples
from Modules.Clustering_Utils_Zamuro import plot_silhouette
from Modules.Clustering_Utils_Zamuro import plot_centroids
from Modules.Clustering_Utils_Zamuro import ClusteringResults
from sklearn.metrics import davies_bouldin_score as DB
from sklearn.metrics import calinski_harabasz_score as CH
import folium
from folium.plugins import HeatMap

from torch.utils.data import DataLoader
from torch.utils.data import random_split
from Zamuro_DataLoader import SoundscapeData
from Models import ConvAE as AE
from AE_training_functions import TestModel, TrainModel
from AE_Clustering import AE_Clustering 

In [16]:
# Project base directory and the CSV path derived from it.
root = "/home/mirp_ai/Documents/Daniel_Nieto/PhD/AutoEncoders_Ecoacoustics"
root_ai_Zamuro = f"{root}/Zamuro/AI_Caract/AI_Zamuro.csv"

# Companion tables, read relative to the working directory; the first CSV
# column becomes the index of each frame.
audios = pd.read_csv("Complementary_Files/zamuro_audios_complete.csv", index_col=0)
recorders = pd.read_csv("Complementary_Files/zamuro_recorders_satelites.csv", index_col=0)

# Table read from root_ai_Zamuro: rows containing missing values are removed
# and the "file" column is renamed to "y".
df_ai = (
    pd.read_csv(root_ai_Zamuro, index_col=0)
    .dropna()
    .rename(columns={"file": "y"})
)

# Every column from position 2 onward, as a NumPy array.
AIs = np.asarray(df_ai.iloc[:, 2:])

In [24]:
# Fit a 10-cluster KMeans model (fixed random_state) on the AIs array.
Clustering = KMeans(n_clusters=10, random_state=0)
Clustering.fit(AIs)

# Attach the fitted labels, then index the table by its "y" column
# (kept as a column too) and name that index 'Filename_'.
df_ai['cluster'] = Clustering.labels_
df_ai = df_ai.set_index("y", drop=False).rename_axis('Filename_')

# Keep only the audio rows whose index also appears in df_ai, then bring in
# their cluster label.
has_cluster = audios.index.isin(df_ai.index)
audios_filtered = audios[has_cluster]
audios_final = audios_filtered.join(df_ai['cluster'], how='left')

# Rows: location, columns: cluster, values: number of audios (0 when absent).
conteo_clusters = (
    audios_final
    .groupby(['location', 'cluster'])
    .size()
    .unstack(fill_value=0)
)
conteo_clusters.columns = [f'cluster_{label}' for label in conteo_clusters.columns]

# Append the per-cluster counts to each recorder, matched on 'field_number_PR'.
recorders_clusters = recorders.join(conteo_clusters, on='field_number_PR')

In [26]:
# Display recorders_clusters as the cell's output.
recorders_clusters

Unnamed: 0,field_number_PR,longitud_IG,latitude_IG,day_init_TI,day_end_TI,samp_rate_RE,habitat,pasture,savanna,forest,...,cluster_0,cluster_1,cluster_2,cluster_3,cluster_4,cluster_5,cluster_6,cluster_7,cluster_8,cluster_9
0,RZUA01,-73.411659,3.555860,3,14,192000,bosque,1.000000,0.000000,0.000000,...,70,0,80,163,3,278,14,84,74,273
1,RZUA02,-73.411659,3.552269,3,14,192000,bosque,1.000000,0.000000,0.000000,...,118,0,18,71,108,66,0,2,191,103
2,RZUA03b,-73.411520,3.548784,8,14,192000,pastizal,0.705108,0.294892,0.000000,...,12,0,24,81,0,90,0,288,0,78
3,RZUA04,-73.411514,3.544981,3,14,192000,borde,0.589278,0.410722,0.000000,...,88,0,55,76,15,138,68,48,60,120
4,RZUA05,-73.411571,3.541357,5,13,192000,borde,0.892664,0.000000,0.107336,...,152,0,159,50,0,151,33,32,61,140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,RZUH10,-73.386542,3.523509,4,12,192000,borde,0.100542,0.311728,0.587730,...,85,0,193,123,3,74,50,27,53,162
90,RZUH11,-73.386250,3.519590,4,12,192000,sabana,0.000000,0.734056,0.265944,...,197,0,0,52,9,24,0,330,94,62
91,RZUH12,-73.386458,3.516031,4,12,192000,sabana,0.149383,0.850617,0.000000,...,208,0,0,31,5,122,0,86,245,73
92,RZUH13,-73.386398,3.512511,4,12,192000,borde,0.492125,0.286392,0.221483,...,67,0,151,46,5,46,190,79,18,166


In [75]:
# Sanity check: inspect the cluster distribution for one specific recorder.
grabadora = "RZUH12"  # Change this to the name of the recorder you want to check

# Keep only the rows of audios_final whose 'location' matches this recorder
audios_grabadora = audios_final[audios_final['location'] == grabadora]

# Count how many audios of this recorder fall in each cluster
conteo_por_cluster = audios_grabadora['cluster'].value_counts()

# Show the result
print(conteo_por_cluster)

2    265
0    224
1    142
Name: cluster, dtype: int64


In [27]:
# Number of audios assigned to each cluster across all of audios_final.
# NOTE(review): this rebinds `conteo_clusters`, which the clustering cell
# defines as the location x cluster count table; consider a distinct name so
# later cells do not pick up the wrong object.
conteo_clusters = audios_final['cluster'].value_counts()

# Show the result
print(conteo_clusters)

0    12855
9    12055
8     9278
7     8954
5     8868
3     8597
2     6054
6     2925
4     1474
1        1
Name: cluster, dtype: int64


In [58]:
# NOTE(review): this magic switches the matplotlib backend; nothing in this
# cell calls matplotlib, so it looks unnecessary here - confirm before removing.
%matplotlib qt
# Base folium map with a fixed centre and zoom level.
mapa = folium.Map(location=[3.515, -73.411489], zoom_start=13.2)

# One [latitude_IG, longitud_IG, cluster_2] row per recorder; the third value
# is presumably used by HeatMap as the point weight - TODO confirm.
puntos = recorders_clusters[['latitude_IG', 'longitud_IG', 'cluster_2']].values.tolist()
# NOTE(review): `valores` is not used anywhere else in this cell.
valores = recorders_clusters["cluster_2"].tolist()
HeatMap(puntos, radius=18, ).add_to(mapa)
# Last expression: displays the map as the cell's output.
mapa

In [55]:
import geopandas as gpd
from shapely.geometry import Point
import rasterio
from scipy.interpolate import griddata


NameError: name 'df' is not defined

In [53]:
# Build one shapely Point per row from the 'XCOORD.y_x' / 'YCOORD.y_x' columns
# and attach them as the geometry of a GeoDataFrame.
# NOTE(review): `df` is not assigned in any cell visible above, and the recorded
# output shows "NameError: name 'df' is not defined" - the source frame must be
# defined before this cell can run.
geometry = [Point(xy) for xy in zip(df['XCOORD.y_x'], df['YCOORD.y_x'])]
gdf = gpd.GeoDataFrame(df, geometry=geometry)

# Load the TIF file to obtain the map bounds
# NOTE(review): hard-coded absolute path; it differs from the `root` directory
# used by the data-loading cell - confirm it exists on the machine running this.
tif_path = '/content/drive/MyDrive/Doctorado/2023/Julio/capaszamuro/AD.tif'

# The context manager closes the raster once its bounds have been read.
with rasterio.open(tif_path) as tif:
    bounds = tif.bounds

Collecting rasterio
  Downloading rasterio-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting affine
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting click-plugins
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Collecting numpy>=1.24
  Downloading numpy-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: numpy, cligj, click-plugins, affine, rasterio
  Attempting uninstall: numpy
    Found existing installation: numpy 1.23.1
    Uninstalling numpy-1.23.1:
      Successfully uninstalled numpy-1.23.1
[31mERROR: pip's depende