In [None]:
import pandas as pd
from sklearn.cluster import KMeans
import geohash2
import os

In [None]:
# Load the pre-processed dataset that the rest of the notebook clusters.
data = pd.read_csv(
    filepath_or_buffer='../dados/dados_tratados/dados_tratados.csv',
)

In [None]:
def apply_geohash(lat, lon, precision=5):
    """Encode a latitude/longitude pair with ``geohash2.encode``.

    Args:
        lat: Latitude, forwarded unchanged to the encoder.
        lon: Longitude, forwarded unchanged to the encoder.
        precision: Forwarded as the encoder's ``precision`` keyword
            (defaults to 5).

    Returns:
        The value produced by ``geohash2.encode`` for these inputs.
    """
    encoded = geohash2.encode(lat, lon, precision=precision)
    return encoded

In [None]:
# Tag every row with the geohash of its coordinates (default precision of
# apply_geohash). Iterating the two coordinate columns directly avoids
# building a full row Series per record, which is what a row-wise
# DataFrame.apply(axis=1) does, and it still produces a valid (empty) column
# when the frame has no rows.
data['geohash'] = [
    apply_geohash(lat, lon)
    for lat, lon in zip(data['LATITUDE'], data['LONGITUDE'])
]

In [None]:
# Rows that share the same 'geohash' value form one group.
grouped = data.groupby(by='geohash')

In [None]:
# Directory (relative to the working directory) that receives the CSV files.
output_dir = 'clustered_data'
# exist_ok=True makes this cell safe to re-run when the directory is present.
os.makedirs(name=output_dir, exist_ok=True)

In [None]:
# Cluster the points inside every geohash cell with KMeans and write one CSV
# file per (geohash, cluster) pair into output_dir.
for geohash, group in grouped:
    # Work on an explicit copy: adding a column to the sub-frame handed out by
    # groupby triggers SettingWithCopyWarning and must never touch `data`.
    group = group.copy()
    coords = group[['LATITUDE', 'LONGITUDE']]

    # At most 5 clusters per cell, and never more clusters than distinct
    # points: duplicated coordinates cannot be split into separate clusters.
    num_clusters = min(len(coords.drop_duplicates()), 5)
    if num_clusters > 1:
        # random_state pins the centroid initialisation so re-running the
        # notebook yields the same labels (and therefore the same files).
        # n_init is set explicitly because its default differs between
        # scikit-learn releases.
        kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=0)
        group['cluster_label'] = kmeans.fit(coords).labels_
    else:
        # A single distinct point: everything belongs to cluster 0.
        group['cluster_label'] = 0

    for label, cluster in group.groupby('cluster_label'):
        filename = f"{output_dir}/geohash_{geohash}_cluster_{label}.csv"
        cluster.to_csv(filename, index=False)
        print(f"Output written to {filename}")