In [31]:
import folium
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import haversine_distances
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score

In [None]:
# Carregar o dataset de crimes
df = pd.read_csv('crime_dataset.csv')  # Substitua 'crime_dataset.csv' pelo nome do arquivo do seu dataset

In [6]:
X = df[['LATITUDE', 'LONGITUDE']]

In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
eps = np.percentile(haversine_distances(X_scaled, X_scaled).flatten(), 5)  # Valor de eps baseado no primeiro quartil
min_samples = 10
dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed')
dbscan.fit(haversine_distances(X_scaled, X_scaled))

In [11]:
labels = dbscan.labels_
map_crimes = folium.Map(location=[X['LATITUDE'].mean(), X['LONGITUDE'].mean()], zoom_start=13)

In [17]:
for label in np.unique(labels):
    if label == -1:  # Rótulo para os pontos considerados como ruído
        color = 'gray'
    else:
        color = '#{:02x}{:02x}{:02x}'.format(np.random.randint(256), np.random.randint(256), np.random.randint(256))
    indices = np.where(labels == label)[0]
    for index in indices:
        folium.CircleMarker([X.iloc[index]['LATITUDE'], X.iloc[index]['LONGITUDE']], radius=2, color=color, fill=True,
                            fill_color=color, fill_opacity=0.6).add_to(map_crimes)

In [18]:
#Não consegui abrir a pagina aqui mas ele gerou um HTML que pode ser usado no navegador
map_crimes.save('crime_clusters_dbscan.html')

In [25]:

num_clusters = len(np.unique(labels)) - 1  # Ignorar o rótulo de ruído (-1)
kmeans = KMeans(n_clusters=num_clusters,n_init=10)
kmeans.fit(X_scaled)
labels_kmeans = kmeans.labels_

In [27]:
# Plotar os pontos usando Folium
map_kmeans = folium.Map(location=[X['LATITUDE'].mean(), X['LONGITUDE'].mean()], zoom_start=13)

In [35]:
# Definir cores aleatórias para os clusters do K-Means
cluster_colors_kmeans = ['#{:06x}'.format(np.random.randint(256**3)) for _ in range(num_clusters)]

for label in np.unique(labels_kmeans):
    color = cluster_colors_kmeans[label]
    indices = np.where(labels_kmeans == label)[0]
    for index in indices:
        folium.CircleMarker([X.iloc[index]['LATITUDE'], X.iloc[index]['LONGITUDE']], radius=2, color=color, fill=True,
                            fill_color=color, fill_opacity=0.6).add_to(map_kmeans)

# Salvar o mapa como arquivo HTML
map_kmeans.save('crime_clusters_kmeans.html')

In [36]:
agglomerative = AgglomerativeClustering(n_clusters=num_clusters)
agglomerative.fit(X_scaled)
labels_agg = agglomerative.labels_

In [37]:
silhouette_kmeans = silhouette_score(X_scaled, labels_kmeans)
silhouette_agg = silhouette_score(X_scaled, labels_agg)

print("Silhouette Score - K-Means:", silhouette_kmeans)
print("Silhouette Score - Clusterização Hierárquica:", silhouette_agg)

Silhouette Score - K-Means: 0.41425009106071053
Silhouette Score - Clusterização Hierárquica: 0.40300135142613747
