In [None]:
# Import dependencies and prompt the user to choose the Excel file to analyse
import pandas as pd
import openpyxl
import os
import tkinter as tk
from tkinter import Tk
from tkinter import filedialog
import folium
from sklearn.cluster import DBSCAN

# Create a hidden Tk root so that only the file dialog is shown, and keep a
# handle on it so it can be destroyed afterwards instead of lingering for the
# lifetime of the kernel.
root = Tk()
root.withdraw()
# Ask the window manager to keep the dialog above other windows so it is not
# hidden behind the notebook's browser window.
root.attributes('-topmost', True)
try:
    file_path = filedialog.askopenfilename(parent=root)
finally:
    root.destroy()

# askopenfilename returns an empty value when the dialog is cancelled; fail here
# with a clear message rather than letting a later cell fail on an empty path.
if not file_path:
    raise FileNotFoundError('No file was selected in the file dialog.')

In [None]:
# Final path component of the selected file (name plus extension)
file_name = os.path.split(file_path)[1]

# Same name with the trailing extension stripped off
file_name_without_extension = os.path.splitext(file_name)[0]

In [None]:
# Load the selected workbook into a DataFrame
df = pd.read_excel(io=file_path)

# Report which file was loaded and the column labels it provides
print(file_name, df.columns, sep='\n')

In [None]:
# Second, independent read of the same workbook, kept under the name `data`.
# NOTE(review): `data` is not referenced by the cells visible in this review;
# confirm whether this extra read is still needed.
data = pd.read_excel(io=file_path)

In [None]:
# Check the data types and make any necessary changes.
# pd.to_numeric raises if a value cannot be parsed as a number.
df['level_1'] = pd.to_numeric(df['level_1'])
df['level_2'] = pd.to_numeric(df['level_2'])

# Select desired columns. The explicit .copy() makes df an independent frame,
# so adding the 'Cluster' column below is a plain assignment rather than a
# write into a slice of the original frame (SettingWithCopyWarning).
df = df[['ObjectID', 'Latitude', 'Longitude', 'level_1', 'level_2']].copy()

# Define the clustering parameters
epsilon = 0.1          # neighbourhood radius, in the units of the coordinate columns
minimum_samples = 3    # points required within epsilon to form a dense region

# Cluster the data using DBSCAN.
# NOTE(review): no metric is passed, so DBSCAN's default distance is applied to
# the raw Longitude/Latitude values -- confirm this is intended rather than a
# great-circle (haversine) distance.
dbscan = DBSCAN(eps=epsilon, min_samples=minimum_samples)
clusters = dbscan.fit_predict(df[['Longitude', 'Latitude']])

# Add the cluster labels to the dataframe (-1 marks noise points)
df['Cluster'] = clusters

# Filter the high risk locations
high_risk = df[(df['level_1'] > 4) & (df['level_2'] > 6)]

# Filter clusters with less than 3 high-risk properties. Noise (-1) is always
# kept. Done with one vectorised mask instead of re-slicing the frame per cluster.
high_risk_members = high_risk['Cluster'].map(high_risk['Cluster'].value_counts())
is_noise = high_risk['Cluster'] == -1
high_risk = high_risk[is_noise | (high_risk_members >= 3)]

# Create a map centered on the mean latitude and longitude.
# The name `map` shadows the Python builtin but is kept because later cells
# refer to the map object by this name.
center_lat = df['Latitude'].mean()
center_long = df['Longitude'].mean()
map = folium.Map(location=[center_lat, center_long], zoom_start=10)

# Add the high risk locations to the map and label the clusters
for cluster_id in high_risk['Cluster'].unique():
    cluster_data = high_risk[high_risk['Cluster'] == cluster_id]
    cluster_count = len(cluster_data)
    object_ids = cluster_data['ObjectID'].tolist()

    if cluster_id == -1:
        cluster_name = 'Noise'
        # Noise points are by definition not spatially grouped, so a single
        # marker at their mean coordinate would point at a place where there
        # may be no property at all. Plot every noise point at its own location.
        for point in cluster_data.itertuples(index=False):
            folium.CircleMarker(location=[point.Latitude, point.Longitude],
                                radius=3,
                                color='red',
                                tooltip=f'{cluster_name}, ObjectID: {point.ObjectID}',
                                popup=f'Cluster Properties: {[point.ObjectID]}').add_to(map)
    else:
        cluster_name = f'Cluster {cluster_id}'
        # A real cluster is summarised by one marker at its centroid
        folium.CircleMarker(location=[cluster_data['Latitude'].mean(), cluster_data['Longitude'].mean()],
                            radius=3,
                            color='green',
                            tooltip=f'{cluster_name}, Count: {cluster_count}',
                            popup=f'Cluster Properties: {object_ids}').add_to(map)

    # Print a list of properties for the cluster
    print(f'{cluster_name}: {object_ids}')

# Print a summary of cluster counts
cluster_counts = high_risk.groupby('Cluster')['ObjectID'].count()
print('\nCluster Counts:')
for cluster_id, count in cluster_counts.items():
    cluster_name = 'Noise' if cluster_id == -1 else f'Cluster {cluster_id}'
    print(f'{cluster_name}: {count}')

In [None]:
# Display the map: `map` is the folium.Map built in the analysis cell above
# (the name shadows the Python builtin). Leaving it as the cell's last
# expression makes the notebook show it as the cell output.
map

In [None]:
# Save the map to HTML in the current working directory.
# Use the extension-less name computed earlier so the output is
# "<name>_risk_concentration_map.html" rather than
# "<name>.xlsx_risk_concentration_map.html".
map.save(f"{file_name_without_extension}_risk_concentration_map.html")
print("Map Saved to Directory.")