In [None]:
# Import dependencies, then ask the user to pick the Excel file to analyse
import pandas as pd
import openpyxl
import os
import tkinter as tk
from tkinter import Tk
from tkinter import filedialog
import folium
from sklearn.cluster import DBSCAN
from math import radians, cos, sin, asin, sqrt

# The hidden root window exists only to host the dialog; keep a handle on it
# so it can be destroyed afterwards instead of lingering for the kernel's lifetime.
_dialog_root = Tk()
_dialog_root.withdraw()
file_path = filedialog.askopenfilename()
_dialog_root.destroy()

# A cancelled dialog yields an empty value; fail here with a clear message
# rather than letting a later cell fail while trying to read an empty path.
if not file_path:
    raise FileNotFoundError("No input file was selected.")

In [None]:
# Last path component of the selected file (directories stripped)
file_name = os.path.split(file_path)[1]

# Same name with its final extension dropped; used to derive output file names
name_and_extension = os.path.splitext(file_name)
file_name_without_extension = name_and_extension[0]

In [None]:
# Parse the selected workbook into a DataFrame
df = pd.read_excel(file_path)

# Echo the file name, then the column index, each on its own line
print(file_name, df.columns, sep='\n')

In [None]:
# Second, independent read of the same workbook into `data`.
# NOTE(review): `data` is not referenced by any other cell visible here, and `df`
# already holds the same file contents - confirm whether this load is still needed.
data = pd.read_excel(io=file_path)

In [None]:
def haversine(lon1, lat1, lon2, lat2, radius_km=6367):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius_km : float, optional
        Sphere radius in kilometres. Defaults to 6367, the value this
        notebook has always used.

    Returns
    -------
    float
        Distance along the sphere's surface, in kilometres.
    """
    # Convert each angle explicitly instead of going through the builtin
    # `map`: this notebook rebinds the global name `map` to a folium Map,
    # which would make a `map(radians, ...)` call fail once that cell has run.
    lon1_rad = radians(lon1)
    lat1_rad = radians(lat1)
    lon2_rad = radians(lon2)
    lat2_rad = radians(lat2)

    # haversine formula
    dlon = lon2_rad - lon1_rad
    dlat = lat2_rad - lat1_rad
    a = sin(dlat / 2) ** 2 + cos(lat1_rad) * cos(lat2_rad) * sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` a hair outside [0, 1] (e.g. for
    # near-antipodal points), which would take sqrt/asin out of their domain.
    a = min(1.0, max(0.0, a))

    c = 2 * asin(sqrt(a))
    km = radius_km * c
    return km

# Keep only the columns the analysis needs and coerce both risk levels to
# numeric. Building the result with a select + assign chain returns a fresh
# DataFrame: the frame loaded from Excel is not modified in place, and later
# column assignments on `df` do not act on a slice of another frame.
df = (
    df[['ObjectID', 'Latitude', 'Longitude', 'level_1', 'level_2']]
    .assign(
        level_1=lambda frame: pd.to_numeric(frame['level_1']),
        level_2=lambda frame: pd.to_numeric(frame['level_2']),
    )
)

# Define the clustering parameters
km_epsilon = .75  # neighbourhood radius in kilometers
earth_radius_km = 6367  # same Earth radius the haversine() helper in this notebook uses
# Arc length / sphere radius = central angle in radians, which is the unit the
# haversine metric measures distances in. (The previous value,
# km / 1000 / 111.32, came out at well under a metre and was not in radians.)
epsilon = km_epsilon / earth_radius_km

minimum_samples = 3

# scikit-learn's haversine metric expects rows of [latitude, longitude] in
# radians; radians(1) is the degrees -> radians scale factor.
coords_rad = df[['Latitude', 'Longitude']].to_numpy(dtype=float) * radians(1)

# Cluster the data using DBSCAN on great-circle distance, so the radius means
# the same thing east-west and north-south
dbscan = DBSCAN(eps=epsilon, min_samples=minimum_samples,
                metric='haversine', algorithm='ball_tree')
clusters = dbscan.fit_predict(coords_rad)

# Add the cluster labels to the dataframe (-1 marks noise points); assign()
# returns a new frame, so this is safe even when `df` is a column subset
df = df.assign(Cluster=clusters)

# A location is high risk only when both levels exceed their thresholds
exceeds_level_1 = df['level_1'] > 4
exceeds_level_2 = df['level_2'] > 6
high_risk = df[exceeds_level_1 & exceeds_level_2]

# Centre the base map on the average coordinate of every location
# (all rows of df, not only the high-risk ones)
center_lat, center_long = df['Latitude'].mean(), df['Longitude'].mean()
# NOTE: `map` shadows the builtin, but later cells display and save the map
# under this name, so it is kept.
map = folium.Map(location=[center_lat, center_long], zoom_start=10)

# One marker per cluster label, placed at the mean position of that label's
# high-risk rows. sort=False keeps labels in order of first appearance.
# Label -1 (noise) also gets a single marker at the mean of the noise rows.
for cluster_id, members in high_risk.groupby('Cluster', sort=False):
    is_noise = cluster_id == -1
    label = 'Noise' if is_noise else f'Cluster {cluster_id}'
    marker = folium.CircleMarker(
        location=[members['Latitude'].mean(), members['Longitude'].mean()],
        radius=3,
        color='red' if is_noise else 'green',
        tooltip=f'{label}, Count: {len(members)}',
        popup=f'Cluster Properties: {members["ObjectID"].tolist()}',
    )
    marker.add_to(map)

In [None]:
# Count high-risk ObjectIDs per cluster label and print one line per label
cluster_counts = high_risk.groupby('Cluster')['ObjectID'].count()
print('\nCluster Counts:')
for cluster_id, count in cluster_counts.items():
    # DBSCAN labels noise points -1; everything else is a numbered cluster
    cluster_name = 'Noise' if cluster_id == -1 else f'Cluster {cluster_id}'
    print(f'{cluster_name}: {count}')

In [None]:
# Display the map
# A bare expression on the cell's last line makes Jupyter render the folium map inline.
map

In [None]:
# Derive the HTML output name from the input file's extension-less name
new_file_name = f'{file_name_without_extension}_concentration_analysis.html'

In [None]:
# Save the map to HTML with the new file name
# new_file_name is built from the input's base name and carries no directory
# component, so the path is relative to the kernel's current working directory.
map.save(new_file_name)
print("Map Saved to Directory.")

In [None]:
# Collect the ObjectIDs of every location (not only high-risk ones) into one
# list per cluster label, indexed by that label
object_ids_by_cluster = df.groupby('Cluster')['ObjectID']
clustered_properties = object_ids_by_cluster.agg(list)
print(clustered_properties)

In [None]:
# Create a file dialog to allow the user to select the save location.
# The hidden root window only hosts the dialog and is destroyed right after.
root = tk.Tk()
root.withdraw()
default_file_name = file_name_without_extension + "_risk_concentration_clustering_report"
# Stored under its own name so the input path in `file_path` is not
# overwritten; re-running the loading cells still reads the source workbook.
report_path = filedialog.asksaveasfilename(defaultextension='.xlsx', initialfile=default_file_name)
root.destroy()

if not report_path:
    # The dialog returns an empty value when cancelled; there is nowhere to write.
    print("Save cancelled; no report written.")
else:
    # Excel cells cannot hold Python lists, so flatten each cluster's ObjectID
    # list into a comma-separated string, and turn the Cluster index into a
    # real column so every row stays labelled with its cluster.
    cluster_information = (
        clustered_properties
        .apply(lambda object_ids: ', '.join(str(object_id) for object_id in object_ids))
        .rename('ObjectIDs')
        .reset_index()
    )

    # Get cluster counts (high-risk locations per cluster label)
    cluster_counts = high_risk.groupby('Cluster')['ObjectID'].count().reset_index()
    cluster_counts.columns = ['Cluster', 'Count']

    # Write both tables to their own sheets; the context manager saves and
    # closes the workbook on exit
    with pd.ExcelWriter(report_path) as writer:
        cluster_information.to_excel(writer, sheet_name='Cluster Information', index=False)
        cluster_counts.to_excel(writer, sheet_name='Cluster Counts', index=False)

    print("Excel write task completed.")
