# In [None]:
import pandas as pd
import folium
from haversine import haversine, Unit
import numpy as np

# Read the environment export; rows without coordinates are discarded up front
file_path = 'data/ut_contaienr_data/environment_202405221219.csv'  # Update this path
df_env = pd.read_csv(file_path).dropna(subset=['position_coordinates'])

# Remove every parenthesis character, then split the remaining "a,b" text into
# two float columns (first value -> longitude, second value -> latitude)
df_env['position_coordinates'] = df_env['position_coordinates'].str.replace(r'[()]', '', regex=True)
coordinate_parts = df_env['position_coordinates'].str.split(',', expand=True)
df_env[['longitude', 'latitude']] = coordinate_parts.astype(float)

# One row per distinct (longitude, latitude) pair
unique_locations = df_env[['longitude', 'latitude']].drop_duplicates()

# Function to calculate distance matrix
def haversine_distance_matrix(locations):
    """Return the symmetric matrix of pairwise haversine distances in meters.

    Args:
        locations: DataFrame with ``latitude`` and ``longitude`` columns.
            Rows are addressed by position, so the index labels are ignored.

    Returns:
        A ``(n, n)`` NumPy array whose entry ``[i, j]`` is the distance in
        meters between row ``i`` and row ``j``. The diagonal stays zero.
    """
    # Extract the (latitude, longitude) pairs once. Looking them up with
    # ``.iloc[i][...]`` inside the O(n^2) loop would build a temporary row
    # Series for every single access.
    points = list(zip(locations['latitude'].to_numpy(),
                      locations['longitude'].to_numpy()))
    num_locations = len(points)
    distance_matrix = np.zeros((num_locations, num_locations))
    for i, origin in enumerate(points):
        # Only the upper triangle is computed; the result is mirrored below.
        for j in range(i + 1, num_locations):
            distance = haversine(origin, points[j], unit=Unit.METERS)
            distance_matrix[i, j] = distance
            distance_matrix[j, i] = distance
    return distance_matrix

# Pairwise distances between all unique locations
distance_matrix = haversine_distance_matrix(unique_locations)

# Greedy grouping: every location that is not yet assigned seeds a new cluster
# and absorbs each later unassigned location whose distance to the seed is
# strictly below `tolerance`.
tolerance = 50
num_unique = len(unique_locations)
visited = np.zeros(num_unique, dtype=bool)
clusters = []

for seed in range(num_unique):
    if visited[seed]:
        continue
    visited[seed] = True
    cluster = [seed]
    for candidate in range(seed + 1, num_unique):
        if visited[candidate] or not (distance_matrix[seed, candidate] < tolerance):
            continue
        cluster.append(candidate)
        visited[candidate] = True
    clusters.append(cluster)

# Represent each cluster by the column-wise mean of its member locations
clustered_locations = pd.DataFrame(
    [unique_locations.iloc[members].mean() for members in clusters],
    columns=['longitude', 'latitude'],
)

# Center the map on the first cluster representative
initial_location = clustered_locations.iloc[0]
map_center = [initial_location['latitude'], initial_location['longitude']]
m = folium.Map(location=map_center, zoom_start=15)

# One marker per cluster representative, with its coordinates as the popup text
for _row_id, row in clustered_locations.iterrows():
    popup_text = f"Longitude: {row['longitude']}, Latitude: {row['latitude']}"
    marker = folium.Marker([row['latitude'], row['longitude']], popup=popup_text)
    marker.add_to(m)

# NOTE: there is no save step here; the map is only passed to display() below.

# Display the map (if running in a Jupyter notebook)
display(m)

