In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
import seaborn as sns
from datetime import timedelta
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from collections import defaultdict

In [None]:
# Load the raw GPS traces from the hosted CSV and preview the first rows.
gps_csv_url = 'https://gps-london.s3.eu-west-2.amazonaws.com/gps.csv'
gps_data = pd.read_csv(gps_csv_url)
gps_data.head()

In [None]:
# Inspect the dtype of every column before cleaning and preprocessing
# (the 'datetime' column is converted with pd.to_datetime in a later cell).
gps_data.dtypes

In [None]:
# List the distinct user ids present in the data.
gps_data['user_id'].unique()

In [None]:
# Summary statistics, as a quick sanity check of the value ranges.
gps_data.describe()

In [None]:
# Checking for duplicate values.
# duplicated() yields one boolean per row, which says little when displayed
# for the whole trace; keep the mask and show how many rows it flags.
duplicate = gps_data.duplicated()
duplicate.sum()

In [None]:
# Frequency of each latitude value. Shown as a bare Series: wrapping it in a
# list only changes the output to a list repr.
gps_data['lat'].value_counts()

In [None]:
# Frequency of each longitude value. Shown as a bare Series: wrapping it in a
# list only changes the output to a list repr.
gps_data['lon'].value_counts()

In [None]:
# Number of GPS points recorded per user. Shown as a bare Series: wrapping it
# in a list only changes the output to a list repr.
gps_data['user_id'].value_counts()

In [None]:
# Checking for missing values: number of null entries in each column.
gps_data.isnull().sum()

In [None]:
# Parse the 'datetime' column into pandas datetime values.
gps_data['datetime'] = gps_data['datetime'].pipe(pd.to_datetime)

In [None]:
# Order the fixes chronologically within each user. Rebinding the name rather
# than sorting with inplace=True keeps the statement chainable.
gps_data = gps_data.sort_values(by=['user_id', 'datetime'])

# Time elapsed since the same user's previous fix. Kept as a standalone Series
# (it shares gps_data's index) so no temporary column is added to gps_data.
sampling_gap = gps_data.groupby('user_id')['datetime'].diff()

# Setting a threshold for the gap between consecutive fixes (10 minutes)
dwell_threshold = timedelta(minutes=10)

# Keep only the fixes recorded less than the threshold after the previous one.
# Each user's first fix has no predecessor (NaT); it is treated as a zero gap
# so that it is kept.
clean_data = gps_data[sampling_gap.fillna(pd.Timedelta(0)) < dwell_threshold]

# Report how much data survived and preview it instead of printing every row.
print(f"Kept {len(clean_data)} of {len(gps_data)} GPS points")
clean_data.head()

In [None]:
from sklearn.cluster import DBSCAN

# Setting the epsilon and minimum samples based on the characteristics of your data
# NOTE(review): eps is applied to the raw lat/lon values, so it is expressed
# in degrees rather than metres - confirm this matches the intended radius.
epsilon_dbscan = 0.0005
min_samples_dbscan = 5

# Cluster each user's points independently. The labelled frames are collected
# in a list and concatenated once: calling pd.concat inside the loop re-copies
# all previous rows on every iteration. sort=False keeps the users in order of
# first appearance.
dbscan_frames = []
for user_id, user_points in clean_data.groupby('user_id', sort=False):
    # Applying DBSCAN clustering on the lat and lon columns
    dbscan = DBSCAN(eps=epsilon_dbscan, min_samples=min_samples_dbscan)
    cluster_labels = dbscan.fit_predict(user_points[['lat', 'lon']])

    # assign() returns a new frame, so clean_data itself is never modified
    dbscan_frames.append(user_points.assign(dbscan_cluster=cluster_labels))

if dbscan_frames:
    dbscan_clustered_data = pd.concat(dbscan_frames)
else:
    # No users at all: keep the expected columns so later cells can still run
    dbscan_clustered_data = clean_data.iloc[0:0].assign(dbscan_cluster=pd.Series(dtype='int64'))




In [None]:
# One scatter plot per user, with points coloured by their DBSCAN label
for user_id in dbscan_clustered_data['user_id'].unique():
    is_current_user = dbscan_clustered_data['user_id'] == user_id
    user_data_dbscan = dbscan_clustered_data.loc[is_current_user]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(
        user_data_dbscan['lon'],
        user_data_dbscan['lat'],
        c=user_data_dbscan['dbscan_cluster'],
        cmap='viridis',
        marker='o',
    )
    ax.set_title(f'DBSCAN Clustered GPS Data - User {user_id}')
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    plt.show()

In [None]:
#Finding the individual dwells for dbscan
# Map each user id to the number of GPS points per DBSCAN label, ordered by
# label (so the noise label -1, when present, comes first).
dwell_frequencies_dbscan = defaultdict(list)

# Iterate through each user
for user_id in dbscan_clustered_data['user_id'].unique():
    # Extract data for the current user
    user_data_dbscan = dbscan_clustered_data[dbscan_clustered_data['user_id'] == user_id]

    # Count the points assigned to each DBSCAN label
    cluster_counts_dbscan = user_data_dbscan['dbscan_cluster'].value_counts().sort_index()

    # Store the counts as they are. Users have different numbers of clusters,
    # so padding to a common length belongs where the lists are aggregated;
    # appending zeros here would only add clusters that do not exist.
    dwell_frequencies_dbscan[user_id].extend(cluster_counts_dbscan.tolist())



In [None]:

# Creating DataFrame to store dwell times for DBSCAN: one row per
# (user, cluster) holding the first ('min') and last ('max') timestamp
# observed in that cluster.
# DBSCAN labels noise points -1; they do not form a place, so they are left out.
dbscan_cluster_points = dbscan_clustered_data[dbscan_clustered_data['dbscan_cluster'] != -1]
dwell_times_dbscan = (
    dbscan_cluster_points
    .groupby(['user_id', 'dbscan_cluster'])['datetime']
    .agg(['min', 'max'])
    .reset_index()
)

# Calculating dwell duration for DBSCAN as a Timedelta: the span between the
# first and the last fix of the cluster.
dwell_times_dbscan['dwell_duration'] = dwell_times_dbscan['max'] - dwell_times_dbscan['min']

# Filtering out clusters representing movement based on minimum dwell duration
min_dwell_duration = pd.Timedelta(minutes=10)
valid_dwells_dbscan = dwell_times_dbscan[dwell_times_dbscan['dwell_duration'] >= min_dwell_duration]


In [None]:
# Report the retained DBSCAN dwells user by user
dwell_report_columns = ['min', 'max', 'dwell_duration']
for user_id in valid_dwells_dbscan['user_id'].unique():
    # Rows of the valid-dwell table that belong to this user
    user_valid_dwells_dbscan = valid_dwells_dbscan.loc[valid_dwells_dbscan['user_id'] == user_id]

    # Header, table and a trailing blank block, each on its own line
    print(
        f"Estimated Individual Dwells for User {user_id} - DBSCAN:",
        user_valid_dwells_dbscan[dwell_report_columns],
        "\n",
        sep="\n",
    )


In [None]:
# Summary statistics of the DBSCAN dwell durations, preceded by a heading
print(
    "Dwell Duration Statistics - DBSCAN:",
    valid_dwells_dbscan['dwell_duration'].describe(),
    sep="\n",
)

In [None]:
# Empty frame that will receive the k-means clustering results for all users
kmeans_clustered_data = pd.DataFrame()

# The lat and lon columns of every cleaned point are the clustering input
coordinates = clean_data.loc[:, ['lat', 'lon']]

# Elbow Method: fit k-means for k = 1..20 and record the inertia of each fit
K_range = range(1, 21)
inertias = []
for k in K_range:
    kmeans = KMeans(random_state=42, n_clusters=k).fit(coordinates)
    inertias += [kmeans.inertia_]


In [None]:
# Inertia against the number of clusters; the bend suggests a suitable k
fig, ax = plt.subplots()
ax.plot(K_range, inertias, marker='o')
ax.set_xlabel('Number of Clusters (k)')
ax.set_ylabel('Inertia')
ax.set_title('Elbow Method for Optimal k in KMeans')
plt.show()

In [None]:
# Number of clusters chosen from the elbow plot
optimal_k = 10

# Apply k-means clustering to each user's points independently. The result is
# rebuilt from scratch on every run of this cell (re-running it cannot append
# duplicate rows), and the frames are concatenated once rather than inside
# the loop. sort=False keeps the users in order of first appearance.
kmeans_frames = []
for user_id, user_points in clean_data.groupby('user_id', sort=False):
    # KMeans raises when asked for more clusters than there are samples,
    # so cap the cluster count for users with very few points.
    n_user_clusters = min(optimal_k, len(user_points))

    # Applying k-means clustering
    kmeans = KMeans(n_clusters=n_user_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(user_points[['lat', 'lon']])

    # assign() returns a new frame, so clean_data itself is never modified
    kmeans_frames.append(user_points.assign(kmeans_cluster=cluster_labels))

if kmeans_frames:
    kmeans_clustered_data = pd.concat(kmeans_frames)
else:
    # No users at all: keep the expected columns so later cells can still run
    kmeans_clustered_data = clean_data.iloc[0:0].assign(kmeans_cluster=pd.Series(dtype='int64'))
    


In [None]:
# One scatter plot per user, with points coloured by their k-means label
for user_id in kmeans_clustered_data['user_id'].unique():
    is_current_user = kmeans_clustered_data['user_id'] == user_id
    user_data_kmeans = kmeans_clustered_data.loc[is_current_user]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(
        user_data_kmeans['lon'],
        user_data_kmeans['lat'],
        c=user_data_kmeans['kmeans_cluster'],
        cmap='viridis',
        marker='o',
    )
    ax.set_title(f'K-Means Clustered GPS Data - User {user_id}')
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    plt.show()

In [None]:
# finding the individual dwells for kmeans
# Map each user id to the number of GPS points per k-means cluster.
dwell_frequencies_kmeans = defaultdict(list)

# Every user is clustered with the same k, so every list has this length
num_clusters_kmeans = optimal_k

# Iterate through each user
for user_id in kmeans_clustered_data['user_id'].unique():
    # Extract data for the current user
    user_data_kmeans = kmeans_clustered_data[kmeans_clustered_data['user_id'] == user_id]

    # Count the points per cluster. Reindexing on the full label range gives
    # clusters without points a count of 0 and guarantees that position i of
    # the list always refers to cluster i.
    cluster_counts_kmeans = (
        user_data_kmeans['kmeans_cluster']
        .value_counts()
        .reindex(range(num_clusters_kmeans), fill_value=0)
    )
    dwell_frequencies_kmeans[user_id].extend(cluster_counts_kmeans.tolist())


In [None]:
# Number of k-means clusters needed to cover every user: the largest label
# seen for any user, plus one (labels start at 0). The running maximum has to
# start from a defined value before the loop can compare against it.
max_clusters_kmeans = 0

# Iterate through each user for k-means
for user_id in kmeans_clustered_data['user_id'].unique():
    # Extract data for the current user
    user_data_kmeans = kmeans_clustered_data[kmeans_clustered_data['user_id'] == user_id]

    # Update max_clusters_kmeans based on the current user's maximum cluster value
    max_clusters_kmeans = max(max_clusters_kmeans, int(user_data_kmeans['kmeans_cluster'].max()) + 1)


In [None]:
# Creating DataFrame to store dwell times for k-means: one row per
# (user, cluster) holding the first ('min') and last ('max') timestamp
# observed in that cluster.
dwell_times_kmeans = (
    kmeans_clustered_data
    .groupby(['user_id', 'kmeans_cluster'])['datetime']
    .agg(['min', 'max'])
    .reset_index()
)

# Calculating dwell duration for k-means as a Timedelta: the span between the
# first and the last fix of the cluster.
dwell_times_kmeans['dwell_duration'] = dwell_times_kmeans['max'] - dwell_times_kmeans['min']

# Filtering out clusters representing movement based on minimum dwell duration.
# The threshold is set here as well so this cell does not rely on another
# cell having run successfully.
min_dwell_duration = pd.Timedelta(minutes=10)
valid_dwells_kmeans = dwell_times_kmeans[dwell_times_kmeans['dwell_duration'] >= min_dwell_duration]

In [None]:

# Report the retained K-Means dwells user by user
dwell_report_columns = ['min', 'max', 'dwell_duration']
for user_id in valid_dwells_kmeans['user_id'].unique():
    # Rows of the valid-dwell table that belong to this user
    user_valid_dwells_kmeans = valid_dwells_kmeans.loc[valid_dwells_kmeans['user_id'] == user_id]

    # Header, table and a trailing blank block, each on its own line
    print(
        f"Estimated Individual Dwells for User {user_id} - K-Means:",
        user_valid_dwells_kmeans[dwell_report_columns],
        "\n",
        sep="\n",
    )

In [None]:
# Summary statistics of the K-Means dwell durations, preceded by a heading
print(
    "Dwell Duration Statistics - K-Means:",
    valid_dwells_kmeans['dwell_duration'].describe(),
    sep="\n",
)


In [None]:
# Exploration of patterns and frequencies

# Per user: bar chart of how many GPS points fall into each K-Means cluster
for user_id in kmeans_clustered_data['user_id'].unique():
    is_current_user = kmeans_clustered_data['user_id'] == user_id
    user_data_kmeans = kmeans_clustered_data.loc[is_current_user]

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.countplot(x='kmeans_cluster', data=user_data_kmeans, ax=ax)
    ax.set_title(f'Cluster Frequencies - User {user_id} (K-Means)')
    ax.set_xlabel('Cluster')
    ax.set_ylabel('Frequency')
    plt.show()

In [None]:
# Per user: bar chart of how many GPS points fall into each DBSCAN label
for user_id in dbscan_clustered_data['user_id'].unique():
    is_current_user = dbscan_clustered_data['user_id'] == user_id
    user_data_dbscan = dbscan_clustered_data.loc[is_current_user]

    fig, ax = plt.subplots(figsize=(9, 6))
    sns.countplot(x='dbscan_cluster', data=user_data_dbscan, ax=ax)
    ax.set_title(f'Cluster Frequencies - User {user_id} (DBSCAN)')
    ax.set_xlabel('Cluster')
    ax.set_ylabel('Frequency')
    plt.show()

In [None]:
# Exploring the dwell durations
# Per user: minutes between the first and last fix of every K-Means cluster
for user_id in kmeans_clustered_data['user_id'].unique():
    is_current_user = kmeans_clustered_data['user_id'] == user_id
    user_data_kmeans = kmeans_clustered_data.loc[is_current_user]

    # First and last timestamp per cluster, then their difference in minutes
    dwell_times_kmeans = user_data_kmeans.groupby('kmeans_cluster')['datetime'].agg(['min', 'max'])
    cluster_span = dwell_times_kmeans['max'] - dwell_times_kmeans['min']
    dwell_times_kmeans['dwell_duration'] = cluster_span.dt.total_seconds() / 60

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x=dwell_times_kmeans.index, y='dwell_duration', data=dwell_times_kmeans, ax=ax)
    ax.set_title(f'Dwell Durations - User {user_id} (K-Means)')
    ax.set_xlabel('Cluster')
    ax.set_ylabel('Dwell Duration (minutes)')
    plt.show()

In [None]:
# Per user: minutes between the first and last fix of every DBSCAN label
for user_id in dbscan_clustered_data['user_id'].unique():
    is_current_user = dbscan_clustered_data['user_id'] == user_id
    user_data_dbscan = dbscan_clustered_data.loc[is_current_user]

    # First and last timestamp per label, then their difference in minutes
    dwell_times_dbscan = user_data_dbscan.groupby('dbscan_cluster')['datetime'].agg(['min', 'max'])
    cluster_span = dwell_times_dbscan['max'] - dwell_times_dbscan['min']
    dwell_times_dbscan['dwell_duration'] = cluster_span.dt.total_seconds() / 60

    fig, ax = plt.subplots(figsize=(9, 6))
    sns.barplot(x=dwell_times_dbscan.index, y='dwell_duration', data=dwell_times_dbscan, ax=ax)
    ax.set_title(f'Dwell Durations - User {user_id} (DBSCAN)')
    ax.set_xlabel('Cluster')
    ax.set_ylabel('Dwell Duration (minutes)')
    plt.show()

In [None]:
# Per user: histogram of the valid K-Means dwell durations, in minutes
for user_id in valid_dwells_kmeans['user_id'].unique():
    user_valid_dwells_kmeans = valid_dwells_kmeans.loc[valid_dwells_kmeans['user_id'] == user_id]
    dwell_minutes = user_valid_dwells_kmeans['dwell_duration'].dt.total_seconds() / 60

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.hist(dwell_minutes, bins=20, color='blue', alpha=0.7)
    ax.set_title(f'Dwell Durations Distribution - User {user_id} - K-Means')
    ax.set_xlabel('Dwell Duration (minutes)')
    ax.set_ylabel('Frequency')
    plt.show()

In [None]:
# Per user: histogram of the valid DBSCAN dwell durations, in minutes
for user_id in valid_dwells_dbscan['user_id'].unique():
    user_valid_dwells_dbscan = valid_dwells_dbscan.loc[valid_dwells_dbscan['user_id'] == user_id]
    dwell_minutes = user_valid_dwells_dbscan['dwell_duration'].dt.total_seconds() / 60

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.hist(dwell_minutes, bins=20, color='green', alpha=0.7)
    ax.set_title(f'Dwell Durations Distribution - User {user_id} - DBSCAN')
    ax.set_xlabel('Dwell Duration (minutes)')
    ax.set_ylabel('Frequency')
    plt.show()

In [None]:
# Per user: K-Means dwell duration (minutes) against the dwell's first timestamp
for user_id in valid_dwells_kmeans['user_id'].unique():
    user_valid_dwells_kmeans = valid_dwells_kmeans.loc[valid_dwells_kmeans['user_id'] == user_id]
    dwell_start = user_valid_dwells_kmeans['min']
    dwell_minutes = user_valid_dwells_kmeans['dwell_duration'].dt.total_seconds() / 60

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.scatter(dwell_start, dwell_minutes, color='blue', alpha=0.7)
    ax.set_title(f'Dwell Duration vs. Time of Day - User {user_id} - K-Means')
    ax.set_xlabel('Time of Day')
    ax.set_ylabel('Dwell Duration (minutes)')
    plt.show()


In [None]:
# Per user: DBSCAN dwell duration (minutes) against the dwell's first timestamp
for user_id in valid_dwells_dbscan['user_id'].unique():
    user_valid_dwells_dbscan = valid_dwells_dbscan.loc[valid_dwells_dbscan['user_id'] == user_id]
    dwell_start = user_valid_dwells_dbscan['min']
    dwell_minutes = user_valid_dwells_dbscan['dwell_duration'].dt.total_seconds() / 60

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.scatter(dwell_start, dwell_minutes, color='green', alpha=0.7)
    ax.set_title(f'Dwell Duration vs. Time of Day - User {user_id} - DBSCAN')
    ax.set_xlabel('Time of Day')
    ax.set_ylabel('Dwell Duration (minutes)')
    plt.show()


In [None]:
# Latitude over time per user, coloured by K-Means cluster
# Use seaborn's white grid theme for the plots
sns.set(style="whitegrid")

for user_id in kmeans_clustered_data['user_id'].unique():
    is_current_user = kmeans_clustered_data['user_id'] == user_id
    user_data_kmeans = kmeans_clustered_data.loc[is_current_user]

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.scatterplot(x='datetime', y='lat', hue='kmeans_cluster', data=user_data_kmeans, ax=ax)
    ax.set_title(f'Daily Variation of GPS Points - User {user_id} - K-Means')
    ax.set_xlabel('Date and Time')
    ax.set_ylabel('Latitude')
    ax.legend(title='K-Means Cluster')
    plt.show()

In [None]:
# Latitude over time per user, coloured by DBSCAN label
for user_id in dbscan_clustered_data['user_id'].unique():
    is_current_user = dbscan_clustered_data['user_id'] == user_id
    user_data_dbscan = dbscan_clustered_data.loc[is_current_user]

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.scatterplot(x='datetime', y='lat', hue='dbscan_cluster', data=user_data_dbscan, ax=ax)
    ax.set_title(f'Daily Variation of GPS Points - User {user_id} - DBSCAN')
    ax.set_xlabel('Date and Time')
    ax.set_ylabel('Latitude')
    ax.legend(title='DBSCAN Cluster')
    plt.show()

In [None]:
# Per user: K-Means dwell durations grouped by the weekday the dwell started
for user_id in valid_dwells_kmeans['user_id'].unique():
    user_valid_dwells_kmeans = valid_dwells_kmeans.loc[valid_dwells_kmeans['user_id'] == user_id]
    start_weekday = user_valid_dwells_kmeans['min'].dt.day_name()
    dwell_minutes = user_valid_dwells_kmeans['dwell_duration'].dt.total_seconds() / 60

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(x=start_weekday, y=dwell_minutes, color='blue', ax=ax)
    ax.set_title(f'Dwell Durations by Day of the Week - User {user_id} - K-Means')
    ax.set_xlabel('Day of the Week')
    ax.set_ylabel('Dwell Duration (minutes)')
    plt.show()
    

In [None]:
# Per user: DBSCAN dwell durations grouped by the weekday the dwell started
for user_id in valid_dwells_dbscan['user_id'].unique():
    user_valid_dwells_dbscan = valid_dwells_dbscan.loc[valid_dwells_dbscan['user_id'] == user_id]
    start_weekday = user_valid_dwells_dbscan['min'].dt.day_name()
    dwell_minutes = user_valid_dwells_dbscan['dwell_duration'].dt.total_seconds() / 60

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(x=start_weekday, y=dwell_minutes, color='green', ax=ax)
    ax.set_title(f'Dwell Durations by Day of the Week - User {user_id} - DBSCAN')
    ax.set_xlabel('Day of the Week')
    ax.set_ylabel('Dwell Duration (minutes)')
    plt.show()


In [None]:
# Plotting dwell frequencies by user for K-Means
plt.figure(figsize=(12, 8))

# Keep each group of bars inside its own cluster slot: 0.2 wide for a handful
# of users, narrower once more than four users share the slot.
bar_width_kmeans = min(0.2, 0.8 / max(len(dwell_frequencies_kmeans), 1))

# dwell_frequencies_kmeans maps user id -> list of per-cluster counts, so
# iterate over items(); iterating the dict itself only yields the user ids.
for i, (user_id, user_dwell_freq_kmeans) in enumerate(dwell_frequencies_kmeans.items()):
    # Shift each user's bars sideways so they sit next to each other
    bar_positions = np.arange(len(user_dwell_freq_kmeans)) + i * bar_width_kmeans
    plt.bar(bar_positions, user_dwell_freq_kmeans, width=bar_width_kmeans, label=f'User {user_id}')

plt.xlabel('K-Means Clusters')
plt.ylabel('Dwell Frequency')
plt.title('Dwell Frequencies by User for K-Means')
plt.legend()
plt.show()


In [None]:
# Plotting dwell frequencies by user for DBSCAN
plt.figure(figsize=(12, 8))

# Keep each group of bars inside its own cluster slot: 0.2 wide for a handful
# of users, narrower once more than four users share the slot.
bar_width_dbscan = min(0.2, 0.8 / max(len(dwell_frequencies_dbscan), 1))

# dwell_frequencies_dbscan maps user id -> list of per-label counts, so
# iterate over items(); iterating the dict itself only yields the user ids.
for i, (user_id, user_dwell_freq_dbscan) in enumerate(dwell_frequencies_dbscan.items()):
    # Shift each user's bars sideways so they sit next to each other
    bar_positions = np.arange(len(user_dwell_freq_dbscan)) + i * bar_width_dbscan
    plt.bar(bar_positions, user_dwell_freq_dbscan, width=bar_width_dbscan, label=f'User {user_id}')

plt.xlabel('DBSCAN Clusters')
plt.ylabel('Dwell Frequency')
plt.title('Dwell Frequencies by User for DBSCAN')
plt.legend()
plt.show()


In [None]:
# Plotting the dwell frequencies by cluster for K-Means
# Function to pad lists with zeros
def pad_with_zeros(lst, target_length):
    """Return ``lst`` extended with trailing zeros up to ``target_length``.

    Args:
        lst: A list of values, or a single non-list value, which is treated
            as a one-element list.
        target_length: Desired length of the result.

    Returns:
        A new list of at least ``target_length`` items. Input that is already
        long enough is returned as an unpadded copy; it is never truncated and
        the argument itself is never modified.
    """
    values = list(lst) if isinstance(lst, list) else [lst]
    # A negative difference multiplies to an empty list, so nothing is added
    return values + [0] * (target_length - len(values))

# Find the maximum length among all dwell frequency lists.
# Both dicts map user id -> list of counts, so the lists are their values();
# iterating a dict directly only yields the user ids.
max_length_kmeans = max((len(lst) for lst in dwell_frequencies_kmeans.values()), default=0)
max_length_dbscan = max((len(lst) for lst in dwell_frequencies_dbscan.values()), default=0)

# Pad all lists with zeros to the maximum length
dwell_frequencies_kmeans_padded = [pad_with_zeros(lst, max_length_kmeans) for lst in dwell_frequencies_kmeans.values()]
dwell_frequencies_dbscan_padded = [pad_with_zeros(lst, max_length_dbscan) for lst in dwell_frequencies_dbscan.values()]

# Aggregate dwell frequencies for K-Means across all users (column-wise sum);
# with no users at all there is nothing to sum, so fall back to an empty array.
if dwell_frequencies_kmeans_padded:
    dwell_frequencies_kmeans_agg = np.array(dwell_frequencies_kmeans_padded).sum(axis=0)
else:
    dwell_frequencies_kmeans_agg = np.zeros(0)

plt.figure(figsize=(10, 6))
plt.bar(np.arange(len(dwell_frequencies_kmeans_agg)), dwell_frequencies_kmeans_agg, width=0.4, label='K-Means')
plt.xlabel('K-Means Clusters')
plt.ylabel('Total Dwell Frequency')
plt.title('Aggregated Dwell Frequencies by Cluster for K-Means')
plt.legend()
plt.show()


In [None]:
# Plotting dwell frequencies by cluster for DBSCAN
# Function to pad lists with zeros
def pad_with_zeros(lst, target_length):
    """Return ``lst`` extended with trailing zeros up to ``target_length``.

    Args:
        lst: A list of values, or a single non-list value, which is treated
            as a one-element list.
        target_length: Desired length of the result.

    Returns:
        A new list of at least ``target_length`` items. Input that is already
        long enough is returned as an unpadded copy; it is never truncated and
        the argument itself is never modified.
    """
    values = list(lst) if isinstance(lst, list) else [lst]
    # A negative difference multiplies to an empty list, so nothing is added
    return values + [0] * (target_length - len(values))

# Find the maximum length among all DBSCAN dwell frequency lists.
# The dict maps user id -> list of counts, so the lists are its values();
# iterating the dict directly only yields the user ids.
max_length_dbscan = max((len(lst) for lst in dwell_frequencies_dbscan.values()), default=0)

# Pad all DBSCAN lists with zeros to the maximum length
dwell_frequencies_dbscan_padded = [pad_with_zeros(lst, max_length_dbscan) for lst in dwell_frequencies_dbscan.values()]

# Aggregate dwell frequencies for DBSCAN across all users
dwell_frequencies_dbscan_agg = np.zeros(max_length_dbscan)

for lst in dwell_frequencies_dbscan_padded:
    # Pad again while summing, so a list that is still shorter than the
    # longest one cannot break the element-wise addition.
    pad_length = max_length_dbscan - len(lst)
    lst_padded = np.pad(np.array(lst), (0, pad_length), 'constant', constant_values=(0))
    dwell_frequencies_dbscan_agg = np.add(dwell_frequencies_dbscan_agg, lst_padded)

# NOTE(review): positions follow each user's sorted DBSCAN labels, so the same
# position is not necessarily the same label (or place) for every user -
# confirm that summing by position is the intended aggregation.
plt.figure(figsize=(10, 6))
plt.bar(np.arange(len(dwell_frequencies_dbscan_agg)), dwell_frequencies_dbscan_agg, width=0.4, label='DBSCAN', color='orange')
plt.xlabel('DBSCAN Clusters')
plt.ylabel('Total Dwell Frequency')
plt.title('Dwell Frequencies by Cluster for DBSCAN')
plt.legend()
plt.show()


In [None]:
# Boxplot of the K-Means dwell frequencies: one box per cluster position
# Flip the padded lists so that every row holds one cluster
cluster_major_kmeans = np.array(dwell_frequencies_kmeans_padded).T

# Turn each row into its own array
dwell_frequencies_kmeans_transposed = list(map(np.array, cluster_major_kmeans))
kmeans_box_labels = [f'Cluster {i+1}' for i in range(len(dwell_frequencies_kmeans_transposed))]

fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(dwell_frequencies_kmeans_transposed, labels=kmeans_box_labels)
ax.set_xlabel('K-Means Clusters')
ax.set_ylabel('Dwell Frequency')
ax.set_title('Boxplot of Dwell Frequencies for K-Means')
plt.show()


In [None]:
# Boxplot of the DBSCAN dwell frequencies: one box per cluster position
# Flip the padded lists so that every row holds one cluster
cluster_major_dbscan = np.array(dwell_frequencies_dbscan_padded).T

# Turn each row into its own array
dwell_frequencies_dbscan_transposed = list(map(np.array, cluster_major_dbscan))
dbscan_box_labels = [f'Cluster {i+1}' for i in range(len(dwell_frequencies_dbscan_transposed))]
orange_box_style = dict(facecolor='orange', color='black')

fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(
    dwell_frequencies_dbscan_transposed,
    labels=dbscan_box_labels,
    patch_artist=True,
    boxprops=orange_box_style,
)
ax.set_xlabel('DBSCAN Clusters')
ax.set_ylabel('Dwell Frequency')
ax.set_title('Boxplot of Dwell Frequencies for DBSCAN')
plt.show()
