# Imports and Settings

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.cluster import DBSCAN, KMeans
from sklearn.neighbors import KernelDensity, BallTree
from sklearn.preprocessing import StandardScaler
from scipy.spatial import ConvexHull
from matplotlib.ticker import ScalarFormatter
import os
from colorama import Fore, Style
import matplotlib.pyplot as plt
from scipy.spatial import ConvexHull
from shapely.geometry import Polygon
import json
import plotly.express as px
from haversine import haversine, Unit
import math
import random

# File loading


In [2]:
columns = [
    "vehicleId", 
    "lat", 
    "lng", 
    # "dateStored", 
    "velocity",
    # "odometer", 
    # "engineVoltage", 
    "dateStoredHuman", 
    # "dateOnlyStoredHuman",    
    # "timeOnly",
    "bearing",
    "orientation", 
    "bearing_diff",
    "seconds_diff", 
    "acceleration",
    "isProblem",
    "trip_id",
    "velocity_diff",
    "distance_m"
]


input_dir   = "../../DataSets/API_Responses/Vehicle_Data/"
filename    = "all_vehicle_responses.csv"

## Enable matloblib UI backend

In [3]:
%matplotlib tk

## Save plots file

In [4]:
PLOT_FOLDER_PATH = "./Plots/"


In [5]:
def merge_csv_file(input_dir, filename, columns):
    input_file = os.path.join(input_dir, filename)

    if not os.path.exists(input_file):
        raise FileNotFoundError(f"File '{filename}' not found in directory '{input_dir}'")

    try:
        # Read the CSV while allowing missing columns
        df = pd.read_csv(input_file, usecols=lambda x: x.strip() in columns, encoding='utf-8')
    except Exception as e:
        raise ValueError(f"Error reading '{input_file}': {e}")

    return df



merged_dfs = merge_csv_file(input_dir, filename, columns)
print(merged_dfs.head())


   vehicleId        lat        lng  velocity      dateStoredHuman  distance_m  \
0          1  37.510833  22.385710       0.0  2024-06-06 17:02:17        0.00   
1          1  37.510603  22.385977       0.0  2024-06-06 17:02:20       34.75   
2          1  37.510640  22.385927       6.0  2024-06-06 17:02:25        6.01   
3          1  37.510750  22.385907       7.0  2024-06-06 17:02:31       12.33   
4          1  37.510877  22.385698      26.0  2024-06-06 17:02:37       23.17   

   seconds_diff  trip_id     bearing orientation  bearing_diff  velocity_diff  \
0           0.0        0  137.402376   Southeast          0.00            0.0   
1           3.0        0  312.778670   Northwest        175.38            0.0   
2           5.0        0  351.785725       North         39.01            6.0   
3           6.0        0  307.481149   Northwest         44.30            1.0   
4           6.0        0  318.388767   Northwest         10.91           19.0   

   acceleration  isProblem

Set **Bounding Box** only for **Τρίπολη**

# Data Overview

In [6]:
df = merged_dfs
df_danger = df[df['isProblem'] == 1]
# df_danger = df[df['vehicleId'] == 15]


sns.set_theme(style="ticks")
fig, ax = plt.subplots()
#sns.jointplot(x=df_danger['lng'], y=df_danger['lat'], kind="hex", color="#4CB391", ax=ax)
ax.hexbin(x=df_danger['lng'], y=df_danger['lat'])
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

plt.gca().xaxis.set_major_formatter(ScalarFormatter())
plt.gca().yaxis.set_major_formatter(ScalarFormatter())
plt.ticklabel_format(style='plain', axis='both')  # Disable scientific notation


ax.set_title('Density of problem points on spatial coordinates')

Text(0.5, 1.0, 'Density of problem points on spatial coordinates')

### Init DF15 (VehicleId == 15)

In [7]:
# df15 = df[df["vehicleId"] == 15]
# df15 = df15.head(500)
# df15_problem = df15[df15['isProblem'] == 1]
# plt.plot(df15.index, df15['acceleration'])
# plt.title('Acceleration vs Index')
# plt.ylabel('Acceleration')
# plt.xlabel('Index')
# plt.scatter(df15_problem.index, df15_problem['acceleration'], color='red')

# len(df15)

In [8]:
df_danger[['lng', 'lat']].describe()

Unnamed: 0,lng,lat
count,2194.0,2194.0
mean,22.378132,37.515329
std,0.006988,0.006203
min,22.363152,37.497893
25%,22.372422,37.51081
50%,22.376219,37.513109
75%,22.385158,37.519328
max,22.415382,37.53314


# Clustering

In [9]:
# #### MOCK DATA #####
#
# data = {
#     'lng': np.random.uniform(-180, 180, 200),
#     'lat': np.random.uniform(-90, 90, 200)
# }
# df = pd.DataFrame(data)
# df_danger = df

In [10]:
# Extracting the coordinates
coords = df_danger[['lng', 'lat']].values

# Standardizing the data for better clustering performance
scaler = StandardScaler()
coords_scaled = scaler.fit_transform(coords)

# Applying DBSCAN
dbscan = DBSCAN(eps=0.02, min_samples=4)  # Adjust eps as needed
clusters = dbscan.fit_predict(coords_scaled)

df_danger.loc[:, 'cluster'] = clusters  # Adding cluster labels to DataFrame


df_danger_cluster = df_danger[df_danger['cluster'] > -1]


# %matplotlib inline
# Plotting the results
plt.figure(figsize=(10, 6))
plt.scatter(df_danger_cluster['lng'], df_danger_cluster['lat'], c=df_danger_cluster['cluster'], cmap='tab10', edgecolors='k', alpha=0.7)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('DBSCAN Clustering of Geospatial Data')
plt.colorbar(label='Cluster')

# Save the plot
plt.savefig(PLOT_FOLDER_PATH, bbox_inches='tight')
print(f"Plot saved to {PLOT_FOLDER_PATH}")

plt.show()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_danger.loc[:, 'cluster'] = clusters  # Adding cluster labels to DataFrame


Plot saved to ./Plots/


In [11]:
df_danger.columns

Index(['vehicleId', 'lat', 'lng', 'velocity', 'dateStoredHuman', 'distance_m',
       'seconds_diff', 'trip_id', 'bearing', 'orientation', 'bearing_diff',
       'velocity_diff', 'acceleration', 'isProblem', 'cluster'],
      dtype='object')

In [12]:
df_danger.describe()

Unnamed: 0,vehicleId,lat,lng,velocity,distance_m,seconds_diff,trip_id,bearing,bearing_diff,velocity_diff,acceleration,isProblem,cluster
count,2194.0,2194.0,2194.0,2194.0,2194.0,2194.0,2194.0,2194.0,2194.0,2194.0,2194.0,2194.0,2194.0
mean,7.559708,37.515329,22.378132,14.578851,18.80577,3.530994,143.736554,181.834256,73.738127,-11.404284,-1.070413,1.0,9.009116
std,4.078162,0.006203,0.006988,14.990388,29.26726,3.172815,144.158906,107.172766,91.240364,9.91736,1.123217,0.0,18.368397
min,1.0,37.497893,22.363152,0.0,0.0,1.0,0.0,0.0,0.0,-141.0,-14.722222,1.0,-1.0
25%,7.0,37.51081,22.372422,6.0,6.7925,2.0,23.0,93.838222,10.2625,-14.0,-1.111111,1.0,-1.0
50%,7.0,37.513109,22.376219,10.0,15.21,3.0,90.0,186.73122,33.035,-9.0,-0.763889,1.0,-1.0
75%,9.0,37.519328,22.385158,18.0,22.7575,5.0,207.0,276.508284,102.5675,-5.0,-0.555556,1.0,9.0
max,20.0,37.53314,22.415382,123.0,579.01,50.0,505.0,359.405846,358.23,-2.0,-0.505051,1.0,61.0


## Showing convex hulls

In [13]:
def plot_convex_hulls(df, clusters, normal_df_points):
    unique_clusters = set(clusters)
    colors = plt.cm.get_cmap("tab10", len(unique_clusters))  # Set of distinct colors for clusters

    fig, ax = plt.subplots()  # Create figure and axis objects

    # Plot points first for colorbar
    for cluster in unique_clusters:
        if cluster == -1:
            continue  # Skip noise points
        cluster_points = df[df['cluster'] == cluster][['lng', 'lat']].values
        ax.scatter(cluster_points[:, 0], cluster_points[:, 1], label=f'Cluster {cluster}', c=[colors(cluster)], s=10)

    ax.scatter(normal_df_points['lng'], normal_df_points['lat'], c='gray', alpha=0.5)

    # Plot Convex Hulls
    for cluster in unique_clusters:
        if cluster == -1:
            continue  # Skip noise points
        cluster_points = df[df['cluster'] == cluster][['lng', 'lat']].values
        if len(cluster_points) >= 3:  # Convex hull requires at least 3 points
            hull = ConvexHull(cluster_points)
            hull_points = np.append(hull.vertices, hull.vertices[0])  # Close the loop
            ax.plot(cluster_points[hull_points, 0], cluster_points[hull_points, 1], 'r-')

    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    ax.set_title('DBSCAN Clustering of Geospatial Data with Convex Hulls')

    # Create colorbar using scatter points
    cb = fig.colorbar(plt.cm.ScalarMappable(cmap="tab10", norm=plt.Normalize(vmin=min(unique_clusters), vmax=max(unique_clusters))),
                      ax=ax, label='Cluster')

    # Save the plot
    plt.savefig(PLOT_FOLDER_PATH, bbox_inches='tight')
    print(f"Plot saved to {PLOT_FOLDER_PATH}")

    plt.show()

plot_convex_hulls(df_danger_cluster, clusters, df[df['isProblem'] == 0])


  colors = plt.cm.get_cmap("tab10", len(unique_clusters))  # Set of distinct colors for clusters


Plot saved to ./Plots/


## Get specific **Cluster's BBOX**

In [14]:
def get_bbox_of_clusters(df, clusters):
    cluster_bboxes = {}

    # Iterate over unique clusters (excluding -1 for noise)
    unique_clusters = sorted(set(clusters) - {-1})  # Exclude noise points (-1)

    for cluster in unique_clusters:
        # Filter the points of the current cluster
        cluster_points = df[df['cluster'] == cluster][['lng', 'lat']]
        
        # Get the minimum and maximum lng and lat for the bounding box
        min_lng = cluster_points['lng'].min()
        max_lng = cluster_points['lng'].max()
        min_lat = cluster_points['lat'].min()
        max_lat = cluster_points['lat'].max()

        # Store the bounding box for the current cluster
        cluster_bboxes[cluster] = {
            'min_lng': min_lng,
            'max_lng': max_lng,
            'min_lat': min_lat,
            'max_lat': max_lat
        }

    return cluster_bboxes

cluster_bboxes = get_bbox_of_clusters(df_danger_cluster, clusters)

# Display the bounding boxes for each cluster
for cluster, bbox in cluster_bboxes.items():
    print(f"Cluster {cluster}: {bbox}")


Cluster 0: {'min_lng': 22.3853133, 'max_lng': 22.3874666, 'min_lat': 37.5102916, 'max_lat': 37.5114683}
Cluster 1: {'min_lng': 22.3841633, 'max_lng': 22.3843916, 'min_lat': 37.510425, 'max_lat': 37.5105433}
Cluster 2: {'min_lng': 22.3848866, 'max_lng': 22.3851333, 'min_lat': 37.5107683, 'max_lat': 37.5109899}
Cluster 3: {'min_lng': 22.3844933, 'max_lng': 22.3848033, 'min_lat': 37.510815, 'max_lat': 37.5110133}
Cluster 4: {'min_lng': 22.3854, 'max_lng': 22.3854449, 'min_lat': 37.5114433, 'max_lat': 37.5117283}
Cluster 5: {'min_lng': 22.3832566, 'max_lng': 22.3834216, 'min_lat': 37.5120583, 'max_lat': 37.512245}
Cluster 6: {'min_lng': 22.3759916, 'max_lng': 22.3763466, 'min_lat': 37.512855, 'max_lat': 37.5131816}
Cluster 7: {'min_lng': 22.3749533, 'max_lng': 22.3750599, 'min_lat': 37.509825, 'max_lat': 37.509945}
Cluster 8: {'min_lng': 22.3840833, 'max_lng': 22.3842783, 'min_lat': 37.5138016, 'max_lat': 37.5138783}
Cluster 9: {'min_lng': 22.3715183, 'max_lng': 22.3726783, 'min_lat': 37.5

## Plot Orientations with Convex Hulls

### Prepare the DF

In [15]:
# *Get specific columns 
_ = merged_dfs.copy()
bearings_df = _[['vehicleId', 'lat', 'lng', 'dateStoredHuman' ,'bearing', 'orientation', 'seconds_diff', 'trip_id']]
print(bearings_df)

# Filter for vehicleId == 1
df_vehicle1 = df[df['vehicleId'] == 1]

# Count occurrences of each trip_id
trip_counts = df_vehicle1['trip_id'].value_counts()

# Get the trip_id with the highest count
most_frequent_trip_id = trip_counts.idxmax()

# Display the result
print(f"The trip_id with the most rows for vehicleId 1 is: {most_frequent_trip_id}")


       vehicleId        lat        lng      dateStoredHuman     bearing  \
0              1  37.510833  22.385710  2024-06-06 17:02:17  137.402376   
1              1  37.510603  22.385977  2024-06-06 17:02:20  312.778670   
2              1  37.510640  22.385927  2024-06-06 17:02:25  351.785725   
3              1  37.510750  22.385907  2024-06-06 17:02:31  307.481149   
4              1  37.510877  22.385698  2024-06-06 17:02:37  318.388767   
...          ...        ...        ...                  ...         ...   
28443         20  37.531460  22.369768  2025-03-06 13:09:13  231.663210   
28444         20  37.531275  22.369473  2025-03-06 13:09:15  235.207818   
28445         20  37.531122  22.369195  2025-03-06 13:09:19  278.389323   
28446         20  37.531148  22.368967  2025-03-06 13:09:21  294.596339   
28447         20  37.531243  22.368705  2025-03-06 13:09:24         NaN   

      orientation  seconds_diff  trip_id  
0       Southeast           0.0        0  
1       North

### Get trip_id's rows

In [16]:
print(df[(df['vehicleId'] == 1) & (df['trip_id'] == most_frequent_trip_id)].head(5))

     vehicleId        lat        lng  velocity      dateStoredHuman  \
946          1  37.510722  22.387033      16.0  2024-06-08 17:32:23   
947          1  37.510692  22.387185       8.0  2024-06-08 17:32:30   
948          1  37.510670  22.387042       9.0  2024-06-08 17:32:38   
949          1  37.510645  22.386773      22.0  2024-06-08 17:32:41   
950          1  37.510685  22.386552      34.0  2024-06-08 17:32:43   

     distance_m  seconds_diff  trip_id     bearing orientation  bearing_diff  \
946        0.00           0.0       64  103.998723        East          0.00   
947       13.82           7.0       64  259.248208        West        155.25   
948       12.90           8.0       64  263.300439        West          4.05   
949       23.88           3.0       64  282.783117        West         19.48   
950       20.10           2.0       64  277.535628        West          5.25   

     velocity_diff  acceleration  isProblem  
946          -40.0      0.000000          0  


## Plot std deviation

### 2️⃣ Violin Plot (Distribution)

In [17]:
# Ensure std_dev is calculated
std_dev = merged_dfs['bearing_diff'].std()

plt.figure(figsize=(8, 5))

# Create the violin plot
sns.violinplot(x=merged_dfs['bearing_diff'], inner="quartile", color="lightblue")

# Mark the standard deviation with a vertical line
plt.axvline(x=std_dev, color='red', linestyle='--', label=f'Standard Deviation ({std_dev:.2f})')

# Labels and title
plt.xlabel('Bearing Difference')
plt.title('Violin Plot: Distribution of Bearing Difference')

# Show legend
plt.legend()

# Save the plot
plt.savefig(PLOT_FOLDER_PATH, bbox_inches='tight')
print(f"Plot saved to {PLOT_FOLDER_PATH}")

# Display the plot
plt.show()


Plot saved to ./Plots/


In [18]:
print((merged_dfs['bearing_diff'] < 0).sum())


0


### 3️⃣ Boxplot (Detect Outliers)

In [19]:
plt.figure(figsize=(6, 5))
sns.boxplot(x=merged_dfs['bearing_diff'], color='lightblue')

# Mark the standard deviation
plt.axvline(x=std_dev, color='red', linestyle='--', label=f'Standard Deviation ({std_dev:.2f})')

plt.xlabel('Bearing Difference')
plt.title('Boxplot: Bearing Difference Outliers')
plt.legend()

# Save the plot
plt.savefig(PLOT_FOLDER_PATH, bbox_inches='tight')
print(f"Plot saved to {PLOT_FOLDER_PATH}")

plt.show()


Plot saved to ./Plots/


## Plot bearings vs seconds diff 

### ✅ Option 1: Matplotlib (simple, static plot)

In [20]:
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.scatter(merged_dfs['seconds_diff'], merged_dfs['bearing_diff'], alpha=0.6, edgecolors='w')
plt.title('Scatter Plot of Bearing Difference vs. Seconds Difference')
plt.xlabel('Seconds Difference')
plt.ylabel('Bering Difference')
plt.grid(True)
plt.tight_layout()
plot_save_path = os.path.join(PLOT_FOLDER_PATH, "Scatter_bearings_vs_seconds_diff.png")
plt.savefig(plot_save_path, bbox_inches='tight')
plt.show()


## Plot bearings vs 10 seconds_diff

In [21]:
# Filter the DataFrame and take only the first 20 rows
filtered_df = merged_dfs[merged_dfs['seconds_diff'] <= 10].head(20)

# Create the plot
fig, ax = plt.subplots(figsize=(10, 6))

# Scatter plot (swapped axes)
ax.scatter(
    filtered_df['bearing_diff'],
    filtered_df['seconds_diff'],
    alpha=0.6,
    edgecolors='w',
    label='Points'
)

# Generate a list of 20 distinct colors using a colormap
colors = plt.cm.get_cmap('tab20', 20)  # Using the 'tab20' colormap

# Plot segments with changing color for each segment
for i in range(len(filtered_df) - 1):
    # Get the x and y values for the two points
    x_start = filtered_df['bearing_diff'].iloc[i]
    y_start = filtered_df['seconds_diff'].iloc[i]
    x_end = filtered_df['bearing_diff'].iloc[i + 1]
    y_end = filtered_df['seconds_diff'].iloc[i + 1]

    # Assign a unique color for each segment from the 'tab20' colormap
    color = colors(i)  # Get a different color for each segment

    # Plot each segment with its own color
    ax.plot([x_start, x_end], [y_start, y_end], color=color, lw=2)

# Add incremental labels
for i, (x, y) in enumerate(zip(filtered_df['bearing_diff'], filtered_df['seconds_diff'])):
    ax.text(x + 0.5, y + 0.1, str(i), fontsize=9, color='black')

# Prepare the orientation labels
orientation_list = [f"{i+1}. {orientation}" for i, orientation in enumerate(filtered_df['orientation'])]

# Prepare cell text for the table
cell_text = [[orientation] for orientation in orientation_list]

# Add the table on the right side of the plot (without color)
table = ax.table(cellText=cell_text,
                 colLabels=['Orientation'],
                 loc='right',
                 cellLoc='center',
                 colColours=['lightgray'],  # Keep the header background color if desired
                 bbox=[1.05, 0, 0.2, 1])  # Adjust bbox for position and size

# Labels and title
ax.set_title('Bearing Difference vs Seconds Difference (<= 10 seconds)')
ax.set_xlabel('Bearing Diff')
ax.set_ylabel('Seconds Diff')
ax.grid(True)
ax.legend()





plot_save_path = os.path.join(PLOT_FOLDER_PATH, "bearings_vs_seconds_diff.png")
plt.savefig(plot_save_path, bbox_inches='tight')
plt.show()


  colors = plt.cm.get_cmap('tab20', 20)  # Using the 'tab20' colormap


# Trajectory Analysis

### Plot random vehicle ids and trip_ids **exclude VehicleId: 1 because it has problematic data points**

In [None]:
import random
import matplotlib.pyplot as plt

def random_ID(df):
    # Group by 'vehicleId' and 'trip_id' to get the count of rows per trip_id
    trip_counts = df.groupby(['vehicleId', 'trip_id']).size().reset_index(name='count')

    # Filter out the trip_ids where the count is less than 10
    valid_trip_counts = trip_counts[trip_counts['count'] >= 10]
    
    # Randomly select a vehicleId and trip_id from the valid ones
    selected_trip = valid_trip_counts.sample(1).iloc[0]
    vehID = selected_trip['vehicleId']
    tripID = selected_trip['trip_id']
    
    return vehID, tripID

# Loop through 10 times
for i in range(10):
    # Get a random vehicleId and trip_id with at least 10 rows
    vehicle_id, trip_id = random_ID(merged_dfs)
    print(f'Iteration {i+1} - vehicleId: {vehicle_id}, trip_id: {trip_id}')

    # Filter the dataframe based on the selected vehicleId and trip_id
    filtered_df = merged_dfs[(merged_dfs['vehicleId'] == vehicle_id) & (merged_dfs['trip_id'] == trip_id)]

    # Get the number of rows for the selected trip_id
    num_rows = len(filtered_df)
    
    # Plot the coordinates on a scatter plot
    plt.figure(figsize=(8, 6))
    plt.scatter(filtered_df['lng'], filtered_df['lat'], c='blue', marker='o', label=f'Vehicle {vehicle_id} - Trip {trip_id}')
    
    # Update the title to include the number of rows
    plt.title(f'Coordinates for Vehicle {vehicle_id} - Trip {trip_id} - {num_rows} Data Points')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.legend()
    plt.grid(True)
    plt.show()


Iteration 1 - vehicleId: 7, trip_id: 109
Iteration 2 - vehicleId: 7, trip_id: 362
Iteration 3 - vehicleId: 7, trip_id: 201
Iteration 4 - vehicleId: 9, trip_id: 467
Iteration 5 - vehicleId: 7, trip_id: 67
Iteration 6 - vehicleId: 7, trip_id: 225
Iteration 7 - vehicleId: 9, trip_id: 43
Iteration 8 - vehicleId: 1, trip_id: 96
Iteration 9 - vehicleId: 9, trip_id: 215
Iteration 10 - vehicleId: 13, trip_id: 0


#### Filter trips for calculations

In [23]:
# !Ensure trip_id has at least 6 rows, otherwise, do not calculate any slaloms
trip_counts = merged_dfs.groupby(['vehicleId', 'trip_id']).size().reset_index(name='count')
print(trip_counts)

trip_distances = merged_dfs.groupby(['vehicleId', 'trip_id'])['distance_m'].sum().reset_index(name='total_distance_m')
print(trip_distances)

# *Filter only those trip_ids with more than 10 rows distance traveled > 50
valid_trips = trip_counts[(trip_counts['count'] > 10) & (trip_distances['total_distance_m'] > 50.00)]

filtered_merged_dfs = merged_dfs.copy()
filtered_merged_dfs = pd.merge(merged_dfs, valid_trips[['vehicleId', 'trip_id']], on=['vehicleId', 'trip_id'], how='inner')
print(f"Row difference excluded: {len(merged_dfs)} - {len(filtered_merged_dfs)} = {len(merged_dfs) - len(filtered_merged_dfs)}")



      vehicleId  trip_id  count
0             1        0     13
1             1        1      2
2             1        2      6
3             1        3      4
4             1        4      1
...         ...      ...    ...
1422         20        7      3
1423         20        8     89
1424         20        9     11
1425         20       10      7
1426         20       11     10

[1427 rows x 3 columns]
      vehicleId  trip_id  total_distance_m
0             1        0            291.50
1             1        1              3.88
2             1        2             99.97
3             1        3             44.61
4             1        4              0.00
...         ...      ...               ...
1422         20        7             21.63
1423         20        8           1597.54
1424         20        9            253.65
1425         20       10             25.93
1426         20       11            239.76

[1427 rows x 3 columns]
Row difference excluded: 28448 - 25043 = 3405


# Save DF to csv

In [24]:
visualize_dangers_path = "./visualize_dangers.csv"
merged_dfs.to_csv(visualize_dangers_path, index=False)
print(Fore.GREEN + f"DataFrame stored to {visualize_dangers_path}" + Style.RESET_ALL)

[32mDataFrame stored to ./visualize_dangers.csv[0m
