# Imports

In [1]:
import json
from datetime import datetime, time, timedelta
import os
from Powerfleet_APIs_Management import PowerFleetAPIsManager as ApiManager
from colorama import Fore, Style
import sys  # For printing caught exceptions
import re
from Powerfleet_APIs_Management import MongoDBConnector as DBConnector
from pathlib import Path
import pandas as pd
import csv
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import geopandas as gpd
import math
from typing import *
import statistics
import matplotlib.cm as cm
import seaborn as sns
# from geojson_creator import GeoJSONCreator, VehicleGeoJSONCreator
from typing import Dict
import ast
import requests

## Enable plot UI backend

In [2]:
%matplotlib tk

# Define **Tripoli's BBOX**

In [29]:
# Define bounding box coordinates
tripoli_bbox = {
    'latMin': 37.49764419371479,
    'latMax': 37.56244081620044,
    'lngMin': 22.344992459074458,
    'lngMax': 22.521463853839485
}

## Update <u>PARAMETERS.json</u> with **current** date and time

In [4]:
file_path = "PARAMETERS.json" 

# Get the current date and time
current_datetime = datetime.now().strftime("%Y-%m-%d")  # Format as YYYY-MM-DD
current_time = datetime.now().strftime("%H:%M:%S")  # Format as HH:MM:SS

# Check if the file exists and is not empty
if not os.path.exists(file_path):
    print(f"Error: The file '{file_path}' does not exist.")
    exit()

# Check if the file is empty
if os.path.getsize(file_path) == 0:
    print(f"Error: The file '{file_path}' is empty.")
    exit()

# Load the current JSON data
try:
    with open(file_path, "r") as file:
        data = json.load(file)
except json.JSONDecodeError as e:
    print(f"Error: Failed to decode JSON. Details: {e}")
    exit()

# Combine the date and time
combined_datetime = current_datetime + " " + current_time

# Update the 'endDate' field in the 'snapshot_api' section
data["snapshot_api"]["endDate"] = combined_datetime

# Save the updated JSON data back to the file
try:
    with open(file_path, "w") as file:
        json.dump(data, file, indent=4)  # Use indent for readable JSON formatting
    print(f"Updated PARAMETERS.json with: {combined_datetime}.")
except Exception as e:
    print(f"Error: Failed to save the updated file. Details: {e}")


Updated PARAMETERS.json with: 2025-03-23 17:05:29.


## Redy variables for APIs usage

In [5]:
print(Fore.YELLOW + f"MAIN CWD={os.getcwd()}")
print(f"MAIN, LISTDIR={os.listdir()}" + Style.RESET_ALL)

# Define the ApiParameterExtractor class
class ApiParameterExtractor:
    def __init__(self, json_file):
        with open(json_file, 'r') as file:
            self.parameters = json.load(file)

    def extract_parameters(self, api_type):
        # Normalize API type to lowercase
        api_type = api_type.lower()
        
        # Validate API type
        if api_type not in self.parameters:
            raise ValueError(f"Invalid API type: {api_type}. Valid types are 'live_api' or 'snapshot_api'.")
        
        # Return the corresponding dictionary for the given API type
        return self.parameters[api_type]

extractor = ApiParameterExtractor("PARAMETERS.json")

# Extract parameters for live_api and snapshot_api
live_api_params         = extractor.extract_parameters("live_api")
snapshot_api_params     = extractor.extract_parameters("snapshot_api")

# Pass the extracted parameters to PowerFleetAPIsManager
live_api_manager        = ApiManager(live_api_params)
snapshot_api_manager    = ApiManager(snapshot_api_params)




[33mMAIN CWD=/home/georger/WorkDocuments/Sxoli/Ptyxiaki/Ptyxiaki-Repository/Python_Scripts/Data_Analysis
MAIN, LISTDIR=['Powerfleet_APIs_Management.py', 'projection_to_streets.ipynb', 'geojson_creator.py', 'visualize_danger_areas_v1.ipynb', 'single_data_analysis.ipynb', 'acceleration_plot.png', 'Plots', 'PARAMETERS.json', '__pycache__', 'all_data_analysis.ipynb'][0m


## Fetch Live & Snapshot APIs

In [6]:
# === Function to Save DataFrame to CSV ===
def save_to_csv(dataframe, output_dir, file_name):
    """
    Save a DataFrame to a specified CSV file in the given directory.
    
    :param dataframe: The DataFrame to save.
    :param output_dir: The directory where the CSV file will be saved.
    :param file_name: The name of the CSV file (e.g., "file_name.csv").
    """
    if not dataframe.empty:  # Only save if the DataFrame is not empty
        os.makedirs(output_dir, exist_ok=True)  # Ensure the directory exists
        file_path = Path(output_dir) / file_name  # Construct the full file path
        dataframe.to_csv(file_path, index=False)  # Overwrite the CSV file by default
        print(f"Data saved to {file_path}")
    else:
        print(f"No data to save for {file_name}. Skipping...")

# === Function to Convert UNIX Timestamps to Datetime Using Pandas ===
def unix_to_datetime_pandas(dataframe, column_name):
    """
    Convert a UNIX timestamp column to human-readable datetime format.
    
    :param dataframe: The DataFrame containing the column.
    :param column_name: The column name containing UNIX timestamps.
    """
    if column_name in dataframe.columns:
        try:
            dataframe[column_name] = pd.to_datetime(dataframe[column_name] / 1000, unit='s')  # Convert from ms to seconds
            print(f"Converted '{column_name}' to human-readable datetime.")
        except Exception as e:
            print(f"Error converting column '{column_name}': {e}")
    else:
        print(f"Column '{column_name}' not found in DataFrame.")


def merge_merged_dfs(ALL_merged_dfsS):
    result = pd.concat(ALL_merged_dfsS, ignore_index=True)
    print(type(result))
    return result
    
    
# === Constants ===
target_file = "../../DataSets/API_Responses/Vehicle_Data/Every_vehicle_response.csv"
# snapshot_csv_path = "../../DataSets/API_Responses/Snapshot_API_Response_data_set.csv"  # Modify with your source CSV path

# === Step 1: Read Headers from the Source File ===
# with open(snapshot_csv_path, mode="r") as infile:
#     reader = csv.reader(infile)
#     headers = next(reader)  # Extract headers from the source file

# === Step 2: Write Headers to the Target File ===
# with open(target_file, mode="w", newline="") as outfile:
#     writer = csv.writer(outfile)
#     writer.writerow(headers)  # Write the headers into the target file

# === Step 3: Load Parameters from JSON File ===
with open("PARAMETERS.json", "r") as file:
    PARAMETERS = json.load(file)  # Load API parameters from the JSON file

# === Step 4: Loop to Fetch and Append Data for Each Vehicle ===
_cnt = 1  # Counter for vehicle IDs
snapshot_data = None  # Initialize snapshot_data to enter the loop


ALL_merged_dfsS = []

while _cnt <= 20:  # Run for up to 100 times
    # Update the vehicleId in PARAMETERS
    PARAMETERS["snapshot_api"]["vehicleId"] = str(_cnt)
    
    # Extract updated vehicleId and fetch snapshot data
    vehicleId = PARAMETERS["snapshot_api"]["vehicleId"]
    startDate = PARAMETERS["snapshot_api"]["startDate"]
    endDate = PARAMETERS["snapshot_api"]["endDate"]
    
    # Fetch snapshot data (Returns JSON)
    snapshot_data = snapshot_api_manager.get_snapshot_data(vehicleId, startDate, endDate)
    
    # Skip if no data is found (None or empty data)
    if not snapshot_data:
        print(f"No data found for vehicleId {vehicleId}. Skipping...")
        _cnt += 1
        continue
    
    # Parse the JSON snapshot data into a DataFrame
    try:
        # If the snapshot data is a valid JSON string, parse it
        snapshot_data_dict = json.loads(snapshot_data) if isinstance(snapshot_data, str) else snapshot_data
        
        # Handle JSON structure (either dict or list)
        if isinstance(snapshot_data_dict, dict):
            snapshot_data_list = [snapshot_data_dict]  # Convert single dict to list of dicts
        elif isinstance(snapshot_data_dict, list):
            snapshot_data_list = snapshot_data_dict  # Already a list of dicts
        else:
            snapshot_data_list = []
        
        # Create a DataFrame from the list of dictionaries
        snapshot_merged_dfs = pd.DataFrame(snapshot_data_list)
            
        ALL_merged_dfsS.append(snapshot_merged_dfs)
        #save_to_csv(snapshot_merged_dfs, "../../DataSets/API_Responses/Vehicle_Data", f"Vehicle_{_cnt}_response.csv")

        
        
    
    except json.JSONDecodeError as e:
        print(f"Error decoding snapshot JSON for vehicleId {_cnt}: {e}")

    # Increment the vehicleId counter
    _cnt += 1
    

merged_dfs = merge_merged_dfs(ALL_merged_dfsS)
print(type(merged_dfs))
    
# Save the DataFrame to CSV only if it's not empty
save_to_csv(merged_dfs, "../../DataSets/API_Responses/Vehicle_Data", "all_vehicle_responses.csv")


print("Process complete!")



**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![0m
**Snapshot API**
[32mSnapshot API Request Successful![

# **Filter only Tripoli's BBOX**


In [None]:
# Filter the rows that are within the bounding box
merged_dfs = merged_dfs[(merged_dfs['lat'] >= tripoli_bbox['latMin']) & 
                        (merged_dfs['lat'] <= tripoli_bbox['latMax']) &
                        (merged_dfs['lng'] >= tripoli_bbox['lngMin']) & 
                        (merged_dfs['lng'] <= tripoli_bbox['lngMax'])]



## Visualize data **dispersion**

In [8]:
PLOT_FOLDER_PATH = "./Plots/"

## Plot number of data lines of each vehicle

In [9]:
# Collect unique vehicle IDs and the number of rows for each vehicle
unique_veh_ids = []
vehicle_rows = []

# Iterate over the list of all merged DataFrames
for individual_df in ALL_merged_dfsS:
    if not individual_df.empty:
        unique_veh_ids.append(individual_df['vehicleId'].values[0])  # Extract the vehicle ID
        vehicle_rows.append(len(individual_df))  # Count the number of rows in the DataFrame

# Debugging: Print the number of rows for each vehicle
for vehicle_id, rows in zip(unique_veh_ids, vehicle_rows):
    print(f"Vehicle ID {vehicle_id}: {rows} rows")

# Generate a colormap
num_vehicles = len(unique_veh_ids)
colors = cm.get_cmap('tab20', num_vehicles)(range(num_vehicles))  # Generate distinct colors for each vehicle

# Create a bar plot
plt.figure(figsize=(10, 6))
bars = plt.bar(unique_veh_ids, vehicle_rows, color=colors, edgecolor='black')  # Assign colors to bars

# Add a legend for the bar colors
legend_handles = [plt.Line2D([0], [0], color=color, lw=6, label=f'Vehicle {vehicle_id}') 
                  for color, vehicle_id in zip(colors, unique_veh_ids)]
plt.legend(handles=legend_handles, title="Vehicle ID", bbox_to_anchor=(1.05, 1), loc='upper left')

# Customize plot
plt.xlabel('Vehicle ID')
plt.ylabel('Number of Rows')
plt.title(f'Number of Rows per Vehicle ID from ({PARAMETERS["snapshot_api"]["startDate"]}) to ({PARAMETERS["snapshot_api"]["endDate"]})')
plt.xticks(unique_veh_ids, rotation=45)
plt.tight_layout()
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Set a logarithmic scale for better visualization of large variations
plt.yscale("log")


# Define the path to save the plot
plot_save_path = os.path.join(PLOT_FOLDER_PATH, "vehicle_rows_per_id.png")

# Save the plot
plt.savefig(plot_save_path, bbox_inches='tight')
print(f"Plot saved to {plot_save_path}")

# Show the plot (optional, can be removed if not needed)
plt.show()


Vehicle ID 1: 2692 rows
Vehicle ID 2: 56 rows
Vehicle ID 3: 79 rows
Vehicle ID 4: 52 rows
Vehicle ID 5: 241 rows
Vehicle ID 6: 93 rows
Vehicle ID 7: 12277 rows
Vehicle ID 8: 315 rows
Vehicle ID 9: 8163 rows
Vehicle ID 10: 23 rows
Vehicle ID 11: 91 rows
Vehicle ID 12: 93 rows
Vehicle ID 13: 85 rows
Vehicle ID 14: 62 rows
Vehicle ID 15: 31357 rows
Vehicle ID 16: 40 rows
Vehicle ID 17: 129 rows
Vehicle ID 18: 89 rows
Vehicle ID 19: 139 rows
Vehicle ID 20: 578 rows


  colors = cm.get_cmap('tab20', num_vehicles)(range(num_vehicles))  # Generate distinct colors for each vehicle


Plot saved to ./Plots/vehicle_rows_per_id.png


## Create 2 columns for datetime readable for Humans and for time alone & Sort by dateStored for more accracy


In [10]:
# Convert the 'dateStored' column to datetime format
merged_dfs['dateStoredHuman']           = merged_dfs['dateStored'].apply(lambda x: datetime.fromtimestamp(x / 1000))
merged_dfs['dateOnlyStoredHuman']       = merged_dfs['dateStoredHuman'].dt.date  # Extract the date only
merged_dfs['timeOnly']                  = merged_dfs['dateStoredHuman'].dt.time  # Extract the time only

#! Sort the DataFrame by 'dateStored' in ascending order
merged_dfs                              = merged_dfs.sort_values(by='dateStored', ascending=True)

# Extract columns of interest
lat_clmn                                = merged_dfs['lat']
long_clmn                               = merged_dfs['lng']
date_stored                             = merged_dfs['dateStored']

# #! Drop temp columns
# merged_dfs = merged_dfs.drop(columns=['dateStoredHuman'])



print(merged_dfs.head())

    vehicleId        lat        lng     dateStored  velocity  odometer  \
89          1  37.510833  22.385710  1717682537000       0.0       0.0   
90          1  37.510603  22.385977  1717682540000       0.0       0.0   
91          1  37.510640  22.385927  1717682545000       6.0       0.0   
92          1  37.510750  22.385907  1717682551000       7.0       0.0   
93          1  37.510877  22.385698  1717682557000      26.0       0.0   

    engineVoltage     dateStoredHuman dateOnlyStoredHuman  timeOnly  
89           0.28 2024-06-06 17:02:17          2024-06-06  17:02:17  
90           0.28 2024-06-06 17:02:20          2024-06-06  17:02:20  
91           0.28 2024-06-06 17:02:25          2024-06-06  17:02:25  
92           0.28 2024-06-06 17:02:31          2024-06-06  17:02:31  
93           0.28 2024-06-06 17:02:37          2024-06-06  17:02:37  


## Plot all trips of all vehicles

In [11]:
# Generate a list of unique colors
vehicle_ids = [df['vehicleId'].iloc[0] for df in ALL_merged_dfsS if 'vehicleId' in df.columns]
num_vehicles = len(vehicle_ids)
colors = cm.get_cmap('tab20', num_vehicles)  # Choose a colormap with sufficient colors

# Plot the data
plt.figure(figsize=(15, 10))
plt.title('Each vehicle\'s trip')

# Iterate through the DataFrame list and assign colors based on vehicle ID
for i, _tmp_merged_dfs in enumerate(ALL_merged_dfsS):
    if 'vehicleId' in _tmp_merged_dfs.columns:
        vehicle_id = _tmp_merged_dfs['vehicleId'].iloc[0]  # Assuming each DataFrame corresponds to one vehicle
        color = colors(i)
        plt.scatter(_tmp_merged_dfs['lng'], _tmp_merged_dfs['lat'], label=f'Vehicle {vehicle_id}', color=color)

# Plot formatting
plt.xlabel('Longitude')  # Fixed typo
plt.ylabel('Latitude')
plt.grid()
plt.legend(title='Vehicle ID', bbox_to_anchor=(1.05, 1), loc='upper left')  # Add legend outside the plot
plt.tight_layout()


# Define the path to save the plot
plot_save_path = os.path.join(PLOT_FOLDER_PATH, "all_trips_per_vehicle.png")

# Save the plot
plt.savefig(plot_save_path, bbox_inches='tight')
print(f"Plot saved to {plot_save_path}")

# Show the plot (optional, can be removed if not needed)
plt.show()


  colors = cm.get_cmap('tab20', num_vehicles)  # Choose a colormap with sufficient colors


Plot saved to ./Plots/all_trips_per_vehicle.png


## Plot velocity

### 5️⃣ KDE Plot (Smooth Distribution)

## What Does "Density" Stand For?

In **statistics and data visualization**, "density" refers to how frequently data points appear in a given range. Instead of showing counts (like a histogram), **density plots estimate the probability distribution** of the data.

- **Higher density (taller peaks)** means more data points are in that range.
- **Lower density (shorter peaks)** means fewer data points are in that range.

For example:
- A **histogram** shows how many data points fall into specific bins.
- A **KDE plot (Kernel Density Estimate)** smooths the histogram to show a continuous probability distribution.


In [12]:
# Flatten all velocity data
all_velocities = [v for df in ALL_merged_dfsS for v in df['velocity']]

# Calculate the total number of lines processed
total_lines = sum(len(df) for df in ALL_merged_dfsS)

# Create the plot
plt.figure(figsize=(10, 6))
plt.title(f'Velocity Density (KDE Plot)')
plt.xlabel('Velocity')

# Plot the KDE
sns.kdeplot(all_velocities, fill=True, color='b')  

plt.grid(True)

# Save the plot
plt.savefig(f"{PLOT_FOLDER_PATH}velocity_density_kde_plot.png")

plt.show()


## Create orientations
- Calculate which the **30 degrees turns** on coordinatess
- Add <u>**orientation column for the 30 degrees turns</u>** to corresponding coordinates
- Plot the **30 degrees turns** vs velocities **before and after** making the turn

| Decimal Places | Latitude Precision (meters) | Longitude Precision (meters at Equator) |
|----------------|-----------------------------|-----------------------------------------|
| 1              | ~11,132 m                   | ~11,132 m                               |
| 2              | ~1,113 m                    | ~1,113 m                                |
| 3              | ~111.3 m                    | ~111.3 m                                |
| 4              | ~11.13 m                    | ~11.13 m                                |
| 5              | ~1.113 m                    | ~1.113 m                                |
| 6              | ~0.1113 m                   | ~0.1113 m                               |
| 7              | ~0.01113 m                  | ~0.01113 m                              |
| 8              | ~0.001113 m                 | ~0.001113 m                             |

In [13]:
def process_coordinates(merged_dfs):
    # Load the dataset into a GeoDataFrame
    gmerged_dfs = gpd.GeoDataFrame(merged_dfs, geometry=gpd.points_from_xy(merged_dfs['lng'], merged_dfs['lat']))

    # Function to calculate precision correspondences
    def calculate_precision_distances():
        precision_list = []
        for decimals in range(1, 9):  # For 1 to 8 decimals
            latitude_precision = 111_320 / (10**decimals)  # Latitude: meters
            longitude_precision = latitude_precision  # Assuming equator (lat = 0)
            precision_list.append((decimals, latitude_precision, longitude_precision))
        return precision_list

    # Calculate precision distances
    precisions = calculate_precision_distances()

    # Create a DataFrame for output
    precision_merged_dfs = pd.DataFrame(precisions, columns=['Decimal Places', 'Latitude Precision (m)', 'Longitude Precision (m)'])
    # print(precision_merged_dfs)

    # Function to calculate the bearing between two points
    def calculate_bearing(lat1, lon1, lat2, lon2):
        """
        Calculate the bearing between two points.
        Returns the bearing in degrees.
        """
        lat1_rad, lon1_rad = math.radians(lat1), math.radians(lon1)
        lat2_rad, lon2_rad = math.radians(lat2), math.radians(lon2)

        delta_lon = lon2_rad - lon1_rad
        x = math.sin(delta_lon) * math.cos(lat2_rad)
        y = math.cos(lat1_rad) * math.sin(lat2_rad) - math.sin(lat1_rad) * math.cos(lat2_rad) * math.cos(delta_lon)
        bearing = math.atan2(x, y)
        bearing_degrees = (math.degrees(bearing) + 360) % 360  # Normalize to 0-360
        return bearing_degrees

    # Function to get orientation from bearing
    def get_orientation(bearing):
        """
        Convert a bearing in degrees to a cardinal direction.
        """
        directions = ['North', 'Northeast', 'East', 'Southeast', 'South', 'Southwest', 'West', 'Northwest']
        idx = round(bearing / 45) % 8  # Divide by 45 degrees for 8 cardinal directions
        return directions[idx]

    # Function to get the latest and next coordinate
    def get_latest_and_next_coordinates(gmerged_dfs):
        latest_coordinate = gmerged_dfs.iloc[-1]  # The last entry is the latest coordinate
        # print("Latest coordinate:", latest_coordinate)

        # Get next coordinates and calculate bearings
        next_coordinates = []
        for idx in range(len(gmerged_dfs) - 1):
            current_row = gmerged_dfs.iloc[idx]
            next_row = gmerged_dfs.iloc[idx + 1]
            bearing = calculate_bearing(current_row['lat'], current_row['lng'], next_row['lat'], next_row['lng'])
            orientation = get_orientation(bearing)
            next_coordinates.append((current_row['lat'], current_row['lng'], next_row['lat'], next_row['lng'], bearing, orientation))
        
        return latest_coordinate, next_coordinates

    # Get the latest and next coordinates
    latest_coordinate, next_coordinates = get_latest_and_next_coordinates(gmerged_dfs)

    # Print the results
    print("\nLatest Coordinate:")
    print(latest_coordinate[['lat', 'lng']])

    print("\nNext Coordinates with Orientation:")
    for coord in next_coordinates:
        print(f"Current: ({coord[0]}, {coord[1]}) -> Next: ({coord[2]}, {coord[3]}) | Bearing: {coord[4]:.2f}° | Orientation: {coord[5]}")

    return latest_coordinate, next_coordinates

def add_orientation_column(merged_dfs):
    # Function to calculate the bearing between two points
    def calculate_bearing(lat1, lon1, lat2, lon2):
        """
        Calculate the bearing between two points.
        Returns the bearing in degrees.
        """
        lat1_rad, lon1_rad = math.radians(lat1), math.radians(lon1)
        lat2_rad, lon2_rad = math.radians(lat2), math.radians(lon2)

        delta_lon = lon2_rad - lon1_rad
        x = math.sin(delta_lon) * math.cos(lat2_rad)
        y = math.cos(lat1_rad) * math.sin(lat2_rad) - math.sin(lat1_rad) * math.cos(lat2_rad) * math.cos(delta_lon)
        bearing = math.atan2(x, y)
        bearing_degrees = (math.degrees(bearing) + 360) % 360  # Normalize to 0-360
        return bearing_degrees

    # Function to get orientation from bearing
    def get_orientation(bearing):
        """
        Convert a bearing in degrees to a cardinal direction.
        """
        directions = ['North', 'Northeast', 'East', 'Southeast', 'South', 'Southwest', 'West', 'Northwest']
        idx = round(bearing / 45) % 8  # Divide by 45 degrees for 8 cardinal directions
        return directions[idx]

    # Add a new column "orientation"
    orientations = []

    # Loop through the DataFrame to calculate the bearing and orientation for each pair of consecutive coordinates
    for idx in range(len(merged_dfs) - 1):
        current_row = merged_dfs.iloc[idx]
        next_row = merged_dfs.iloc[idx + 1]
        
        # Calculate the bearing
        bearing = calculate_bearing(current_row['lat'], current_row['lng'], next_row['lat'], next_row['lng'])
        
        # Get the orientation from the bearing
        orientation = get_orientation(bearing)
        
        # Append the orientation to the list
        orientations.append(orientation)

    # For the last row, we can either leave the orientation empty or set it as "" since it has no "next" coordinate
    orientations.append("")

    # Add the "orientation" column to the DataFrame
    merged_dfs['orientation'] = orientations

    # Return the updated DataFrame
    return merged_dfs

# Assuming 'merged_dfs' is already defined as a DataFrame with columns 'lat' and 'lng'
latest_coordinate, next_coordinates = process_coordinates(merged_dfs)

# Assuming 'merged_dfs' is already defined as a DataFrame with columns 'lat' and 'lng'
merged_dfs = add_orientation_column(merged_dfs)

# Check the updated DataFrame
print(Fore.YELLOW + F"Updated DataFrame" + Style.RESET_ALL)





Latest Coordinate:
lat     37.51023
lng    22.379573
Name: 23967, dtype: object

Next Coordinates with Orientation:
Current: (37.5108333, 22.38571) -> Next: (37.5106033, 22.3859766) | Bearing: 137.40° | Orientation: Southeast
Current: (37.5106033, 22.3859766) -> Next: (37.51064, 22.3859266) | Bearing: 312.78° | Orientation: Northwest
Current: (37.51064, 22.3859266) -> Next: (37.5107499, 22.3859066) | Bearing: 351.79° | Orientation: North
Current: (37.5107499, 22.3859066) -> Next: (37.5108766, 22.3856983) | Bearing: 307.48° | Orientation: Northwest
Current: (37.5108766, 22.3856983) -> Next: (37.5112383, 22.3852933) | Bearing: 318.39° | Orientation: Northwest
Current: (37.5112383, 22.3852933) -> Next: (37.5110733, 22.38541) | Bearing: 150.71° | Orientation: Southeast
Current: (37.5110733, 22.38541) -> Next: (37.5111, 22.3856416) | Bearing: 81.73° | Orientation: East
Current: (37.5111, 22.3856416) -> Next: (37.5110966, 22.385745) | Bearing: 92.37° | Orientation: East
Current: (37.5110966

## Plot **velocities, orientations on coordinates**

In [14]:
plt.figure(figsize=(20, 10))
plt.title('Velocities on Coordinates')
plt.xlabel('Longitude')  # Corrected label
plt.ylabel('Latitude')   # Corrected label
plt.grid()

# Plot the scatter plot with velocity as the color
scatter = plt.scatter(
    merged_dfs['lng'],  # Longitude on X-axis
    merged_dfs['lat'],  # Latitude on Y-axis
    c=merged_dfs['velocity'], 
    cmap='viridis', 
    s=100, 
    edgecolors='k'  # Ensure this matches the marker style
)

# Add color bar
colorbar = plt.colorbar(scatter)
colorbar.set_label('Velocities')

plt.tight_layout()

# Save the plot
plt.savefig(f"{PLOT_FOLDER_PATH}velocities_on_coordinates_scatter.png")

plt.show()


In [15]:
# Filter data to include only points within the BBOX
filtered_df = merged_dfs[
    (merged_dfs['lat'] >= 37.89289455745117) & (merged_dfs['lat'] <= 38.040094390157776) &
    (merged_dfs['lng'] >= 23.110067218537864) & (merged_dfs['lng'] <= 23.765132807878018)
]

# Get the number of data points
num_points = len(filtered_df)

# Create the title with the number of data points
title = f'Velocities on Coordinates (Filtered to BBOX) - {num_points} Data Points'

plt.figure(figsize=(20, 10))
plt.title(title)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid()

# Plot only filtered data
scatter = plt.scatter(
    filtered_df['lng'],  
    filtered_df['lat'],  
    c=filtered_df['velocity'], 
    cmap='viridis', 
    s=100, 
    edgecolors='k'  
)

# Add color bar
colorbar = plt.colorbar(scatter)
colorbar.set_label('Velocities')

plt.tight_layout()

# Save the filtered plot
plt.savefig(f"{PLOT_FOLDER_PATH}velocities_on_coordinates_scatter_filtered.png")

plt.show()


## Filter **noise velocties** (>=51)

In [16]:
count_noise_vel = (merged_dfs['velocity'] >= 51).sum()  # Count the rows where 'velocity' >= 51
total_rows = len(merged_dfs)  # Total number of rows in the DataFrame

# Calculate the percentage
percentage = (count_noise_vel / total_rows) * 100

# Print the results
print(f"Total # of lines of DataSet: {total_rows}")
print(f"Num of velocities >= 51: {count_noise_vel}")
print(Fore.GREEN + f"Percentage of velocities >= 51: {percentage:.2f}%"+ Style.RESET_ALL)


Total # of lines of DataSet: 25428
Num of velocities >= 51: 501
[32mPercentage of velocities >= 51: 1.97%[0m


## Calculate percentage of Tripoli's BBOX data in comparison with all data

In [17]:
# Define bounding box coordinates
latMin, latMax = 37.49764419371479, 37.56244081620044
lngMin, lngMax = 22.344992459074458, 22.521463853839485


# Filter rows that are within the bounding box
in_bbox = merged_dfs[(merged_dfs['lat'] >= latMin) & (merged_dfs['lat'] <= latMax) &
                     (merged_dfs['lng'] >= lngMin) & (merged_dfs['lng'] <= lngMax)]

# Calculate the percentage of rows in the bounding box
percentage_in_bbox = (len(in_bbox) / len(merged_dfs)) * 100

# Print the result
print(Fore.GREEN + f"Percentage of rows inside the bounding box: {percentage_in_bbox:.2f}%" + Style.RESET_ALL)

[32mPercentage of rows inside the bounding box: 100.00%[0m


## Get velocity data and vehicle IDs for rows within specified **BBOX**

In [18]:
def get_velocity_in_area(df, startingLat, startingLng, endingLat, endingLng):
    """
    Get velocity data and vehicle IDs for rows within specified latitude and longitude bounds.

    Args:
        df (pd.DataFrame): The DataFrame containing the data.
        startingLat (float): The starting latitude.
        startingLng (float): The starting longitude.
        endingLat (float): The ending latitude.
        endingLng (float): The ending longitude.

    Returns:
        pd.DataFrame: A DataFrame containing 'vehicleId' and 'velocity' within the specified area.
    """
    try:
        # Filter rows based on latitude and longitude ranges
        filtered_df = df[
            (df['lat'] >= min(startingLat, endingLat)) &
            (df['lat'] <= max(startingLat, endingLat)) &
            (df['lng'] >= min(startingLng, endingLng)) &
            (df['lng'] <= max(startingLng, endingLng))
        ]
        
        # Return the subset of DataFrame with 'vehicleId' and 'velocity' columns
        return filtered_df[['vehicleId', 'velocity']]

    except KeyError as e:
        print(f"Error: Missing required column: {e}")
        return pd.DataFrame(columns=['vehicleId', 'velocity'])
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return pd.DataFrame(columns=['vehicleId', 'velocity'])


startingLat = 37.49764419371479
endingLat   =  37.56244081620044
startingLng = 22.344992459074458
endingLng   = 22.521463853839485


# Get filtered velocities
BBOX_df = get_velocity_in_area(merged_dfs, startingLat, startingLng, endingLat, endingLng)


In [19]:
merged_dfs.dtypes

vehicleId                       int64
lat                           float64
lng                           float64
dateStored                      int64
velocity                      float64
odometer                      float64
engineVoltage                 float64
dateStoredHuman        datetime64[ns]
dateOnlyStoredHuman            object
timeOnly                       object
orientation                    object
dtype: object

## Create **"Trips"** ("Trip" is defined as the difference >= 5 between the current and last date-time for a vehicle)

In [20]:
# Ensure 'dateStoredHuman' is in datetime format
merged_dfs['dateStoredHuman'] = pd.to_datetime(merged_dfs['dateStoredHuman'])

# Sort data by vehicleId and dateStoredHuman
merged_dfs = merged_dfs.sort_values(by=['vehicleId', 'dateStoredHuman'])

# Compute time differences between consecutive rows within each vehicle in minutes
merged_dfs['seconds_diff'] = merged_dfs.groupby('vehicleId')['dateStoredHuman'].diff().dt.total_seconds()    #.div(60)

# Print seconds_diff for debugging
print(merged_dfs[['vehicleId', 'dateStoredHuman', 'seconds_diff']])

# Define a new trip whenever the gap is 5 minutes or more, ensuring trip IDs reset per vehicle
merged_dfs['trip_id'] = merged_dfs.groupby('vehicleId', group_keys=False)['seconds_diff'].apply(lambda x: (x >= 300).cumsum()).reset_index(drop=True)  # 5min * 60 secs/min = 300 sces == 5min

# Fill NaN trip IDs (first row of each vehicle) with 0
merged_dfs['trip_id'] = merged_dfs['trip_id'].fillna(0).astype(int)






all_vehicles_data_path = "../../DataSets/API_Responses/Vehicle_Data/all_vehicle_responses.csv"
merged_dfs.to_csv(all_vehicles_data_path, index=False)
# print(Fore.GREEN + f"DataFrame stored to {all_vehicles_data_path}" + Style.RESET_ALL)


       vehicleId     dateStoredHuman  seconds_diff
89             1 2024-06-06 17:02:17           NaN
90             1 2024-06-06 17:02:20           3.0
91             1 2024-06-06 17:02:25           5.0
92             1 2024-06-06 17:02:31           6.0
93             1 2024-06-06 17:02:37           6.0
...          ...                 ...           ...
56649         20 2025-03-06 13:09:13           4.0
56650         20 2025-03-06 13:09:15           2.0
56651         20 2025-03-06 13:09:19           4.0
56652         20 2025-03-06 13:09:21           2.0
56653         20 2025-03-06 13:09:24           3.0

[25428 rows x 3 columns]


## Calculate **Acceleration**

In [21]:
# Compute velocity difference between consecutive rows
merged_dfs['velocity_diff'] = merged_dfs['velocity'].diff()

# Compute acceleration: velocity change / time change
merged_dfs['acceleration'] = merged_dfs['velocity_diff'] / merged_dfs['seconds_diff']

# Reset acceleration to 0.0 when either vehicleId or trip_id changes
merged_dfs.loc[(merged_dfs['vehicleId'].diff() != 0) | (merged_dfs['trip_id'].diff() != 0), 'acceleration'] = 0.0

# Handle NaN values (e.g., first row, or division by zero)
merged_dfs['acceleration'] = merged_dfs['acceleration'].fillna(0.0)



# Save the updated DataFrame to CSV
all_vehicles_data_path = "../../DataSets/API_Responses/Vehicle_Data/all_vehicle_responses.csv"
merged_dfs.to_csv(all_vehicles_data_path, index=False)
print(f"DataFrame stored to {all_vehicles_data_path}")


DataFrame stored to ../../DataSets/API_Responses/Vehicle_Data/all_vehicle_responses.csv


### Plot Velocity

### Plot Acceleration

In [22]:
merged_dfs = merged_dfs.reset_index()
x = merged_dfs['index'].values
y = merged_dfs['acceleration'].values
# Plot acceleration
plt.figure(figsize=(10, 6))
plt.plot(x, y, marker='o', linestyle='None', color='b')
plt.title('Acceleration Over Time')
plt.xlabel('Timestamp')
plt.ylabel('Acceleration (m/s^2)')
plt.grid(True)
plt.tight_layout()

# Save the filtered plot
plt.savefig(f"{PLOT_FOLDER_PATH}velocities_on_coordinates_scatter_filtered.png")
plt.show()  # Display the plot


## Filter problematic **accelerations** (< -0.50)

In [23]:
# merged_dfs["isProblem"] = merged_dfs["acceleration"].apply(lambda x: 1 if x < -0.50 else 0) #! isProblem: 1 == False, 0 == True

# print(merged_dfs)

In [24]:
all_vehicles_data_path = "../../DataSets/API_Responses/Vehicle_Data/all_vehicle_responses.csv"
merged_dfs.to_csv(all_vehicles_data_path, index=False)
print(Fore.GREEN + f"DataFrame stored to {all_vehicles_data_path}" + Style.RESET_ALL)

[32mDataFrame stored to ../../DataSets/API_Responses/Vehicle_Data/all_vehicle_responses.csv[0m


## **Convert to GeoJSON and create Geo.json and Geo.csv**

In [25]:
import json
import os
import logging
import pandas as pd
from typing import Dict

# Set up logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

class GeoJSONCreator:
    """Base class for creating and saving GeoJSON files."""

    def save_geojson(self, geojson_data: Dict, file_path: str):
        """Saves the GeoJSON data to a file."""
        try:
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(geojson_data, f, ensure_ascii=False, indent=2)
            logging.info(f"GeoJSON data saved to {file_path}")
        except Exception as e:
            logging.error(f"Error saving GeoJSON: {e}")

class VehicleGeoJSONCreator(GeoJSONCreator):
    """Creates GeoJSON for vehicle tracking data."""
    
    def from_dataframe(self, df: pd.DataFrame, geojson_file: str):
        """Converts a Pandas DataFrame to GeoJSON format and saves the GeoJSON file."""
        try:
            # Ensure datetime columns are converted to strings
            datetime_columns = ["dateStored", "dateStoredHuman", "dateOnlyStoredHuman"]
            for col in datetime_columns:
                if col in df.columns:
                    df[col] = df[col].astype(str)
            
            # Convert DataFrame to GeoJSON
            geojson_data = self.df_to_geojson(df)
            
            # Save GeoJSON file
            self.save_geojson(geojson_data, geojson_file)
            
            logging.info(f"GeoJSON file successfully written: {geojson_file}")
        except Exception as e:
            logging.error(f"Error processing GeoJSON file: {e}")
    
    def df_to_geojson(self, df: pd.DataFrame) -> Dict:
        """Converts a Pandas DataFrame to GeoJSON format."""
        geojson = {"type": "FeatureCollection", "features": []}
        
        for _, row in df.iterrows():
            feature = {
                "type": "Feature",
                "properties": {
                    "vehicleId": int(row["vehicleId"]) if "vehicleId" in df.columns and pd.notna(row["vehicleId"]) else None,
                    "dateStored": str(row["dateStored"]) if "dateStored" in df.columns else None,
                    "velocity": float(row["velocity"]) if "velocity" in df.columns and pd.notna(row["velocity"]) else 0.0,
                    "odometer": float(row["odometer"]) if "odometer" in df.columns and pd.notna(row["odometer"]) else 0.0,
                    "engineVoltage": float(row["engineVoltage"]) if "engineVoltage" in df.columns and pd.notna(row["engineVoltage"]) else 0.0,
                    "dateStoredHuman": str(row["dateStoredHuman"]) if "dateStoredHuman" in df.columns else None,
                    "dateOnlyStoredHuman": str(row["dateOnlyStoredHuman"]) if "dateOnlyStoredHuman" in df.columns else None,
                    "timeOnly": str(row["timeOnly"]) if "timeOnly" in df.columns else None,
                    "orientation": str(row["orientation"]) if "orientation" in df.columns else None,
                    "seconds_diff": float(row["seconds_diff"]) if "seconds_diff" in df.columns and pd.notna(row["seconds_diff"]) else None,
                    "acceleration": float(row["acceleration"]) if "acceleration" in df.columns and pd.notna(row["acceleration"]) else 0.0,
                    "isProblem": int(row["isProblem"]) if "isProblem" in df.columns and pd.notna(row["isProblem"]) else 0,
                    "lng": float(row["lng"]) if "lng" in df.columns and pd.notna(row["lng"]) else 0.0,
                    "lat": float(row["lat"]) if "lat" in df.columns and pd.notna(row["lat"]) else 0.0
                },
                "geometry": {
                    "type": "Point",
                    "coordinates": [
                        float(row["lng"]) if "lng" in df.columns and pd.notna(row["lng"]) else 0.0,
                        float(row["lat"]) if "lat" in df.columns and pd.notna(row["lat"]) else 0.0
                    ]
                }
            }
            geojson["features"].append(feature)
        
        return geojson


geojson_file = "../../DataSets/GeoJSON/all_vehicles_GeoJSON.json"

# Generate and save GeoJSON
vehicle_creator = VehicleGeoJSONCreator()
vehicle_creator.from_dataframe(merged_dfs, geojson_file)


2025-03-23 17:06:09,375 - INFO - GeoJSON data saved to ../../DataSets/GeoJSON/all_vehicles_GeoJSON.json
2025-03-23 17:06:09,376 - INFO - GeoJSON file successfully written: ../../DataSets/GeoJSON/all_vehicles_GeoJSON.json


### **Get data from Overpass API**

In [26]:
# Define Overpass API endpoint
url = "https://overpass-api.de/api/interpreter"

# Define bounding box coordinates
latMin, latMax = 37.49764419371479, 37.56244081620044
lngMin, lngMax = 22.344992459074458, 22.521463853839485

# Overpass QL query to get all types of roads within the bounding box
query = f"""
[out:json];
(
  way({latMin},{lngMin},{latMax},{lngMax})["highway"];
);
out geom;
"""


# Make request to Overpass API
response = requests.post(url, data=query)

# Check if the response status is OK (status code 200)
if response.status_code == 200:
    try:
        # Try to parse the JSON response
        osm_data = response.json()
        elements = osm_data.get("elements", [])

        # Extract relevant data
        named_streets_data = []
        for element in elements:
            tags = element.get("tags", {})
            if 'name' in tags:
                # Convert string representation of coordinates to list
                coords_str = element.get("geometry", [])
                named_streets_data.append([
                    element["id"],
                    "way",
                    tags.get('name', 'Unnamed Street'),
                    tags.get('highway', None),
                    tags.get('name', None),
                    coords_str
                ])

        # Convert to DataFrame
        df_named_streets = pd.DataFrame(named_streets_data, columns=["id", "type", "name", "highway", "name_en", "coordinates"])

        # Sort the DataFrame by the 'name' column (alphabetically)
        df_named_streets = df_named_streets.sort_values(by='name')

        # Group by the 'name' column and merge coordinates
        df_grouped = df_named_streets.groupby('name').agg({
            'id': 'first',  # Keep the first ID for the road
            'type': 'first',  # Keep the first type (way)
            'highway': 'first',  # Keep the first highway type
            'name_en': 'first',  # Keep the first name_en
            'coordinates': lambda x: sum(x, [])  # Merge all coordinates into one list
        }).reset_index()

        # Define the correct file path (including filename)
        directory = "../../DataSets/API_Responses/OSM"
        file_path = os.path.join(directory, "named_streets_grouped.csv")

        # Ensure the directory exists
        os.makedirs(directory, exist_ok=True)

        # Save the grouped DataFrame to CSV (append if the file exists)
        df_grouped.to_csv(file_path, index=False, mode="w", header=True)


        print(f"Grouped streets data saved to '{file_path}'")

    except ValueError as e:
        # Handle JSON parsing errors
        print(f"Error parsing JSON response: {e}")
else:
    print(f"Failed to fetch data. HTTP Status Code: {response.status_code}")
    print("Response Text: ", response.text)


Grouped streets data saved to '../../DataSets/API_Responses/OSM/named_streets_grouped.csv'


## Create GEOJSON format

In [27]:
# Load the CSV file
csv_file = "../../DataSets/API_Responses/OSM/OSM_roads.csv"  # Change this to your actual CSV filename
df = pd.read_csv(csv_file)

# Function to convert coordinate strings to GeoJSON LineString format
def parse_coordinates(coord_str):
    coords = json.loads(coord_str.replace("'", '"'))  # Ensure valid JSON format
    return [[c["lon"], c["lat"]] for c in coords]  # GeoJSON uses [longitude, latitude]

# Build GeoJSON structure
geojson = {
    "type": "FeatureCollection",
    "features": []
}

for _, row in df.iterrows():
    feature = {
        "type": "Feature",
        "properties": {
            "name": row["name"],
            "highway": row["highway"]
        },
        "geometry": {
            "type": "LineString",
            "coordinates": parse_coordinates(row["coordinates"])
        }
    }
    geojson["features"].append(feature)

# Save the output to a GeoJSON file
output_file = "../../DataSets/GeoJSON/tripoli_roads.geojson"
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(geojson, f, ensure_ascii=False, indent=4)

print(f"GeoJSON file saved as {output_file}")


GeoJSON file saved as ../../DataSets/GeoJSON/tripoli_roads.geojson


In [28]:
# Initialize the necessary class
ArcGIS_road_creator = RoadGeoJSONCreator()

# Function to get road data in dictionary format
def get_road_data(road_name):
    """Retrieves road data from DataFrame in dictionary format."""
    filtered_data = df_named_streets[df_named_streets['name'] == road_name]

    if not filtered_data.empty:
        for _, row in filtered_data.iterrows():
            coords = row["coordinates"]  # Get the coordinates from the DataFrame
            
            # Check if coordinates are already a list (which they should be)
            if isinstance(coords, list):
                # Coordinates are already a list, so no need for parsing
                road_data = {
                    "name": row["name"],
                    "highway": row["highway"],
                    "name_en": row["name_en"] if "name_en" in row else row["name"],  # Fallback to 'name' if no English name
                    "coordinates": [{"lat": coord['lat'], "lon": coord['lon']} for coord in coords],
                }
            else:
                print(f"Error: Coordinates for road '{road_name}' are not in the expected list format.")
                return None
            
            return road_data
    else:
        print(f"Road with name '{road_name}' not found.")
        return None

# Example usage
road_name_input = "Αλεξάνδρου-Σούτσου"
road_data = get_road_data(road_name_input)

# If road data exists, convert to GeoJSON and save
if road_data:
    # Updated the directory to save the GeoJSON files in the new path
    directory = "../../DataSets/GeoJSON"
    os.makedirs(directory, exist_ok=True)  # Ensure the directory exists
    file_path = os.path.join(directory, f"{road_name_input.replace(' ', '_')}.geojson")
    
    # Check if the GeoJSON creator exists and is callable
    if hasattr(ArcGIS_road_creator, 'from_dict'):
        ArcGIS_road_creator.from_dict(road_data, file_path)
    else:
        print("GeoJSON Creator method 'from_dict' is not available.")
else:
    print("No road data to save.")


NameError: name 'RoadGeoJSONCreator' is not defined