In [None]:
import pandas as pd
from geopandas import GeoDataFrame, read_file

import sys
# Add the parent directory to the import search path; this must run before
# the movingpandas import below so that a checkout located there is importable.
sys.path.append("..")
import movingpandas as mpd
# NOTE(review): presumably reports the installed movingpandas/dependency versions - confirm.
mpd.show_versions()

import warnings
# Install an "ignore" filter so warnings are silenced from this point on.
warnings.simplefilter("ignore")

In [None]:
import csv
import os
import shutil
import xml.etree.ElementTree as ET
from datetime import timedelta

import numpy as np
from dateutil import parser


def generate_grid_and_path(x_min, x_max, x_n, y_min, y_max, y_n):
    """Return two consecutive serpentine sweeps over a regular grid.

    The grid has ``x_n`` evenly spaced x values in ``[x_min, x_max]`` and
    ``y_n`` evenly spaced y values in ``[y_min, y_max]``.

    Args:
        x_min, x_max: Inclusive bounds of the grid along x.
        x_n: Number of grid columns.
        y_min, y_max: Inclusive bounds of the grid along y.
        y_n: Number of grid rows.

    Returns:
        A list of ``(x, y)`` tuples of length ``2 * x_n * y_n``: first a
        column-by-column sweep, then a row-by-row sweep.
    """
    grid_x, grid_y = np.meshgrid(
        np.linspace(x_min, x_max, x_n),
        np.linspace(y_min, y_max, y_n),
    )

    def node(row, col):
        # Grid arrays are indexed [row, col]; negative indices count from the end.
        return grid_x[row, col], grid_y[row, col]

    # Sweep 1: one column at a time. Even columns run from row 0 to the last
    # row, odd columns run back from the last row to row 0.
    waypoints = [
        node(row if col % 2 == 0 else -row - 1, col)
        for col in range(x_n)
        for row in range(y_n)
    ]

    # Sweep 2: one row at a time. Rows are visited bottom-up (index 0 first)
    # when x_n is even and top-down otherwise; even rows run from column 0 to
    # the last column, odd rows run back.
    row_order = range(y_n) if x_n % 2 == 0 else reversed(range(y_n))
    for row in row_order:
        for step in range(x_n):
            waypoints.append(node(row, step if row % 2 == 0 else -step - 1))

    return waypoints

def _local_name(element):
    """Return an element's tag with any ``{namespace}`` prefix removed."""
    tag = element.tag
    if not isinstance(tag, str):
        return ""
    return tag.rsplit("}", 1)[-1]


def _first_named(elements, local_name):
    """Return the first element in ``elements`` whose local tag name matches, else None."""
    for element in elements:
        if _local_name(element) == local_name:
            return element
    return None


def parse_corrdinates_str_list(kml_path, placemark_name=None):
    """Collect the LineString coordinate strings of the named Placemarks in a KML file.

    Tags are matched by local name, so both un-namespaced documents and
    documents that declare a default KML namespace (for example
    ``http://www.opengis.net/kml/2.2``) are handled.

    Args:
        kml_path: Path or file object accepted by ``ET.parse``.
        placemark_name: Optional exact ``<name>`` text to select, e.g.
            ``"vehicle_global_position:14"``. When ``None`` (the default) every
            Placemark that has a ``<name>`` child is considered.

    Returns:
        A list with one stripped ``<coordinates>`` text per matching Placemark,
        in document order. Placemarks without a ``<name>`` child, without a
        LineString, or whose coordinates text is empty are skipped.
    """
    root = ET.parse(kml_path).getroot()
    coordinates_str_list = []

    for placemark in root.iter():
        if placemark is root or _local_name(placemark) != "Placemark":
            continue

        # Iterating an element yields its direct children.
        name = _first_named(placemark, "name")
        if name is None:
            continue
        if placemark_name is not None and name.text != placemark_name:
            continue

        # First LineString anywhere below the Placemark, in document order.
        line_string = _first_named(placemark.iter(), "LineString")
        if line_string is None:
            continue

        coordinates = _first_named(line_string, "coordinates")
        if coordinates is None or coordinates.text is None:
            # An empty <coordinates/> element has no text to strip.
            continue

        text = coordinates.text.strip()
        if text:
            coordinates_str_list.append(text)

    return coordinates_str_list

def get_means(coordinates):
    """Return the mean of the first and of the second component of each coordinate.

    Args:
        coordinates: Sequence of indexable points; only indices 0 and 1 are read.

    Returns:
        Tuple ``(mean_x, mean_y)`` as computed by ``np.mean``.
    """

    def component_mean(index):
        # Each component is averaged in its own pass over the input.
        return np.mean([point[index] for point in coordinates])

    return component_mean(0), component_mean(1)


def write_to_csv(
    csv_file_path, coordinates, trajectory_id, tracker, base_timestamp, mean_x, mean_y, grid_size_half=10e-5, xy_n=10
):
    """Write a track followed by a generated grid sweep to a ';'-delimited CSV.

    The rows for ``coordinates`` come first and carry ``trajectory_id``. They
    are followed by the waypoints returned by ``generate_grid_and_path`` for a
    square of half-width ``grid_size_half`` centred on ``(mean_x, mean_y)``
    with ``xy_n`` points per axis; those rows carry ``trajectory_id + 1``.
    Row numbering (``fid``/``id``/``sequence``) continues across both parts,
    and timestamps advance by one second per row starting at ``base_timestamp``.

    Args:
        csv_file_path: Destination file; it is overwritten.
        coordinates: Sequence of points. Only the first two components of each
            point are written as X and Y, so both two-component and
            three-component (e.g. with altitude) points are accepted.
        trajectory_id: Integer id written for the track rows.
        tracker: Value written unchanged into the ``tracker`` column.
        base_timestamp: Timestamp string parsed with ``dateutil.parser.parse``.
        mean_x, mean_y: Centre of the generated grid.
        grid_size_half: Half of the grid's side length, in coordinate units.
        xy_n: Number of grid points along each axis.
    """
    base_time = parser.parse(base_timestamp)
    headers = ["X", "Y", "fid", "id", "sequence", "trajectory_id", "tracker", "t"]

    with open(csv_file_path, "w", newline="") as csv_file:
        writer = csv.writer(csv_file, delimiter=";")
        writer.writerow(headers)

        for i, coord in enumerate(coordinates, start=1):
            # Index instead of unpacking exactly three values: a point without
            # a third component must not abort the export.
            x, y = coord[0], coord[1]
            timestamp = (base_time + timedelta(seconds=1 * (i - 1))).isoformat()
            writer.writerow([x, y, i, i, i, trajectory_id, tracker, timestamp])

        path = generate_grid_and_path(
            mean_x - grid_size_half,
            mean_x + grid_size_half,
            xy_n,
            mean_y - grid_size_half,
            mean_y + grid_size_half,
            xy_n,
        )
        # Continue the numbering (and therefore the timestamps) after the track rows.
        for i, coord in enumerate(path, start=len(coordinates) + 1):
            x, y = coord
            timestamp = (base_time + timedelta(seconds=1 * (i - 1))).isoformat()
            writer.writerow([x, y, i, i, i, trajectory_id + 1, tracker, timestamp])

    print(f"Saved to {csv_file_path}")


def convert_kml_to_csv(kml_path):
    """Convert the first LineString track of a KML file to ``./data/csv/<stem>.csv``.

    The track is read with ``parse_corrdinates_str_list``, its bounds and mean
    position are printed, and ``write_to_csv`` writes the track plus a grid
    sweep centred on the mean position.

    Args:
        kml_path: Path of the KML file to convert.

    Raises:
        ValueError: If the file yields no LineString coordinates.
    """
    # Strip only the directory and the final extension, so "a.1.kml" and
    # "a.2.kml" map to different CSV files and OS path separators are honoured.
    kml_name = os.path.splitext(os.path.basename(kml_path))[0]

    coordinate_blocks = parse_corrdinates_str_list(kml_path)
    coordinates_str_list = coordinate_blocks[0].split() if coordinate_blocks else []
    if not coordinates_str_list:
        raise ValueError(f"No LineString coordinates found in {kml_path}")

    coordinates = [[float(coord) for coord in coord_str.split(",")] for coord_str in coordinates_str_list]
    mean_x, mean_y = get_means(coordinates)

    xs = [coord[0] for coord in coordinates]
    ys = [coord[1] for coord in coordinates]
    print(f"Min x: {min(xs)}, Min y: {min(ys)}")
    print(f"Max x: {max(xs)}, Max y: {max(ys)}")

    xy_n = 61
    # NOTE(review): presumably xy_n metres expressed in degrees, taking one
    # degree as roughly 111,111 m - confirm the intended grid size.
    grid_size_half = 1 / 111_111 * xy_n

    print(f"Mean x: {mean_x}, Mean y: {mean_y}")

    trajectory_id = 1
    tracker = 19
    base_timestamp = "2008-12-11 04:42:14+00"

    csv_file_path = f"./data/csv/{kml_name}.csv"
    # open() does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)

    write_to_csv(
        csv_file_path,
        coordinates,
        trajectory_id,
        tracker,
        base_timestamp,
        mean_x,
        mean_y,
        grid_size_half,
        xy_n=xy_n,
    )

def get_specific_filenames(directory, extension):
    """Return the names in ``directory`` that end with ``extension``, sorted.

    ``os.listdir`` returns entries in arbitrary order; sorting makes the
    processing order, and any output derived from it, reproducible between runs.

    Args:
        directory: Directory to list.
        extension: Suffix to match, e.g. ``".kml"``.

    Returns:
        Alphabetically sorted list of matching entry names (not full paths).
    """
    return sorted(filename for filename in os.listdir(directory) if filename.endswith(extension))


# Where the raw KML exports live and which suffix identifies them.
directory_path = "./data/raw_kml"
file_extension = ".kml"

# Build the path of every KML file found in that directory.
kml_pathes = [
    f"{directory_path}/{kml_name}"
    for kml_name in get_specific_filenames(directory_path, file_extension)
]

# Convert each KML file to CSV, one after another.
for kml_path in kml_pathes:
    convert_kml_to_csv(kml_path)

In [5]:
from math import radians, sin, cos, sqrt, atan2

def haversine_distance(lat1, lon1, lat2, lon2, radius_km=6371):
    """Great-circle distance between two points using the haversine formula.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius_km: Sphere radius; defaults to 6371 (Earth, in kilometres).

    Returns:
        The distance in the unit of ``radius_km`` (kilometres by default).
    """
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make the square roots below raise a
    # math domain error; clamp it back into range.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return radius_km * c


In [16]:
def calculate_total_distance(file_path, trajectory_id="1"):
    """Sum the haversine distances between consecutive points of one trajectory.

    The file is expected to be a ';'-delimited CSV with a header row, X in the
    first column, Y in the second and the trajectory id in the sixth, i.e. the
    layout produced by ``write_to_csv``. X is read as longitude and Y as
    latitude, matching the lon,lat order of KML coordinate tuples.

    Args:
        file_path: CSV file to read.
        trajectory_id: Only rows whose sixth column equals ``str(trajectory_id)``
            are used. Defaults to ``"1"``.

    Returns:
        Total distance in the unit returned by ``haversine_distance``
        (kilometres with its default radius); ``0`` when fewer than two
        matching rows exist.
    """
    total_distance = 0
    wanted_id = str(trajectory_id)

    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file, delimiter=';')
        next(reader, None)  # Skip the header; tolerate a completely empty file.

        prev_lat, prev_lon = None, None
        for row in reader:
            # Blank or truncated rows have no trajectory column to test.
            if len(row) <= 5 or row[5] != wanted_id:
                continue

            # Column order is X, Y = longitude, latitude.
            lon, lat = map(float, row[:2])
            if prev_lat is not None and prev_lon is not None:
                total_distance += haversine_distance(prev_lat, prev_lon, lat, lon)
            prev_lat, prev_lon = lat, lon

    return total_distance

In [20]:
# Summarise every converted track: one row per CSV file with its total length.
directory_path = "./data/csv"
file_extension = ".csv"

csv_pathes = get_specific_filenames(directory_path, file_extension)
output_file = "./data/distances.csv"

with open(output_file, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=';')
    writer.writerow(['csv_path', 'total_distance(m)'])

    for csv_path in csv_pathes:
        file_path = os.path.join(directory_path, csv_path)
        # The distance is scaled by 1000 to match the "(m)" column header.
        total_distance = 1000 * calculate_total_distance(file_path)
        writer.writerow([csv_path, total_distance])

print("Distances saved to", output_file)

Distances saved to ./data/distances.csv


![Distance results](./img/distance.png)
