In [12]:
import pandas as pd
import numpy as np

# Notebook-wide pandas display settings: cap printed rows at 50 and lift the
# limits on column count, total width and per-cell text width.
for option_name, option_value in (
    ('display.max_rows', 50),
    ('display.max_columns', None),
    ('display.width', None),
    ('display.max_colwidth', None),
):
    pd.set_option(option_name, option_value)

In [13]:
import os
import pickle
from source.config import INTERIM_DATA_DIR, TESTING_DATA_DIR
from source.utils import sanitize_filename


def filter_points_near_road(
    df: pd.DataFrame,
    road_lon: float,
    road_lat: float,
    radius_km: float = 5,
    min_speed: float = 40,
) -> pd.DataFrame:
    """
    Keeps the points that lie within a radius of a single road location AND
    whose recorded speed is above a threshold.

    Distances are great-circle distances from the Haversine formula, using a
    spherical Earth of radius 6371 km.

    Parameters:
        df (pd.DataFrame): DataFrame with 'Latitude' and 'Longitude' columns
            (degrees) and a 'Hastighet' (speed) column.
        road_lon (float): Longitude of the road location, in degrees.
        road_lat (float): Latitude of the road location, in degrees.
        radius_km (float): Radius in kilometers; points at exactly this
            distance are kept.
        min_speed (float): Exclusive lower bound on 'Hastighet'. Rows with a
            missing speed are always dropped. Defaults to 40, the threshold
            that used to be hard-coded. Same unit as the 'Hastighet' column
            (presumably km/h -- confirm against the data source).

    Returns:
        pd.DataFrame: Rows of ``df`` passing both filters. The index is reset
        after the distance filter but not after the speed filter, so the
        returned index labels may contain gaps.
    """
    earth_radius_km = 6371

    # Convert points and road coordinates to radians
    lat1, lon1 = np.radians(road_lat), np.radians(road_lon)
    lat2, lon2 = np.radians(df["Latitude"].values), np.radians(df["Longitude"].values)

    # Compute Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would turn sqrt(1 - a) into NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    distances = earth_radius_km * c

    # Filter points within the radius
    df_nearby = df[distances <= radius_km].reset_index(drop=True)

    # Keep only rows with a known speed above the threshold. `notna` is kept
    # explicitly so nullable dtypes (pd.NA) are handled too; `notnull` is an
    # alias of `notna`, so checking both was redundant.
    speed = df_nearby["Hastighet"]
    return df_nearby[speed.notna() & (speed > min_speed)]

def load_polygon_boundary_from_file(road, subpath):
    """
    Loads the pickled boundary for a road and returns its first geometry in
    EPSG:4326.

    Candidate files are tried in this order; the first one that exists wins:
      1. INTERIM_DATA_DIR/estimated_registrations/<subpath>/<sanitized road>_boundary.pkl
      2. INTERIM_DATA_DIR/estimated_registrations/<subpath>/<road>_boundary.pkl
      3. TESTING_DATA_DIR/estimated_registrations/<sanitized road>_boundary.pkl
         (note: this fallback does not use ``subpath``)

    Parameters:
        road (str): Road identifier used to build the file name.
        subpath (str): Sub-directory below the interim
            'estimated_registrations' directory.

    Returns:
        The first entry of the unpickled object's ``geometry`` after
        ``to_crs("EPSG:4326")``. The pickled object is presumably a
        GeoDataFrame -- confirm against the code that writes these files.

    Raises:
        FileNotFoundError: If none of the candidate files exists.
    """
    sanitized_name = f"{sanitize_filename(road)}_boundary.pkl"
    interim_dir = os.path.join(INTERIM_DATA_DIR / "estimated_registrations", subpath)
    candidate_paths = [
        os.path.join(interim_dir, sanitized_name),
        os.path.join(interim_dir, f"{road}_boundary.pkl"),
        os.path.join(TESTING_DATA_DIR / "estimated_registrations" / sanitized_name),
    ]

    for file_path in candidate_paths:
        if os.path.exists(file_path):
            # SECURITY: pickle.load can execute arbitrary code; only point this
            # at boundary files produced by this project.
            with open(file_path, "rb") as f:
                return pickle.load(f).to_crs("EPSG:4326").geometry.iloc[0]

    raise FileNotFoundError(
        f"No polygon boundary file found for road {road!r}; tried: "
        + "; ".join(candidate_paths)
    )



In [14]:
# Processed truck-only registrations; the path is relative to the notebook's
# working directory.
TRUCK_REGISTRATIONS_CSV = '../data/interim/estimated_registrations/processed-truck_only.csv'
df = pd.read_csv(TRUCK_REGISTRATIONS_CSV)

In [15]:
# Coarse pre-filter: rows within 5 km of the road reference coordinate. The
# helper also drops rows whose 'Hastighet' is missing or not above 40.
dff = filter_points_near_road(
    df,
    road_lon=11.570661932248345,
    road_lat=60.88848554945865,
    radius_km=5,
)

In [16]:
# Boundary geometry (EPSG:4326) for the road segment, used for the exact
# point-in-polygon filter.
polygon_boundary = load_polygon_boundary_from_file(
    road='tangensvingen_bru_tangensvingen_vest',
    subpath='bwim74t',
)

In [17]:
from shapely import Point, contains_xy


# Exact filter: keep the rows whose (Longitude, Latitude) point lies inside
# the boundary polygon. `contains_xy` applies the same predicate as
# `polygon_boundary.contains(Point(lon, lat))` to whole coordinate arrays, so
# no Point object is built per row. It also returns a proper (empty) boolean
# mask when `dff` has no rows, where a row-wise `DataFrame.apply(axis=1)`
# would hand back a DataFrame.
inside_polygon = contains_xy(
    polygon_boundary,
    dff["Longitude"].to_numpy(),
    dff["Latitude"].to_numpy(),
)
df_result = dff[inside_polygon]


In [18]:
import pandas as pd
from IPython.display import display, HTML
# Number of rows that fall inside the boundary polygon.
print(len(df_result))
# Make the DataFrame scrollable by wrapping it in HTML.
# Only the first 1000 rows are rendered, to keep the cell output small.
scrollable_html = df_result.head(1000).to_html(classes='table table-bordered table-condensed')
# Fixed-height <div> with vertical scrolling around the rendered table.
scrollable_df = f'<div style="max-height: 400px; overflow-y: scroll;">{scrollable_html}</div>'

display(HTML(scrollable_df))
# NOTE: this rebinds `df` from the full CSV contents to the polygon-filtered
# rows. Any cell that uses `df` after this one sees the filtered data, and
# re-running earlier cells that read `df` will operate on it too.
df = df_result

8838


Unnamed: 0,VIN,Dato,Latitude,Longitude,Hastighet,Tonnage
446,YS2R6X40002170509,2021-07-12 10:59:23,60.895027,11.58152,68.0,60
447,YS2R6X40002170509,2021-07-12 12:35:07,60.885715,11.567313,43.0,60
470,YS2R6X40002170509,2021-07-13 04:50:30,60.900692,11.595778,68.0,60
474,YS2R6X40002170509,2021-07-13 07:32:21,60.88718,11.568689,58.0,60
475,YS2R6X40002170509,2021-07-13 07:34:00,60.898785,11.592223,68.0,60
506,YS2R6X40002170509,2021-07-14 07:43:07,60.887726,11.569538,58.0,60
507,YS2R6X40002170509,2021-07-14 07:44:56,60.900143,11.594797,72.0,60
651,YS2R6X40002170509,2021-07-15 13:07:42,60.896595,11.586708,72.0,60
732,YS2R6X40002170509,2021-07-16 09:35:05,60.900543,11.595408,61.0,60
750,YS2R6X40002170509,2021-07-16 10:29:20,60.899097,11.592834,61.0,60


In [None]:
import folium

# Centre the map on the mean coordinate of the rows currently bound to `df`.
map_center = df[['Latitude', 'Longitude']].mean().tolist()
my_map = folium.Map(location=map_center, zoom_start=6)

# One plain marker per row, added straight to the map
for lat, lon in zip(df['Latitude'], df['Longitude']):
    folium.Marker(location=[lat, lon]).add_to(my_map)

# Last expression of the cell, so the notebook renders the map
my_map

In [20]:
import folium
from folium.plugins import MarkerCluster


# Centre the map on the mean coordinate of the rows currently bound to `df`.
map_center = df[['Latitude', 'Longitude']].mean().tolist()
my_map = folium.Map(location=map_center, zoom_start=6)

# Markers go into a MarkerCluster layer attached to the map, not onto the
# map itself
marker_cluster = MarkerCluster().add_to(my_map)

# One marker per row, added to the cluster
for lat, lon in zip(df['Latitude'], df['Longitude']):
    folium.Marker(location=[lat, lon]).add_to(marker_cluster)

# Last expression of the cell, so the notebook renders the map
my_map