In [2]:
import os
import shutil
from pathlib import Path
from source import config

# Assuming config.RAW_DATA_DIR and config.INTERIM_DATA_DIR are pathlib Path objects
# Flatten every CSV found under RAW_DATA_DIR/WIM into a single interim folder.
# config.RAW_DATA_DIR and config.INTERIM_DATA_DIR are combined with "/" below,
# so they are expected to be pathlib.Path objects.
source_dir = config.RAW_DATA_DIR / "WIM"
destination_dir = config.INTERIM_DATA_DIR / "WIM_flat_folder"

# Ensure the destination directory exists
destination_dir.mkdir(parents=True, exist_ok=True)

# NOTE(review): re-running this cell copies every file again under a "_<n>" name,
# because the copies from the previous run already exist in destination_dir.
for root, dirs, files in os.walk(source_dir):
    # Sorting in place makes os.walk descend in a stable order, so the counter
    # handed to a duplicate file name does not depend on filesystem ordering.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith(".csv"):
            continue
        source_file = Path(root) / file
        destination_file = destination_dir / file

        # Handle duplicate file names by appending a counter.
        # `file` is a plain str (os.walk yields names, not Paths), so the stem
        # and suffix have to be taken from the Path object instead.
        counter = 1
        while destination_file.exists():
            destination_file = destination_dir / f"{source_file.stem}_{counter}{source_file.suffix}"
            counter += 1

        # Copy the file to the destination directory
        shutil.copy(str(source_file), str(destination_file))

print(f"All files have been copied to {destination_dir}")


All files have been copied to /home/anders/engasjement_svv/data/interim/WIM_flat_folder


In [76]:
import pandas as pd
# Render floats without decimals in displayed tables.
pd.options.display.float_format = '{:.0f}'.format

# Load the station export that the following cells filter and summarise.
df = pd.read_csv('../data/Øysand268.csv')

In [77]:
# Inclusive (min, max) bounds per measurement family.
valid_ranges = {
    'VehicleLength': (0, 30),
    'AxlesCount': (0, 10),
    'AxleDistance': (0, 10),
    'AxleWeight': (0, 15000)
}

# VehicleLength and AxlesCount must be present and inside their bounds
# (a missing value fails the comparison, so such rows are dropped).
rows_to_keep = (
    df['VehicleLength'].between(*valid_ranges['VehicleLength'])
    & df['AxlesCount'].between(*valid_ranges['AxlesCount'])
)

# Per-axle columns (AxleDistance1..n, AxleWeight1..n) may be empty for vehicles
# with fewer axles, so a missing value is accepted there.
for family in ('AxleDistance', 'AxleWeight'):
    lower, upper = valid_ranges[family]
    for column in df.columns:
        if column.startswith(family):
            rows_to_keep &= df[column].between(lower, upper) | df[column].isna()

df = df[rows_to_keep]


In [78]:
# Summary statistics of df after the range filtering, as a sanity check of the bounds.
df.describe()

Unnamed: 0,VehicleLength,AxlesCount,AxleDistance1,AxleDistance2,AxleDistance3,AxleDistance4,AxleWeight1,AxleWeight2,AxleWeight3,AxleWeight4,...,AxleDistance7,AxleDistance8,AxleDistance9,AxleDistance10,AxleWeight5,AxleWeight6,AxleWeight7,AxleWeight8,AxleWeight9,AxleWeight10
count,4192695,4192695,4192695,4177954,251669,133243,4192695,4177954,251669,133243,...,2635,281,28,1.0,45198,27286,2635,281,28,1.0
mean,6,2,0,3,3,3,1340,1276,3546,3888,...,2,1,2,2.0,5337,6184,5831,5310,8472,9280.0
std,2,1,0,1,2,2,1274,1745,2858,2755,...,0,0,0,,2489,2471,2461,2463,3312,
min,0,1,0,0,0,0,0,0,0,0,...,1,1,1,2.0,0,100,540,1020,1900,9280.0
25%,5,2,0,3,1,1,880,690,700,1480,...,1,1,1,2.0,3160,3980,3480,3350,7362,9280.0
50%,5,2,0,3,3,1,1020,830,3480,3580,...,1,1,2,2.0,4950,6320,6060,5210,9215,9280.0
75%,5,2,0,3,4,6,1240,1050,5240,5800,...,2,1,2,2.0,7220,8100,7795,6540,10628,9280.0
max,27,10,0,10,10,10,14880,15000,15000,14500,...,6,2,2,2.0,14460,14100,14000,14040,14520,9280.0


In [79]:
# Preview the first rows of df.
df.head()

Unnamed: 0,VehicleLength,AxlesCount,AxleDistance1,AxleDistance2,AxleDistance3,AxleDistance4,AxleWeight1,AxleWeight2,AxleWeight3,AxleWeight4,...,AxleDistance7,AxleDistance8,AxleDistance9,AxleDistance10,AxleWeight5,AxleWeight6,AxleWeight7,AxleWeight8,AxleWeight9,AxleWeight10
0,5,2,0,3,,,1150,880,,,...,,,,,,,,,,
1,5,2,0,3,,,960,950,,,...,,,,,,,,,,
2,5,2,0,3,,,990,920,,,...,,,,,,,,,,
3,4,2,0,3,,,930,720,,,...,,,,,,,,,,
4,5,2,0,3,,,970,740,,,...,,,,,,,,,,


In [1]:
# --- Column names / column-name prefixes -----------------------------------
TIME = "StartTimeStr"
STARTDATE = "StartDate"
# Two aliases for the same column; calculate_ådtt treats it as epoch milliseconds.
STARTTIME = "StartTime"
STARTTIME_UNIX = "StartTime"
VEHICLE_LENGTH = "VehicleLength"
AXLES_COUNT = "AxlesCount"
AXLE_DISTANCE = "AxleDistance"
AXLE_WEIGHT = "AxleWeight"

# --- Thresholds --------------------------------------------------------------
# NOTE(review): units are not stated anywhere in this notebook — presumably metres; confirm.
LIMIT_AXLES_SAME_GROUP = 1.8
OLD_LIMIT_HEAVY_VEHICLE = 5.6
NEW_LIMIT_HEAVY_VEHICLE = 7.5

# --- Time --------------------------------------------------------------------
# A 365-day year expressed in milliseconds (leap days are not accounted for).
MILLISECONDS_IN_YEAR = 365 * 24 * 60 * 60 * 1000

import polars as pl
from datetime import datetime
import source

def calculate_ådtt(df: pl.DataFrame, start_daterange: datetime = None) -> float:
    """Average number of registrations per observed day within a one-year window.

    The window starts at ``start_daterange`` (or, when omitted, at the earliest
    ``StartTime`` value in ``df``) and covers ``MILLISECONDS_IN_YEAR`` milliseconds,
    start inclusive and end exclusive. The result is the number of rows inside the
    window divided by the number of distinct calendar days (in the machine's local
    time zone) that have at least one row inside the window.

    NOTE(review): no vehicle-class filter is applied here — presumably ``df`` is
    expected to already contain heavy vehicles only; confirm against the callers.

    Args:
        df: Frame with a ``StartTime`` column holding epoch timestamps in milliseconds.
        start_daterange: Start of the one-year window. When ``None``, the first
            registered timestamp in ``df`` is used.

    Returns:
        Rows per observed day, or ``0.0`` when ``df`` is empty or no row falls
        inside the window.
    """
    # If the road's opening date is not given explicitly, use the first registered value in the file
    if start_daterange is None:
        if len(df) == 0:
            # Nothing registered at all: there is no window to anchor.
            return 0.0
        start_unix = df.select(pl.col(STARTTIME_UNIX).min()).to_numpy()[0, 0]
    else:
        start_unix = int(start_daterange.timestamp() * 1000)
    end_unix = start_unix + MILLISECONDS_IN_YEAR

    # Half-open interval [start, end): a registration exactly one year after the
    # start belongs to the next year, not to this one.
    heavy_vehicles_first_year = df.filter(
        (pl.col(STARTTIME_UNIX) >= start_unix) & (pl.col(STARTTIME_UNIX) < end_unix)
    )

    number_of_heavy_vehicles = len(heavy_vehicles_first_year)
    if number_of_heavy_vehicles == 0:
        # Avoids dividing by zero observed days.
        return 0.0

    # Count the days on the windowed rows only, so numerator and denominator cover
    # the same period (previously the days of the whole frame were counted).
    observed_dates = heavy_vehicles_first_year.select(
        pl.col(STARTTIME_UNIX)
        .map_elements(
            lambda ts: datetime.fromtimestamp(int(ts) / 1000).strftime("%Y-%m-%d"),
            return_dtype=pl.String,
        )
        .alias(STARTDATE)
    )
    unique_days_in_range = observed_dates.n_unique()

    ådtt = number_of_heavy_vehicles / unique_days_in_range

    return ådtt

[32m2025-01-10 19:22:42.099[0m | [1mINFO    [0m | [36msource.config[0m:[36m<module>[0m:[36m13[0m - [1mPROJ_ROOT path is: /home/anders/engasjement_svv[0m


In [2]:
# Polars load of one semicolon-separated WIM export; the first six lines of the
# file are skipped and over-long rows are truncated rather than rejected.
df_anestad_vest = pl.read_csv(
    source.config.INTERIM_DATA_DIR / "WIM_flat_folder" / "20231001-20240123_Aanestad_Vestgående.csv",
    separator=";",
    skip_rows=6,
    truncate_ragged_lines=True,
)
# Other exports, currently disabled:
# df_anestad_vest2 = pl.read_csv(source.config.INTERIM_DATA_DIR / "WIM_flat_folder" / "20240122-20240612_R3 vestgående.csv", truncate_ragged_lines=True, separator=";", skip_rows=6)
# df_anestad_vest3 = pl.read_csv(source.config.INTERIM_DATA_DIR / "WIM_flat_folder" / "20221014-20 Kistler_R3_vestg.csv", truncate_ragged_lines=True, separator=";", skip_rows=6)

: 

In [11]:
# Preview the first rows of the polars frame.
# NOTE(review): the saved output of this cell lists export-metadata fields
# ("Time of Download", "Logged in as", "Query") as columns, and its query starts at
# 2024-01-22 — it looks like a stale result from a different read, or a load where
# the metadata header was not skipped. Re-run on a fresh kernel to confirm.
df_anestad_vest.head()

Time of Download,Logged in as,Query
str,str,str
"""2024-06-13T13:52:05+0200""","""SystemIntegrator""","""(c.StartTime >= 2024-01-22T00:…"
,,
"""Country	Location""","""Road No.""","""Road Km"""
"""Norge Site Vest""","""Rv3""",
,,


In [3]:
import pandas as pd
import source

# pandas load of the WIM export: semicolon-separated, first six lines skipped.
df_aanestad_vest_1 = pd.read_csv(
    source.config.INTERIM_DATA_DIR / "WIM_flat_folder" / "20231001-20240123_Aanestad_Vestgående.csv",
    sep=";",
    skiprows=6,
    low_memory=False,
    # Malformed rows are dropped without any report.
    on_bad_lines="skip",
    # nrows=1000,
)

[32m2025-01-10 23:11:17.827[0m | [1mINFO    [0m | [36msource.config[0m:[36m<module>[0m:[36m13[0m - [1mPROJ_ROOT path is: /home/anders/engasjement_svv[0m


: 