In [2]:
from source.config import DATA_DIR, RAW_DATA_DIR, PROCESSED_DATA_DIR, REPORTS_DIR
from source.utils import sanitize_filename
from source.features_dir.estimated_registrations import SUBPATH
from pathlib import Path
import pandas as pd
import os

[32m2025-02-16 19:10:28.703[0m | [1mINFO    [0m | [36msource.config[0m:[36m<module>[0m:[36m13[0m - [1mPROJ_ROOT path is: /home/anders/engasjement_svv[0m


In [4]:
def percentage_74t_registrations(subpath: str, years: tuple = (2021, 2022, 2023, 2024)) -> None:
    """
    Write per-road CSV files with the BK74 share of long-vehicle registrations.

    Reads 'final-truck_only.csv' and 'final-trailer_only.csv' from
    PROCESSED_DATA_DIR / 'estimated_registrations' / subpath. For every
    distinct value in their 'Vei' column, one CSV with one row per year is
    written to the 'percentages' subdirectory of that same folder. Each row
    holds the '<year> 60t/65t/68t/74t' values for the road, their sum, the sum
    of the '16,0m - 24,0m' and '>= 24,0m' columns from the road's raw
    registration file, and the first sum as a percentage of the second.

    Args:
        subpath: Directory below PROCESSED_DATA_DIR / 'estimated_registrations'
            that holds the input files and receives the output files.
        years: Years to report, one output row per year. Defaults to
            2021-2024, the years that were previously hard-coded.

    Raises:
        FileNotFoundError: If an input CSV does not exist (from pandas.read_csv).
        KeyError: If an expected column is missing from an input file.
        IndexError: If a raw registration file has no 'Totalt' row for one of
            the requested years.
    """

    def get_all_registrations(unsanitized_location: str):
        """Load the raw registration CSV for one location, keeping only the 'Totalt' rows."""
        sanitized_filename = sanitize_filename(unsanitized_location)
        df = pd.read_csv(
            RAW_DATA_DIR / 'estimated_registrations' / 'total_registrations_bwim74t' / f'{sanitized_filename}.csv',
            sep=';',
            encoding_errors='ignore',
        )
        # Take an explicit copy so that adding 'Year' below writes to an
        # independent frame instead of a filtered slice (avoids pandas'
        # SettingWithCopyWarning and any ambiguity about view vs. copy).
        df = df[df['Felt'] == 'Totalt'].copy()
        df['Year'] = pd.to_datetime(df['Fra']).dt.year
        return df

    # Define paths
    processed_data_project_path = PROCESSED_DATA_DIR / 'estimated_registrations' / subpath
    output_dir = processed_data_project_path / 'percentages'

    # Load data; each frame is paired with the label used in its output file names
    datasets = (
        ('truck', pd.read_csv(processed_data_project_path / 'final-truck_only.csv')),
        ('trailer', pd.read_csv(processed_data_project_path / 'final-trailer_only.csv')),
    )

    # Create the output directory once instead of once per road
    os.makedirs(output_dir, exist_ok=True)

    for label, df_74t in datasets:
        for road in df_74t['Vei'].unique():  # one output file per distinct road
            # Select this road's rows once; reused for every year and weight class
            road_rows = df_74t[df_74t['Vei'] == road]
            registrations = get_all_registrations(road)

            data = []
            for year in years:
                # Total registrations in the 16-24m and >=24m length categories
                year_rows = registrations[registrations['Year'] == year]
                all_registrations_16_and_above = (
                    year_rows['16,0m - 24,0m'].values[0] + year_rows['>= 24,0m'].values[0]
                )

                # Registrations per weight class for this road and year
                n_60 = road_rows[f'{year} 60t'].values[0]
                n_65 = road_rows[f'{year} 65t'].values[0]
                n_68 = road_rows[f'{year} 68t'].values[0]
                n_74 = road_rows[f'{year} 74t'].values[0]
                n_total = n_60 + n_65 + n_68 + n_74

                data.append({
                    'År': year,
                    '60t (3+4)': n_60,
                    '65t (4+4)': n_65,
                    '68t (3+5)': n_68,
                    '74t (4+5)': n_74,
                    'Totalt BK74': n_total,
                    'Registreringer trafikktellepunkter': all_registrations_16_and_above,
                    'Prosent BK74': (n_total / all_registrations_16_and_above) * 100,
                })

            # NOTE(review): `road` is used unsanitized in the output file name,
            # while the raw-data lookup goes through sanitize_filename — confirm
            # that road names never contain path separators.
            pd.DataFrame(data).to_csv(output_dir / f"{label}_percentages_{road}.csv", index=False)

# Generate the percentage CSV files for the SUBPATH imported from estimated_registrations
percentage_74t_registrations(subpath=SUBPATH)