# Updating DLC and Misplaced Flag Association 


This notebook fixes misplaced flag values in a dataset whose rows carry a variable DLC (Data Length Code).
The data includes the columns timestamp, canid, dlc, byte0 through byte7, and flag. When `dlc` equals
the maximum value (8), the flag is stored in its designated column. For rows where `dlc` is less than the maximum
value, however, the flag is incorrectly written into the byte column whose index equals `dlc` (e.g., byte2 when `dlc` is 2).

The `update_dlc_flag_association` function orchestrates these transformations using modular helper methods.

Together, the helper methods ensure proper handling of the flag data:
1. Identify misplaced flags and set them in their new location.
2. Nullify any byte columns containing misplaced flag values.
3. Ensure the flag column accurately reflects the flag value for maximum `dlc`.


## Import Data and Libraries

In [1]:
import polars as pl
import datetime as dt
%run utils.ipynb

In [2]:


# Look up the CSV locations registered under "out_paths" (helper comes from utils.ipynb).
(
    dos_df_path,
    fuzzy_df_path,
    attack_free_df_path,
) = load_data_paths_from_config_to_notebooks("out_paths")

# Only the dos and fuzzy CSVs are read here; attack_free_df_path is unpacked
# but not used in this cell.
dos_df = pl.read_csv(dos_df_path)
fuzzy_df = pl.read_csv(fuzzy_df_path)


In [3]:
# dos_df = dos_df.with_columns(
#     pl.when(pl.col("dlc") != 8)
#     .then(
#         pl.when(pl.col("dlc") == 0).then(pl.col("byte0"))
#         .when(pl.col("dlc") == 1).then(pl.col("byte1"))
#         .when(pl.col("dlc") == 2).then(pl.col("byte2"))
#         .when(pl.col("dlc") == 3).then(pl.col("byte3"))
#         .when(pl.col("dlc") == 4).then(pl.col("byte4"))
#         .when(pl.col("dlc") == 5).then(pl.col("byte5"))
#         .when(pl.col("dlc") == 6).then(pl.col("byte6"))
#         .when(pl.col("dlc") == 7).then(pl.col("byte7"))
#         .otherwise(None)
#     )
#     .alias("flag_temp")
# )
# for i in range(8):
#     dos_df = dos_df.with_columns(
#         pl.when(pl.col("dlc") == i)
#         .then(None)  # Set to null if dlc matches the byte column
#         .otherwise(pl.col(f"byte{i}"))  # Keep the original value otherwise
#         .alias(f"byte{i}")  # Update the byte column
#     )

# dos_df = dos_df.with_columns(
#     pl.when(pl.col("dlc")== 8)
#     .then(pl.col("flag"))
#     .otherwise(pl.col("flag_temp"))
#     .alias("flag_temp")
# )




## Methods

In [None]:
def set_new_flag_for_non_max_dlc(df, max_dlc_value, existing_dlc_column_name, new_flag_column_name):
    """
    Sets new flag values for rows where `dlc` differs from the maximum value.

    For a row whose DLC equals ``i`` (0 through 7), the value held in the
    ``byte_i`` column is copied into the new flag column. Rows whose DLC equals
    ``max_dlc_value``, or matches none of 0 through 7, receive null.

    Parameters
    ----------
    df : DataFrame
        The input dataframe. Must contain the DLC column and the columns
        ``byte_0`` through ``byte_7``.
    max_dlc_value : int
        The maximum value of DLC.
    existing_dlc_column_name : str
        Name of the column containing the current DLC values.
    new_flag_column_name : str
        Name of the column to store the new flag values.

    Returns
    -------
    DataFrame
        Updated dataframe with new flag values for non-maximum DLC rows.
    """
    # Use the caller-supplied DLC column everywhere; the outer condition must
    # not silently fall back to a hard-coded "dlc" column.
    dlc = pl.col(existing_dlc_column_name)

    # Build the when/then chain: DLC == i selects the value stored in byte_i.
    misplaced_flag = pl.when(dlc == 0).then(pl.col("byte_0"))
    for byte_index in range(1, 8):
        misplaced_flag = misplaced_flag.when(dlc == byte_index).then(
            pl.col(f"byte_{byte_index}")
        )

    return df.with_columns(
        # No outer `otherwise`: rows at the maximum DLC are left null here.
        pl.when(dlc != max_dlc_value)
        .then(misplaced_flag.otherwise(None))
        .alias(new_flag_column_name)
    )


In [None]:
def set_byte_to_null_if_byte_contains_flag(df, existing_dlc_column_name):
    """
    Nullifies byte columns containing misplaced flag values.

    For every ``i`` in 0 through 7, the ``byte_i`` column is set to null on
    rows whose DLC equals ``i``; all other values are kept unchanged.

    Parameters
    ----------
    df : DataFrame
        The input dataframe. Must contain the DLC column and the columns
        ``byte_0`` through ``byte_7``.
    existing_dlc_column_name : str
        Name of the column containing the current DLC values.

    Returns
    -------
    DataFrame
        Updated dataframe with nullified byte columns containing misplaced flags.
    """
    dlc = pl.col(existing_dlc_column_name)

    # Each expression reads only the DLC column and its own byte column, so all
    # eight can be applied in a single with_columns call instead of eight
    # sequential ones.
    cleaned_byte_columns = [
        pl.when(dlc == byte_index)
        .then(None)  # Null out the slot that held the misplaced flag
        .otherwise(pl.col(f"byte_{byte_index}"))  # Keep the original value otherwise
        .alias(f"byte_{byte_index}")
        for byte_index in range(8)
    ]
    return df.with_columns(cleaned_byte_columns)


In [6]:
def set_new_flag_for_max_dlc(
    df,
    max_dlc_value,
    existing_dlc_column_name,
    existing_flag_column_name,
    new_flag_column_name,
):
    """
    Fills the new flag column for rows where `dlc` equals the maximum value.

    Rows at the maximum DLC take their value from the existing flag column;
    every other row keeps whatever the new flag column already holds.

    Parameters
    ----------
    df : DataFrame
        The input dataframe. Must already contain the new flag column.
    max_dlc_value : int
        The maximum value of DLC.
    existing_dlc_column_name : str
        Name of the column containing the current DLC values.
    existing_flag_column_name : str
        Name of the column containing the current flag values.
    new_flag_column_name : str
        Name of the column to store the corrected flag values.

    Returns
    -------
    DataFrame
        Updated dataframe with corrected flag values for maximum DLC rows.
    """
    is_max_dlc = pl.col(existing_dlc_column_name) == max_dlc_value
    original_flag = pl.col(existing_flag_column_name)
    recovered_flag = pl.col(new_flag_column_name)

    merged_flag = pl.when(is_max_dlc).then(original_flag).otherwise(recovered_flag)
    return df.with_columns(merged_flag.alias(new_flag_column_name))


In [7]:
def drop_column(df, column_name):
    """
    Returns the result of dropping `column_name` from `df`.

    Parameters
    ----------
    df : DataFrame
        The input dataframe.
    column_name : str
        Name passed to ``df.drop``.

    Returns
    -------
    DataFrame
        Whatever ``df.drop(column_name)`` returns.
    """
    trimmed_df = df.drop(column_name)
    return trimmed_df


In [8]:
def update_dlc_flag_association(
    df,
    max_dlc_value,
    existing_dlc_column_name,
    existing_flag_column_name,
    new_flag_column_name,
):
    """
    Rebuilds the flag column: recovers misplaced flags, cleans the byte columns,
    and removes the old flag column.

    Parameters
    ----------
    df : DataFrame
        The input dataframe containing byte, flag, and DLC columns.
    max_dlc_value : int
        The maximum value of DLC.
    existing_dlc_column_name : str
        Name of the column containing the current DLC values.
    existing_flag_column_name : str
        Name of the column containing the flag values; dropped from the result.
    new_flag_column_name : str
        Name of the column to store the updated flag values.

    Returns
    -------
    DataFrame
        Updated dataframe with corrected flag associations.
    """
    # 1. Copy misplaced flags out of the byte columns. This must happen before
    #    step 2, which overwrites those byte values with null.
    with_recovered_flags = set_new_flag_for_non_max_dlc(
        df, max_dlc_value, existing_dlc_column_name, new_flag_column_name
    )

    # 2. Null out the byte slots that held a flag.
    with_clean_bytes = set_byte_to_null_if_byte_contains_flag(
        with_recovered_flags, existing_dlc_column_name
    )

    # 3. Rows at the maximum DLC take their flag from the original flag column.
    with_merged_flags = set_new_flag_for_max_dlc(
        with_clean_bytes,
        max_dlc_value,
        existing_dlc_column_name,
        existing_flag_column_name,
        new_flag_column_name,
    )

    # 4. The original flag column is no longer needed.
    return drop_column(with_merged_flags, existing_flag_column_name)
    

In [9]:
def update_multiple_dfs_dlc_flag_association(
    dfs, max_dlc_value, existing_dlc_column_name, existing_flag_column_name, new_flag_column_name
):
    """
    Applies `update_dlc_flag_association` to every dataframe in `dfs`.

    Parameters
    ----------
    dfs : list
        List of DataFrame.
    max_dlc_value : int
        The maximum value of DLC, shared by all dataframes.
    existing_dlc_column_name : str
        Name of the column containing the current DLC values.
    existing_flag_column_name : str
        Name of the column containing the flag values.
    new_flag_column_name : str
        Name of the column to store the updated flag values.

    Returns
    -------
    list
        List of updated DataFrame, in the same order as `dfs`.
    """
    return [
        update_dlc_flag_association(
            frame,
            max_dlc_value,
            existing_dlc_column_name,
            existing_flag_column_name,
            new_flag_column_name,
        )
        for frame in dfs
    ]

## Running Method

In [14]:
# Column names shared by every call below.
existing_dlc_column_name = "dlc"
existing_flag_column_name = "flag"
new_flag_column_name = "updatedFlag"

dfs = [dos_df, fuzzy_df]

# Largest DLC observed across all loaded frames.
max_dlc_value = max(frame[existing_dlc_column_name].max() for frame in dfs)

# updated_dfs = update_multiple_dfs_dlc_flag_association(
#     dfs,max_dlc_value,existing_dlc_column_name,existing_flag_column_name,new_flag_column_name
# )
# dos_df, fuzzy_df= updated_dfs

In [11]:
# Inspect the rows of dos_df whose DLC is 2.
dos_df.filter(pl.col("dlc") ==2)

timestamp,canId,dlc,byte0,byte1,byte2,byte3,byte4,byte5,byte6,byte7,updatedFlag
f64,str,i64,str,str,str,str,str,str,str,str,str
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""
…,…,…,…,…,…,…,…,…,…,…,…
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""
1.4782e9,"""05f0""",2,"""01""","""00""",,,,,,,"""R"""


In [12]:
# Inspect the rows of dos_df whose DLC is 8.
dos_df.filter(pl.col("dlc") ==8)

timestamp,canId,dlc,byte0,byte1,byte2,byte3,byte4,byte5,byte6,byte7,updatedFlag
f64,str,i64,str,str,str,str,str,str,str,str,str
1.4782e9,"""018f""",8,"""fe""","""5b""","""00""","""00""","""00""","""3c""","""00""","""00""","""R"""
1.4782e9,"""0260""",8,"""19""","""21""","""22""","""30""","""08""","""8e""","""6d""","""3a""","""R"""
1.4782e9,"""02a0""",8,"""64""","""00""","""9a""","""1d""","""97""","""02""","""bd""","""00""","""R"""
1.4782e9,"""0329""",8,"""40""","""bb""","""7f""","""14""","""11""","""20""","""00""","""14""","""R"""
1.4782e9,"""0545""",8,"""d8""","""00""","""00""","""8a""","""00""","""00""","""00""","""00""","""R"""
…,…,…,…,…,…,…,…,…,…,…,…
1.4782e9,"""018f""",8,"""fe""","""59""","""00""","""00""","""00""","""41""","""00""","""00""","""R"""
1.4782e9,"""0260""",8,"""18""","""21""","""21""","""30""","""08""","""8f""","""6d""","""19""","""R"""
1.4782e9,"""02a0""",8,"""24""","""00""","""9a""","""1d""","""97""","""02""","""bd""","""00""","""R"""
1.4782e9,"""0329""",8,"""dc""","""b7""","""7f""","""14""","""11""","""20""","""00""","""14""","""R"""


In [13]:
# Largest value found in the DLC column of dos_df.
dos_df[existing_dlc_column_name].max()

8