In [None]:
from typing import List, Dict, Any
from collections import OrderedDict
import copy

def create_dict_for_table(
    dfs: List[List[Dict[str, Any]]],
    columns_to_keep: List[List[str]],
    new_column_names: List[List[str]],
    use_index: List[bool],
    index_names: List[str]
) -> List[List[Dict[str, Any]]]:
    """
    Build new, independent "dataframes" (lists of row dictionaries) that contain only
    selected columns under new names, optionally with a positional index column.

    The inputs are parallel lists: position ``i`` of every argument describes how the
    dataframe ``dfs[i]`` is processed. The input rows are never modified; every kept
    value is deep-copied into the result.

    Args:
        dfs: List of dataframes (each dataframe is a list of row dictionaries).
        columns_to_keep: For each dataframe, the source column names to retain. A source
            column may be listed more than once to feed several output columns.
        new_column_names: For each dataframe, the output name of each retained column,
            positionally matched with ``columns_to_keep``. If an output name is repeated,
            the value of the later column wins.
        use_index: For each dataframe, whether to add an index column holding the
            0-based position of the row within its dataframe.
        index_names: For each dataframe, the name of the index column. It is ignored
            when the matching ``use_index`` entry is falsy. If it equals an output
            column name, the index value overwrites that column.

    Returns:
        A list with one processed dataframe per input dataframe, in the same order.

    Raises:
        ValueError: If the five input lists differ in length, or if a dataframe's
            ``columns_to_keep`` and ``new_column_names`` entries differ in length.
        KeyError: If a row does not contain one of the columns to keep.
    """
    # All five arguments are parallel lists, so they must have the same length.
    input_lengths = [len(x) for x in (dfs, columns_to_keep, new_column_names, use_index, index_names)]
    if len(set(input_lengths)) > 1:
        raise ValueError(
            f"Inconsistent input lengths:\n"
            f"dfs: {input_lengths[0]}, columns_to_keep: {input_lengths[1]}, "
            f"new_column_names: {input_lengths[2]}, use_index: {input_lengths[3]}, "
            f"index_names: {input_lengths[4]}"
        )

    # The result is built from scratch; the inputs are only read.
    processed_dfs = []

    for df_idx, (df, keep_cols, new_cols, add_index, idx_name) in enumerate(
        zip(dfs, columns_to_keep, new_column_names, use_index, index_names)
    ):
        # Each kept column needs exactly one new name.
        if len(keep_cols) != len(new_cols):
            raise ValueError(
                f"Mismatched column counts in dataframe at index {df_idx}: "
                f"columns_to_keep: {len(keep_cols)}, new_column_names: {len(new_cols)}"
            )

        # Keep the (old, new) pairs as a list instead of a dict: a dict keyed by the
        # old name would collapse a source column that is listed several times and
        # silently drop all but one of its output columns.
        col_pairs = list(zip(keep_cols, new_cols))

        processed_df = []
        for row_idx, row in enumerate(df):
            # Deep-copy each value so that mutable cells are not shared with the input.
            new_row = {
                new_name: copy.deepcopy(row[old_name])
                for old_name, new_name in col_pairs
            }

            # The index is the row's position within its own dataframe.
            if add_index:
                new_row[idx_name] = row_idx

            processed_df.append(new_row)

        processed_dfs.append(processed_df)

    return processed_dfs

NameError: name 'List' is not defined

In [3]:
from utils.read_write import read_csv_v2
from utils.utils import create_table_file

# Load the processed crashes data once; every table below is derived from it.
CRASHES_CSV = read_csv_v2('../data/Crashes_Processed.csv')

# --- Injury table ---------------------------------------------------------
# NOTE(review): presumably create_table_file writes the selected columns plus an
# index column to the given path; confirm against utils.utils.create_table_file.
INJURY_FILE_PATH = 'dw_tables_csv/Injury.csv'
INJURY_INDEX_COL = 'Injury_ID'
INJURY_COLUMNS = [
    'Injuries_Total',
    'Injuries_Fatal',
    'Injuries_Incapacitating',
    'Injuries_Non_Incapacitating',
    'Injuries_No_Indication',
    'Injuries_Reported_Not_Evident',
    'Injuries_Unknown',
]

create_table_file(
    CRASHES_CSV,
    INJURY_FILE_PATH,
    INJURY_INDEX_COL,
    INJURY_COLUMNS,
)

# --- Crash location table -------------------------------------------------
# The NEW and OG lists are positionally aligned; they differ only in the first
# entry ('Street' vs. 'Street_Name').
# NOTE(review): presumably OG = names in the source CSV, NEW = names in the
# output table; confirm against create_table_file.
CRASH_LOCATION_FILE_PATH = 'dw_tables_csv/CrashLocation.csv'
CRASH_LOCATION_INDEX_COL = 'Crash_Location_ID'
CRASH_LOCATION_NEW_COLUMNS = [
    'Street',
    'Street_No',
    'Street_Direction',
    'Beat_Of_Occurrence',
    'Latitude',
    'Longitude',
    'Location',
]
CRASH_LOCATION_OG_COLUMNS = [
    'Street_Name',
    'Street_No',
    'Street_Direction',
    'Beat_Of_Occurrence',
    'Latitude',
    'Longitude',
    'Location',
]
create_table_file(
    CRASHES_CSV,
    CRASH_LOCATION_FILE_PATH,
    CRASH_LOCATION_INDEX_COL,
    CRASH_LOCATION_NEW_COLUMNS,
    CRASH_LOCATION_OG_COLUMNS,
)

# --- Crash condition table ------------------------------------------------
# The NEW and OG lists are positionally aligned; the names differ for the device
# condition and roadway surface condition columns.
CRASH_CONDITION_FILE_PATH = 'dw_tables_csv/CrashCondition.csv'
CRASH_CONDITION_INDEX_COL = 'Crash_Condition_ID'
CRASH_CONDITION_NEW_COLUMNS = [
    'Posted_Speed_Limit',
    'Traffic_Control_Device',
    'Traffic_Control_Device_Condition',
    'Weather_Condition',
    'Lighting_Condition',
    'Trafficway_Type',
    'Roadway_Surface_Condition',
    'Road_Defect',
    'Alignment',
]
CRASH_CONDITION_OG_COLUMNS = [
    'Posted_Speed_Limit',
    'Traffic_Control_Device',
    'Device_Condition',
    'Weather_Condition',
    'Lighting_Condition',
    'Trafficway_Type',
    'Roadway_Surface_Cond',
    'Road_Defect',
    'Alignment',
]

create_table_file(
    CRASHES_CSV,
    CRASH_CONDITION_FILE_PATH,
    CRASH_CONDITION_INDEX_COL,
    CRASH_CONDITION_NEW_COLUMNS,
    CRASH_CONDITION_OG_COLUMNS,
)

Reading csv Crashes_Processed.csv
