In [None]:
# Helper function to format the sequence of events for a driver
def get_sequence_of_events(row):
    """Format a driver's recorded crash events as one readable phrase.

    Reads the columns 'Sequence of Events 1' through 'Sequence of Events 4'
    from ``row`` in order, drops the placeholder value 'Uncoded & Errors'
    as well as missing values (NaN/None), and joins what is left.

    Args:
        row: A record indexable by the four column names (for example a
            pandas Series for one driver, or a plain dict).

    Returns:
        "no recorded events" when no usable event remains; otherwise the
        kept events joined with ', followed by ' (a single event is
        returned on its own).
    """
    events = []
    for i in range(1, 5):
        event = row[f'Sequence of Events {i}']
        # A missing value is not an event: left in, a NaN float would make
        # str.join raise TypeError (or show up as "nan" in the narrative).
        if pd.isna(event) or event == 'Uncoded & Errors':
            continue
        # Coerce to str so non-string codes cannot break the join below.
        events.append(str(event))

    if not events:
        return "no recorded events"
    # join() on a one-element list returns that element unchanged.
    return ', followed by '.join(events)

# Function to generate a crash narrative and target from a group of two drivers
def generate_narrative_and_target(group):
    """Build one chat-formatted prompt/answer string for a crash group.

    The 'Hazardous Action' column decides both the target label and which
    row is narrated as "Driver 1":

    * every row is 'None'/NaN  -> target "<NO HAZARDOUS ACTION>"
    * no row is 'None'/NaN     -> target "<BOTH DRIVERS TOOK HAZARDOUS ACTION>"
    * otherwise                -> target is the hazardous action of the
      first hazardous row, and that row is narrated as Driver 1.

    In the first two cases the rows are used in their existing order
    (positions 0 and 1). Crash-level attributes are read from the Driver 1
    row only.

    Args:
        group: DataFrame of driver rows for one crash; positions 0 and 1
            are read, so at least two rows are expected.

    Returns:
        A single string with system, user and assistant sections separated
        by the header / <|eot_id|> markers, ending with the target label.
    """
    def lacks_hazard(value):
        # 'None' (the literal string) and NaN both mean "no hazardous action".
        return value == 'None' or pd.isna(value)

    # Evaluate the per-driver flag once and reuse it for every decision below.
    clear_mask = group['Hazardous Action'].apply(lacks_hazard).astype(bool)
    nobody_hazardous = clear_mask.all()
    everybody_hazardous = not clear_mask.any()

    if nobody_hazardous or everybody_hazardous:
        # Same status for all rows: keep the rows in their existing order.
        first_driver = group.iloc[0]
        second_driver = group.iloc[1]
        if nobody_hazardous:
            target = "<NO HAZARDOUS ACTION>"
        else:
            target = "<BOTH DRIVERS TOOK HAZARDOUS ACTION>"
    else:
        # Mixed status: the hazardous row is narrated first, and the label
        # carries only the action itself, not a driver number.
        first_driver = group[~clear_mask].iloc[0]
        second_driver = group[clear_mask].iloc[0]
        target = f"<{first_driver['Hazardous Action']}>"

    def describe_driver(number, row):
        # One driver's part of the narrative (the Hazardous Action is
        # deliberately left out of the text).
        return (
            f"Driver {number}, a {row['Driver Age']}-year-old {row['Driver Gender']}, "
            f"the action prior to crash was {row['Action Prior to Crash']}, "
            f"and distraction status was {row['Driver Distraction (2016+)']}. "
            f"Their sequence of events was: {get_sequence_of_events(row)}."
        )

    # Crash-level attributes come from the Driver 1 row.
    scene = first_driver
    if pd.notna(scene['Traffic Control']):
        traffic_control = scene['Traffic Control']
    else:
        traffic_control = 'no traffic control'

    # NOTE(review): the wording below (including the "condtion" spelling) is
    # emitted verbatim into the generated text, so it is reproduced exactly.
    sentences = [
        f"A crash occurred in a {scene['Rural/Urban Area (2016+)']} area on a "
        f"{scene['Number of Traffic Lanes']}-lane road with {scene['Road Conditions']} conditions.",
        f"The speed limit was {scene['Speed Limit at Crash Site']} mph, "
        f"and traffic control was {traffic_control}.",
        f"The trafficway was {scene['Trafficway']}.",
        f"It happened in {scene['Crash Month']} {scene['Crash Year']}, "
        f"at {scene['Time of Day']} on a {scene['Day of Week']}.",
        f"The weather was {scene['Weather Conditions (2016+)']}, "
        f"and lighting condtion was {scene['Lighting Conditions']}.",
        f"The crash type was {scene['Crash Type']}.",
        describe_driver(1, first_driver),
        describe_driver(2, second_driver),
    ]
    narrative = " ".join(sentences)

    # Assemble the chat transcript: one entry per output line.
    transcript = [
        "<|start_header_id|>system<|end_header_id|>",
        "You are a helpful assistant designed to predict the hazardous action took by a driver based on the traffic crash information below.",
        "<|eot_id|>",
        "<|start_header_id|>user<|end_header_id|>",
        narrative,
        "Predict One crash-level hazardous action status for involved drivers from the available options: In case only one driver had hazardous action, choose one from <Speed and Stopping Violations>, <Right-of-Way and Traffic Control Violations>, <Lane and Direction Violations>, <Maneuvering and Signaling Errors>, <General Unsafe Driving>. In case both drivers took hazardous actions, output <BOTH DRIVERS TOOK HAZARDOUS ACTION>. In case Neither drivers had hazardous actions, output <NO HAZARDOUS ACTION>.",
        "<|eot_id|>",
        "<|start_header_id|>assistant<|end_header_id|>",
        target,
        "<|eot_id|>",
    ]
    return "\n".join(transcript)

# Build one prompt string per crash: rows sharing a 'Crash Instance' value
# are passed together to generate_narrative_and_target.
crash_groups = MTCF_test.groupby('Crash Instance')
prompt_series = crash_groups.apply(generate_narrative_and_target)
narratives = prompt_series.tolist()

# Show the first five results (numbered from 1) as a quick visual check
preview = narratives[:5]
for i, narrative in enumerate(preview, 1):
    print(f"Narrative {i}:\n{narrative}\n")
