In [None]:
import pandas as pd


def _iter_text_lines(row, columns):
    """Yield each stripped, non-blank line found in the non-missing cells of ``row``.

    Cells are visited in the order given by ``columns``; every cell is
    converted with ``str`` before being split into lines.
    """
    for col in columns:
        cell = row[col]
        if pd.isna(cell):
            continue
        for raw_line in str(cell).splitlines():
            line = raw_line.strip()
            if line:
                yield line


def extract_traffic_context(group, *, columns=("A1", "B1", "C1"), title_word_limit=5):
    """Render the text cells of a group of rows as a nested Markdown bullet list.

    Rows are processed in ascending ``datetime_input`` order, and rows whose
    ``datetime_input`` is missing are skipped. For each remaining row, the
    lines of its text cells are classified by whitespace-separated word count:

    * fewer than ``title_word_limit`` words: the line is emitted as a bold
      title bullet, indented with one tab;
    * otherwise: the line is emitted as a plain bullet, indented with two tabs
      if a title has already been emitted for the same row, else with one tab.

    A row that yields no bullets produces no entry at all.

    Args:
        group: DataFrame holding a ``datetime_input`` column and every column
            named in ``columns``.
        columns: Names of the text columns to read, in output order.
        title_word_limit: Lines with fewer words than this are treated as
            titles.

    Returns:
        The per-row entries joined by a blank line, each starting with
        ``- [<datetime_input>]:``, or the string ``"No valid traffic entries."``
        when no row produced any bullet.

    Raises:
        KeyError: Propagated from pandas when ``datetime_input`` (or, for a
            row that is actually processed, one of ``columns``) is absent.
    """
    # A stable sort keeps rows that share the same datetime_input in their
    # original relative order, so the output does not depend on the sort
    # algorithm's tie-breaking.
    ordered = group.sort_values("datetime_input", kind="mergesort")

    entries = []
    for _, row in ordered.iterrows():
        stamp = row["datetime_input"]
        if pd.isna(stamp):
            continue

        bullets = []
        # Title state is per row and deliberately carries over from one
        # column to the next within that row.
        has_title = False
        for line in _iter_text_lines(row, columns):
            if len(line.split()) < title_word_limit:
                has_title = True
                bullets.append(f"\t- **{line}**:")
            elif has_title:
                bullets.append(f"\t\t- {line}")
            else:
                bullets.append(f"\t- {line}")

        if bullets:
            entries.append(f"- [{stamp}]:\n" + "\n".join(bullets))

    return "\n\n".join(entries) if entries else "No valid traffic entries."

# Locations of the raw dataset and of the processed CSV written by this cell.
input_csv = "../data/dataset-test.csv"  # <-- your actual input
output_csv = "dataset-test-promptEng.csv"  # <-- your desired output

# Parse the whole file in one pass so every column gets a single, consistently
# inferred dtype; the default chunked parsing (low_memory=True) can produce
# mixed-type columns and makes pandas emit a DtypeWarning.
# NOTE(review): assumes the warning recorded for this read was a DtypeWarning — confirm.
df = pd.read_csv(input_csv, low_memory=False)

# One output row is produced per distinct 'id_output' value.
grouped = df.groupby('id_output')

# Output column -> source column. These values are assumed to be constant
# within a group, so the first row's value is taken.
constant_columns = {
    'timestamp': 'datetime_output',
    'output': 'content',
    'programs': 'programs',
    'nujna': 'nujna',
    'nova': 'nova',
}

processed_rows = []

for id_output, group in grouped:
    row = {target: group[source].iloc[0] for target, source in constant_columns.items()}

    # Fold all input rows of the group into one formatted text block.
    row['input'] = extract_traffic_context(group)

    processed_rows.append(row)

# Passing the column list explicitly keeps the header row in the CSV even
# when no groups were produced.
processed_df = pd.DataFrame(processed_rows, columns=[*constant_columns, 'input'])
processed_df.to_csv(output_csv, index=False)

print(f"Processed {len(processed_df)} groups. Saved to {output_csv}.")


  df = pd.read_csv(input_csv)


Processed 4625 groups. Saved to dataset-test-promptEng.csv.
