In [4]:
import pandas as pd
import csv

# Location of the input CSV export.
input_csv_path = '/content/llm-human-readable__content_perfect4_part-00000-b2bba7e9-83e1-4d80-9d14-5abac0a2d226-c000.csv'  # Replace with your actual file path

# Read the export into a DataFrame; empty strings are listed explicitly as
# missing values and the csv module's QUOTE_ALL mode is passed to the parser.
df = pd.read_csv(input_csv_path, quoting=csv.QUOTE_ALL, na_values=[''])

# Mapping from broadband type codes to their full display names.
# Codes that are not listed here are passed through unchanged by
# create_uniform_summary.
bb_type_mapping = {
    'FBB': 'Fixed Broadband',
    'FWA': 'Fixed Wireless Access',
}


def create_uniform_summary(row, *, period_start="2024 June",
                           period_end="2024 July", region="the USA"):
    """Build the uniform human-readable summary sentence for one row.

    Args:
        row: A mapping-like record (e.g. a DataFrame row passed by
            ``df.apply(..., axis=1)``) providing the keys ``carrier_1``,
            ``carrier_1_bb_type``, ``carrier_2``, ``carrier_2_bb_type``,
            ``num_2_outflow``, ``num_1_inflow`` and ``num_3_netflow``.
        period_start: Label for the start of the reporting period.
        period_end: Label for the end of the reporting period.
        region: Region phrase inserted after "in" (include any article).

    Returns:
        The formatted summary string, or a string starting with
        ``"Error processing row: "`` when a required key is missing or a
        count cannot be converted to an integer (e.g. NaN).

    Only data-quality errors are converted to the error string; any other
    exception propagates so that genuine programming errors are not hidden
    inside the output column.
    """
    try:
        # Lookups and conversions are kept in a fixed order so that the
        # first problem encountered in a row determines the error message.
        carrier_1 = row['carrier_1']  # e.g., AT&T
        bb_code_1 = row['carrier_1_bb_type']
        bb_type_1 = bb_type_mapping.get(bb_code_1, bb_code_1)  # e.g., FBB -> Fixed Broadband
        carrier_2 = row['carrier_2']  # e.g., Airstream Communications, LLC
        bb_code_2 = row['carrier_2_bb_type']
        bb_type_2 = bb_type_mapping.get(bb_code_2, bb_code_2)

        # NOTE(review): the source columns are deliberately swapped relative
        # to their names to match the template's order — confirm against the
        # data dictionary that 'num_2_outflow' is the carrier 1 -> carrier 2
        # count and 'num_1_inflow' is the carrier 2 -> carrier 1 count.
        num_1_to_2 = int(row['num_2_outflow'])
        num_2_to_1 = int(row['num_1_inflow'])
        net_flow = int(row['num_3_netflow'])
    except (KeyError, TypeError, ValueError, OverflowError) as e:
        return f"Error processing row: {str(e)}"

    # Format the summary using the proposed template
    return (f"During the period of {period_start} to {period_end}, in {region}, "
            f"{num_1_to_2} households which were served by {carrier_1} that provides {bb_type_1} services moved to {carrier_2} that provides {bb_type_2} services "
            f"and {num_2_to_1} households that were served by {carrier_2} moved to {carrier_1}, "
            f"with a net flow from {carrier_1} to {carrier_2} of {net_flow}.")

# Target column that holds the regenerated summaries.
summary_col = 'Human_Readable_Summary'

# Normalise the existing summary column's name, then overwrite its contents
# with a freshly generated summary for every row.
df = df.rename(columns={'human_readable_summary': summary_col})
df[summary_col] = df.apply(create_uniform_summary, axis=1)

# Persist the result without the index column.
df.to_csv('/content/updated_file.csv', index=False)

# Show the first few summaries as a quick sanity check.
print(df[[summary_col]].head())

# Rows whose summary carries the error marker produced by
# create_uniform_summary are collected and reported.
has_error = df[summary_col].str.contains("Error processing row")
errors = df[has_error]
if errors.empty:
    print("\nNo errors found in the summaries.")
else:
    print("\nRows with errors:")
    print(errors)

                              Human_Readable_Summary
0  During the period of 2024 June to 2024 July, i...
1  During the period of 2024 June to 2024 July, i...
2  During the period of 2024 June to 2024 July, i...
3  During the period of 2024 June to 2024 July, i...
4  During the period of 2024 June to 2024 July, i...

No errors found in the summaries.
