In [None]:
import pandas as pd

# Read the source data into a DataFrame. The path points at a file inside the
# Colab runtime's /content directory; change it when running elsewhere.
input_csv_path = '/content/llm-human-readable__content_perfect1_part-00000-3770a9e6-e33b-4dd0-a702-6e2c69180fd8-c000.csv'
df = pd.read_csv(input_csv_path)

# Lookup table: two-letter postal code -> full name, covering the 50 states,
# the District of Columbia and five U.S. territories.
state_mapping = {
    # States
    'AL': 'Alabama',
    'AK': 'Alaska',
    'AZ': 'Arizona',
    'AR': 'Arkansas',
    'CA': 'California',
    'CO': 'Colorado',
    'CT': 'Connecticut',
    'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia',
    'HI': 'Hawaii',
    'ID': 'Idaho',
    'IL': 'Illinois',
    'IN': 'Indiana',
    'IA': 'Iowa',
    'KS': 'Kansas',
    'KY': 'Kentucky',
    'LA': 'Louisiana',
    'ME': 'Maine',
    'MD': 'Maryland',
    'MA': 'Massachusetts',
    'MI': 'Michigan',
    'MN': 'Minnesota',
    'MS': 'Mississippi',
    'MO': 'Missouri',
    'MT': 'Montana',
    'NE': 'Nebraska',
    'NV': 'Nevada',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
    'NM': 'New Mexico',
    'NY': 'New York',
    'NC': 'North Carolina',
    'ND': 'North Dakota',
    'OH': 'Ohio',
    'OK': 'Oklahoma',
    'OR': 'Oregon',
    'PA': 'Pennsylvania',
    'RI': 'Rhode Island',
    'SC': 'South Carolina',
    'SD': 'South Dakota',
    'TN': 'Tennessee',
    'TX': 'Texas',
    'UT': 'Utah',
    'VT': 'Vermont',
    'VA': 'Virginia',
    'WA': 'Washington',
    'WV': 'West Virginia',
    'WI': 'Wisconsin',
    'WY': 'Wyoming',
    # Federal district
    'DC': 'District of Columbia',
    # U.S. territories
    'PR': 'Puerto Rico',
    'GU': 'Guam',
    'VI': 'U.S. Virgin Islands',
    'AS': 'American Samoa',
    'MP': 'Northern Mariana Islands',
}

# Expands the broadband-type abbreviations found in the data into the wording
# used in the generated summaries.
bb_type_mapping = {'FBB': 'Fixed Broadband', 'FWA': 'Fixed Wireless Access'}

# Function to generate the new summary based on the template
def create_uniform_summary(row):
    """Build the templated English summary sentence for one carrier-pair row.

    Args:
        row: A mapping-like record (for example a DataFrame row) providing the
            keys 'l1', 'Carrier_1', 'Carrier_2', 'Carrier_1_BB_type',
            'Carrier_2_BB_type', 'num_2_outflow', 'num_1_inflow' and
            'num_3_netflow'.

    Returns:
        The summary string. If a key is missing or a count cannot be
        converted to ``int``, a string starting with
        "Error processing row: " is returned instead of raising.
    """
    try:
        state = row['l1']  # State/territory code (e.g., IL)
        # Unknown codes fall back to the raw value rather than failing.
        state_name = state_mapping.get(state, state)
        carrier_1 = row['Carrier_1']
        carrier_2 = row['Carrier_2']
        # Unknown broadband-type codes are likewise passed through unchanged.
        bb_type_1 = bb_type_mapping.get(row['Carrier_1_BB_type'], row['Carrier_1_BB_type'])
        bb_type_2 = bb_type_mapping.get(row['Carrier_2_BB_type'], row['Carrier_2_BB_type'])
        lost_subs_1 = int(row['num_2_outflow'])  # Households that left Carrier 1 for Carrier 2
        lost_subs_2 = int(row['num_1_inflow'])   # Households that left Carrier 2 for Carrier 1
        net_change = int(row['num_3_netflow'])   # Reported in the text as net flow from Carrier 1 to Carrier 2

        # Pick the whole closing phrase, article included, so the sentence is
        # grammatical in every branch ("an increase", "a decline", "no change").
        # NOTE(review): the sentence labels the value as net flow *from*
        # carrier 1 *to* carrier 2, yet a positive value is described as an
        # acquisition increase for carrier 1 -- confirm the sign convention of
        # 'num_3_netflow' against the data producer.
        if net_change > 0:
            change_phrase = "an increase in the acquisition"
        elif net_change < 0:
            change_phrase = "a decline in the retention"
        else:
            change_phrase = "no change in the subscriber base"

        # Create the summary using the template
        summary = (f"During the period of 2024 June to 2024 July, in the state of {state_name} ({state}) in USA, "
                   f"{lost_subs_1} households which were served by {carrier_1} that provides {bb_type_1} services moved to {carrier_2} that provides {bb_type_2} services "
                   f"and {lost_subs_2} households that were served by {carrier_2} moved to {carrier_1}, "
                   f"with a net flow from {carrier_1} to {carrier_2} of {net_change}. "
                   f"This signifies {change_phrase} of {carrier_1}.")

        return summary
    except Exception as e:
        # Deliberately broad: failures are reported in-band as a marker string
        # so the caller can filter on the "Error processing row" prefix.
        return f"Error processing row: {str(e)}"

# Build one templated summary per row and store it in the
# Human_Readable_Summary column.
df['Human_Readable_Summary'] = df.apply(create_uniform_summary, axis=1)

# Write the updated table out (change 'updated_file.csv' to the desired
# output file name); the row index is not written.
df.to_csv('updated_file.csv', index=False)

# Preview the first few generated summaries as a quick sanity check.
print(df[['Human_Readable_Summary']].head())

# create_uniform_summary returns failures as marker strings instead of
# raising, so look for that marker text to find the rows that failed.
error_mask = df['Human_Readable_Summary'].str.contains("Error processing row")
errors = df[error_mask]
if errors.empty:
    print("\nNo errors found in the summaries.")
else:
    print("\nRows with errors:")
    print(errors)

                              Human_Readable_Summary
0  During the period of 2024 June to 2024 July, i...
1  During the period of 2024 June to 2024 July, i...
2  During the period of 2024 June to 2024 July, i...
3  During the period of 2024 June to 2024 July, i...
4  During the period of 2024 June to 2024 July, i...

No errors found in the summaries.
