In [1]:
import pandas as pd
import re

# District-specific selected vegetables from PDF page 10 table, extracted
# manually from the table image/text. A district that is missing from this
# dict falls back to `common_vegetables` (plus `fallback_extras`) below.
district_veggies_data = {
    'Ampara': ['Bean (yard long)', 'Kathurumurunga'],
    'Anuradhapura': ['Bean (yard long)', 'Kathurumurunga'],
    'Badulla': ['Bean (yard long)', 'Kathurumurunga', 'Kankun', 'Eggplant'],
    'Batticaloa': ['Bean (yard long)', 'Kathurumurunga'],
    'Colombo': ['Bean (yard long)', 'Kathurumurunga'],
    'Galle': ['Bean (yard long)', 'Eggplant', 'Kathurumurunga', 'Mukunuwenna', 'Okra'],
    'Gampaha': ['Bean (yard long)', 'Eggplant', 'Kathurumurunga', 'Kankun', 'Mukunuwenna', 'Okra'],
    'Hambantota': ['Bean (yard long)', 'Kathurumurunga', 'Okra', 'Banana blossom'],
    'Jaffna': ['Ash Plantain', 'Bean (yard long)', 'Kathurumurunga', 'Eggplant', 'Kankun', 'Okra'],
    'Kalutara': ['Bean (yard long)', 'Kathurumurunga'],
    'Kandy': ['Bean (yard long)', 'Kathurumurunga', 'Okra'],
    'Kegalle': ['Bean (yard long)', 'Kathurumurunga'],
    'Kilinochchi': ['Bean (yard long)', 'Kathurumurunga'],
    'Matara': ['Bean (yard long)', 'Kathurumurunga'],
    'Matale': ['Bean (yard long)', 'Kathurumurunga'],
    'Monaragala': ['Bean (yard long)', 'Kathurumurunga'],
    'Mullaitivu': ['Bean (yard long)', 'Kathurumurunga'],
    'Nuwara Eliya': ['Bean (yard long)', 'Kathurumurunga'],
    'Polonnaruwa': ['Bean (yard long)', 'Kathurumurunga'],
    'Puttalam': ['Bean (yard long)', 'Kathurumurunga'],
    'Ratnapura': ['Bean (yard long)', 'Kathurumurunga'],
    'Trincomalee': ['Bean (yard long)', 'Kathurumurunga'],
    'Vavuniya': ['Bean (yard long)', 'Kathurumurunga']
}

# Common vegetables used for any district that is not in the PDF table.
common_vegetables = ['Bean (yard long)', 'Kathurumurunga']

# Extra vegetables added on top of `common_vegetables`, keyed by district.
# This is only consulted for districts absent from `district_veggies_data`,
# and Mannar is the only such district in `all_districts`. Entries for
# districts that already have a table row would never be read, so none are
# listed here; the table above is the single source of truth for them.
fallback_extras = {
    'Mannar': ['Okra', 'Eggplant'],
}

# Districts to emit, in output order.
all_districts = [
    'Ampara', 'Anuradhapura', 'Badulla', 'Batticaloa', 'Colombo', 'Galle',
    'Gampaha', 'Hambantota', 'Jaffna', 'Kalutara', 'Kandy', 'Kegalle',
    'Kilinochchi', 'Mannar', 'Matale', 'Matara', 'Monaragala', 'Mullaitivu',
    'Nuwara Eliya', 'Polonnaruwa', 'Puttalam', 'Ratnapura', 'Trincomalee',
    'Vavuniya'
]

# Parallel lists: one district name and one comma-separated vegetable string
# per entry of `all_districts`.
districts = []
selected_veggies_list = []

for district in all_districts:
    veggies = district_veggies_data.get(district)
    if veggies is None:
        # No table row: start from the common set and add any known extras.
        # `+` builds a new list, so `common_vegetables` is never mutated.
        veggies = common_vegetables + fallback_extras.get(district, [])

    # Remove duplicates and sort so the output string is deterministic.
    veggies_str = ', '.join(sorted(set(veggies)))
    districts.append(district)
    selected_veggies_list.append(veggies_str)

# Assemble the two parallel lists into a two-column table, one row per district.
frame_columns = {
    'District': districts,
    'Selected_Veggies': selected_veggies_list,
}
cotd_veggies_df = pd.DataFrame(frame_columns)

# Persist the table without the positional index column.
cotd_veggies_df.to_csv('cotd_selected_veggies.csv', index=False)

# Console summary: confirmation line, then the first ten rows.
print("Created cotd_selected_veggies.csv with district-specific vegetable preferences")
print("\nSample of districts with their selected vegetables:")
print(cotd_veggies_df.head(10))

# Special focus on Southern Province districts: print each one's full
# (untruncated) vegetable string.
print("\n=== Southern Province Districts ===")
southern_districts = ['Galle', 'Matara', 'Hambantota']
for district in southern_districts:
    is_match = cotd_veggies_df['District'] == district
    # First matching row; raises IndexError if the district is absent.
    veggies = cotd_veggies_df.loc[is_match, 'Selected_Veggies'].iloc[0]
    print(f"{district}: {veggies}")

# Create a mapping dictionary (district -> list of vegetable names) for easy
# access in code.ipynb. The 'Selected_Veggies' column holds comma-separated
# strings, so each one is split back into a list with whitespace trimmed.
cotd_veggies_dict = {
    district: [v.strip() for v in veggies_csv.split(',')]
    for district, veggies_csv in zip(
        cotd_veggies_df['District'], cotd_veggies_df['Selected_Veggies']
    )
}

# Render the mapping as Python source. `!r` lets Python quote and escape each
# string itself, so a name containing an apostrophe or backslash still yields
# an importable module (hand-placed quotes would not).
mapping_lines = [
    "# District-specific vegetable preferences from PDF Cost of Diet analysis\n",
    "# Generated from cotd_selected_veggies.csv\n\n",
    "COTD_VEGGIES = {\n",
]
mapping_lines.extend(
    f"    {district!r}: {veggies!r},\n"
    for district, veggies in cotd_veggies_dict.items()
)
mapping_lines.append("}\n")

# Save the mapping as a Python file for easy import. The encoding is explicit
# so the result does not depend on the platform's default locale.
with open('cotd_veggies_mapping.py', 'w', encoding='utf-8') as f:
    f.writelines(mapping_lines)

print("\nCreated cotd_veggies_mapping.py for easy import in code.ipynb")
print(f"\nTotal districts: {len(cotd_veggies_df)}")
print(f"Example mapping for Hambantota: {cotd_veggies_dict.get('Hambantota', [])}")

Created cotd_selected_veggies.csv with district-specific vegetable preferences

Sample of districts with their selected vegetables:
       District                                   Selected_Veggies
0        Ampara                   Bean (yard long), Kathurumurunga
1  Anuradhapura                   Bean (yard long), Kathurumurunga
2       Badulla  Bean (yard long), Eggplant, Kankun, Kathurumur...
3    Batticaloa                   Bean (yard long), Kathurumurunga
4       Colombo                   Bean (yard long), Kathurumurunga
5         Galle  Bean (yard long), Eggplant, Kathurumurunga, Mu...
6       Gampaha  Bean (yard long), Eggplant, Kankun, Kathurumur...
7    Hambantota  Banana blossom, Bean (yard long), Kathurumurun...
8        Jaffna  Ash Plantain, Bean (yard long), Eggplant, Kank...
9      Kalutara                   Bean (yard long), Kathurumurunga

=== Southern Province Districts ===
Galle: Bean (yard long), Eggplant, Kathurumurunga, Mukunuwenna, Okra
Matara: Bean (yard long),