In [2]:
import pandas as pd
import numpy as np
import json

def create_ranking_comparison():
    # Read the GeoJSON file
    with open('HVI_with_columnames.geojson', 'r') as f:
        data = json.load(f)
    
    # Create initial dataframe
    df = pd.DataFrame([{
        'Ward': f['properties']['WardNo_'],
        'HVI Score': float(f['properties']['Ward_HVI_1_HVI_weighted_standardized']),
        'LST': f['properties']['LST'],
        'NDVI': f['properties']['NDVI'],
        'Public HC': f['properties']['Using public healthcare facilities'],
        'No Insurance': f['properties']['No medical insurance'],
        'Hunger Risk': f['properties']['Household hunger risk'],
        'Crowded': f['properties']['Crowded dwellings']
    } for f in data['features']])
    
    # Create rank columns for each metric
    metrics = {
        'HVI': 'HVI Score',
        'LST': 'LST',
        'NDVI': 'NDVI',
        'Public HC': 'Public HC',
        'No Insurance': 'No Insurance',
        'Hunger': 'Hunger Risk',
        'Crowded': 'Crowded'
    }
    
    # Calculate ranks (ascending=False for all except NDVI where lower is worse)
    for metric_name, column in metrics.items():
        ascending = True if metric_name == 'NDVI' else False
        df[f'{metric_name}_rank'] = df[column].rank(ascending=ascending)
    
    # Get top 10 wards by HVI
    top_10_hvi = df.nlargest(10, 'HVI Score')[['Ward', 'HVI Score']]
    
    # Create comparison table for these wards
    comparison = pd.DataFrame()
    comparison['Ward'] = top_10_hvi['Ward']
    
    # Add ranks for each metric
    for metric_name in metrics.keys():
        comparison[f'{metric_name}_rank'] = df[df['Ward'].isin(top_10_hvi['Ward'])][f'{metric_name}_rank']
    
    # Add actual values
    for metric_name, column in metrics.items():
        comparison[f'{metric_name}_value'] = df[df['Ward'].isin(top_10_hvi['Ward'])][column]
    
    # Sort by HVI rank
    comparison = comparison.sort_values('HVI_rank')
    
    # Format for display
    display_cols = ['Ward']
    for metric in metrics.keys():
        display_cols.extend([f'{metric}_rank', f'{metric}_value'])
    
    formatted = comparison[display_cols].round(2)
    
    return formatted

# Create and display the comparison table
comparison_table = create_ranking_comparison()
print("\nRank Comparison for Top 10 HVI Wards:")
print("=" * 100)
print(comparison_table.to_string())

# Save to Excel for better formatting
comparison_table.to_excel('ward_rank_comparison.xlsx', index=False)


Rank Comparison for Top 10 HVI Wards:
    Ward  HVI_rank  HVI_value  LST_rank  LST_value  NDVI_rank  NDVI_value  Public HC_rank  Public HC_value  No Insurance_rank  No Insurance_value  Hunger_rank  Hunger_value  Crowded_rank  Crowded_value
86    87       1.0       1.00     129.0      25.36      131.0        0.21           122.0            11.29              125.0               17.38        115.0          4.82         126.5           0.00
116  117       2.0       0.96     128.0      25.59      135.0        0.21           124.0            10.85              128.5               13.96        108.0          6.33         106.0           2.48
98    99       3.0       0.92     124.0      26.05      133.0        0.21           112.0            17.88              109.0               32.86        132.0          1.05         126.5           0.00
103  104       4.0       0.92     116.0      26.56      125.0        0.20           134.0             4.62              130.0               13.90        

In [3]:
import pandas as pd
import json

def create_univariate_rankings():
    # Read the GeoJSON file
    with open('HVI_with_columnames.geojson', 'r') as f:
        data = json.load(f)
    
    # Create initial dataframe
    df = pd.DataFrame([{
        'Ward': f['properties']['WardNo_'],
        'HVI Score': float(f['properties']['Ward_HVI_1_HVI_weighted_standardized']),
        'LST': f['properties']['LST'],
        'NDVI': f['properties']['NDVI'],
        'Public HC': f['properties']['Using public healthcare facilities'],
        'No Insurance': f['properties']['No medical insurance'],
        'Hunger Risk': f['properties']['Household hunger risk'],
        'Crowded': f['properties']['Crowded dwellings']
    } for f in data['features']])
    
    # Define metrics and whether higher or lower values indicate vulnerability
    metrics = {
        'HVI Score': {'ascending': False, 'label': 'Combined HVI'},
        'LST': {'ascending': False, 'label': 'Land Surface Temperature'},
        'NDVI': {'ascending': True, 'label': 'Vegetation Index'},  # Lower is worse
        'Public HC': {'ascending': False, 'label': 'Public Healthcare Use'},
        'No Insurance': {'ascending': False, 'label': 'No Medical Insurance'},
        'Hunger Risk': {'ascending': False, 'label': 'Household Hunger Risk'},
        'Crowded': {'ascending': False, 'label': 'Crowded Dwellings'}
    }
    
    # Create a DataFrame to store top 10 for each metric
    results = pd.DataFrame()
    
    # Get top 10 for each metric
    for metric, config in metrics.items():
        # Sort and get top 10
        top_10 = df.nlargest(10, metric) if not config['ascending'] else df.nsmallest(10, metric)
        
        # Add to results
        results[f'{config["label"]} Ward'] = top_10['Ward']
        results[f'{config["label"]} Value'] = top_10[metric].round(3)
    
    return results

# Create and display the rankings
rankings = create_univariate_rankings()
print("\nTop 10 Most Vulnerable Wards by Different Metrics:")
print("=" * 100)
print(rankings.to_string())

# Save to Excel for better formatting
rankings.to_excel('univariate_ward_rankings.xlsx', index=True, index_label='Rank')

# Optional: Print overlap analysis
print("\nOverlap Analysis with Combined HVI Top 10:")
print("=" * 100)

hvi_wards = set(rankings['Combined HVI Ward'])
metrics = [col for col in rankings.columns if 'Ward' in col and col != 'Combined HVI Ward']

for metric in metrics:
    metric_wards = set(rankings[metric])
    overlap = hvi_wards & metric_wards
    print(f"\n{metric.replace(' Ward', '')}:")
    print(f"Shared wards with HVI top 10: {len(overlap)}")
    print(f"Shared wards: {sorted(overlap)}")


Top 10 Most Vulnerable Wards by Different Metrics:
    Combined HVI Ward  Combined HVI Value Land Surface Temperature Ward  Land Surface Temperature Value Vegetation Index Ward  Vegetation Index Value Public Healthcare Use Ward  Public Healthcare Use Value No Medical Insurance Ward  No Medical Insurance Value Household Hunger Risk Ward  Household Hunger Risk Value Crowded Dwellings Ward  Crowded Dwellings Value
86                 87               1.000                           NaN                             NaN                   NaN                     NaN                        NaN                          NaN                       NaN                         NaN                        NaN                          NaN                    NaN                      NaN
116               117               0.959                           NaN                             NaN                   NaN                     NaN                        NaN                          NaN               

In [4]:
import pandas as pd
import json

# Read the GeoJSON file
with open('HVI_with_columnames.geojson', 'r') as f:
    data = json.load(f)

# Create initial dataframe
rows = []
for feature in data['features']:
    props = feature['properties']
    rows.append({
        'Ward': props['WardNo_'],
        'HVI': float(props['Ward_HVI_1_HVI_weighted_standardized']),  # Ensure float conversion
        'LST': float(props['LST']),  # Convert to float
        'NDVI': float(props['NDVI']),  # Convert to float
        'Public_HC': float(props['Using public healthcare facilities']),  # Convert to float
        'No_Insurance': float(props['No medical insurance']),  # Convert to float
        'Hunger_Risk': float(props['Household hunger risk']),  # Convert to float
        'Crowded': float(props['Crowded dwellings'])  # Convert to float
    })

df = pd.DataFrame(rows)

# Create separate top 10 lists
top_10_hvi = df.nlargest(10, 'HVI')[['Ward', 'HVI']].reset_index(drop=True)
top_10_lst = df.nlargest(10, 'LST')[['Ward', 'LST']].reset_index(drop=True)
top_10_ndvi = df.nsmallest(10, 'NDVI')[['Ward', 'NDVI']].reset_index(drop=True)  # Note: smallest for NDVI
top_10_health = df.nlargest(10, 'Public_HC')[['Ward', 'Public_HC']].reset_index(drop=True)
top_10_insurance = df.nlargest(10, 'No_Insurance')[['Ward', 'No_Insurance']].reset_index(drop=True)
top_10_hunger = df.nlargest(10, 'Hunger_Risk')[['Ward', 'Hunger_Risk']].reset_index(drop=True)
top_10_crowded = df.nlargest(10, 'Crowded')[['Ward', 'Crowded']].reset_index(drop=True)

# Create final comparison table
comparison = pd.DataFrame({
    'HVI Rank': top_10_hvi['Ward'],
    'HVI Value': top_10_hvi['HVI'].round(3),
    'LST Rank': top_10_lst['Ward'],
    'LST Value': top_10_lst['LST'].round(2),
    'NDVI Rank': top_10_ndvi['Ward'],
    'NDVI Value': top_10_ndvi['NDVI'].round(3),
    'Public HC Rank': top_10_health['Ward'],
    'Public HC Value': top_10_health['Public_HC'].round(1),
    'No Insurance Rank': top_10_insurance['Ward'],
    'No Insurance Value': top_10_insurance['No_Insurance'].round(1),
    'Hunger Risk Rank': top_10_hunger['Ward'],
    'Hunger Risk Value': top_10_hunger['Hunger_Risk'].round(1),
    'Crowded Rank': top_10_crowded['Ward'],
    'Crowded Value': top_10_crowded['Crowded'].round(1)
})

# Print the comparison table
print("\nTop 10 Worst Wards by Each Metric:")
print("=" * 100)
print(comparison.to_string())

# Save to Excel
comparison.to_excel('univariate_rankings_comparison.xlsx', index=True, index_label='Rank')

# Print overlap analysis
print("\nOverlap Analysis with HVI Top 10:")
print("=" * 100)
hvi_wards = set(top_10_hvi['Ward'])

metrics = {
    'LST': top_10_lst['Ward'],
    'NDVI': top_10_ndvi['Ward'],
    'Public Healthcare': top_10_health['Ward'],
    'No Insurance': top_10_insurance['Ward'],
    'Hunger Risk': top_10_hunger['Ward'],
    'Crowded Dwellings': top_10_crowded['Ward']
}

for metric_name, metric_wards in metrics.items():
    overlap = hvi_wards.intersection(set(metric_wards))
    print(f"\n{metric_name}:")
    print(f"Number of shared wards with HVI top 10: {len(overlap)}")
    if len(overlap) > 0:
        print(f"Shared wards: {sorted(list(overlap))}")


Top 10 Worst Wards by Each Metric:
  HVI Rank  HVI Value LST Rank  LST Value NDVI Rank  NDVI Value Public HC Rank  Public HC Value No Insurance Rank  No Insurance Value Hunger Risk Rank  Hunger Risk Value Crowded Rank  Crowded Value
0       87      1.000      116      30.66        63       0.045            128             98.6               128                97.1                2               70.0          113           51.5
1      117      0.959      108      30.56       116       0.054             35             96.5                61                94.5                6               69.0           95           43.4
2       99      0.920      113      30.37       108       0.057            127             95.6               116                94.2               48               66.7          114           40.8
3      104      0.917      135      30.37        62       0.058             21             94.8                19                94.2              121               64.4   