In [17]:
import numpy as np
import pandas as pd

# Define the non-linear weight function for income with a floor
def income_weight(income, floor_weight=0.5):
    """Return the income weight ``1 / log1p(income)``, never less than ``floor_weight``.

    Works on a single number as well as on array-like input (list, NumPy
    array, pandas Series); array-like input is weighted element-wise.

    Parameters
    ----------
    income : scalar or array-like
        Income value(s) to weight.
    floor_weight : float, default 0.5
        Lower bound applied to the computed weight.

    Returns
    -------
    scalar or array-like
        ``max(1 / log1p(income), floor_weight)`` for each input value.

    Notes
    -----
    * ``1 / log1p(income)`` falls below 0.5 as soon as ``income`` exceeds
      ``e**2 - 1`` (about 6.39), so with the default floor every larger
      income receives exactly ``floor_weight``.
    * An income of 0 makes ``log1p`` return 0, so the weight is ``inf``
      (NumPy emits a divide-by-zero warning). NaN input yields NaN.
    """
    # Calculate the un-floored weight
    raw_weight = 1 / np.log1p(income)
    # np.maximum compares element-wise; the built-in max() raises on arrays
    return np.maximum(raw_weight, floor_weight)

# Pipeline: join tract risk scores onto income rows, then score every tract

# File locations (two inputs, one output)
nri_csv_path = 'C:\\Users\\17912\\OneDrive\\Desktop\\NRI_Table_CensusTracts_FL_short.csv'
income_csv_path = 'C:\\Users\\17912\\OneDrive\\Desktop\\FL_income.csv'
results_csv_path = 'C:\\Users\\17912\\OneDrive\\Desktop\\Risk_and_Income_Data2.csv'

nri_data = pd.read_csv(nri_csv_path)
income_data = pd.read_csv(income_csv_path)

# Put both tables on one key format: remove every 'T' from NRI_ID and
# drop the first 9 characters of the geography string
nri_data['NRI_ID'] = nri_data['NRI_ID'].str.replace('T', '')
income_data['Geography_Modified'] = income_data['geography'].str.slice(start=9)

# Left-join the risk score onto the income rows via the shared key
risk_columns = nri_data[['NRI_ID', 'RISK_SCORE']]
merged_data = income_data.merge(
    risk_columns,
    how='left',
    left_on='Geography_Modified',
    right_on='NRI_ID',
)

# Force both scoring inputs to numeric (unparseable values become NaN),
# then keep only rows where both are present
score_inputs = ['RISK_SCORE', 'families_median_income']
for column in score_inputs:
    merged_data[column] = pd.to_numeric(merged_data[column], errors='coerce')
merged_data = merged_data.dropna(subset=score_inputs).copy()

# Z-score of risk; income Z-score is negated (lower income -> higher score)
# and multiplied by the floored non-linear income weight
risk_score = merged_data['RISK_SCORE']
median_income = merged_data['families_median_income']
merged_data['Z_Risk'] = (risk_score - risk_score.mean()) / risk_score.std()
income_z = (median_income - median_income.mean()) / median_income.std()
merged_data['Z_Income'] = -income_z * median_income.apply(income_weight)

# Composite score is the mean of the two Z-scores; the rescaled version
# maps it linearly so the minimum is 0 and the maximum is 100
composite = (merged_data['Z_Risk'] + merged_data['Z_Income']) / 2
merged_data['Composite_Score'] = composite
lowest, highest = composite.min(), composite.max()
merged_data['Rescaled_Composite_Score'] = (composite - lowest) / (highest - lowest) * 100

# Persist the full table and preview the key columns
merged_data.to_csv(results_csv_path, index=False)
preview_columns = [
    'geography',
    'RISK_SCORE',
    'families_median_income',
    'Composite_Score',
    'Rescaled_Composite_Score',
]
print(merged_data[preview_columns].head())

              geography  RISK_SCORE  families_median_income  Composite_Score  \
2  1400000US12001000301   70.146982                 48464.0        -0.109586   
3  1400000US12001000302   65.697094                 65109.0        -0.379530   
4  1400000US12001000400   83.301622                 55224.0         0.293533   
5  1400000US12001000500   81.371593                 96591.0        -0.064489   
6  1400000US12001000600   72.823812                 27402.0         0.130761   

   Rescaled_Composite_Score  
2                 70.296048  
3                 62.984962  
4                 81.213986  
5                 71.517434  
6                 76.805513  


In [7]:
import pandas as pd

# Requires `merged_data` (with 'geography' and 'Composite_Score' columns)
# to have been created by an earlier cell

# Map each geography to its composite score
geography_scores = {
    geo: score
    for geo, score in zip(merged_data['geography'], merged_data['Composite_Score'])
}

# Rebuild the mapping from highest to lowest score, so that the dict's
# iteration order encodes the ranking
ranked_pairs = sorted(geography_scores.items(), key=lambda pair: pair[1], reverse=True)
sorted_geography_scores = dict(ranked_pairs)

# Show the ranked mapping
print(sorted_geography_scores)

{'1400000US12086000508': 0.9871612110466483, '1400000US12061050908': 0.9853679445888046, '1400000US12111380300': 0.9829437206139211, '1400000US12011030803': 0.9666961175997093, '1400000US12099008001': 0.9662668346087724, '1400000US12005001600': 0.954704861823021, '1400000US12057014200': 0.9538769255153772, '1400000US12027010200': 0.9493349932829713, '1400000US12015010302': 0.9465189930562392, '1400000US12099002900': 0.9433692734984495, '1400000US12086000903': 0.9411583520476472, '1400000US12071001401': 0.9408843840332122, '1400000US12086001701': 0.9355082154158713, '1400000US12001000901': 0.9314919729544531, '1400000US12061050401': 0.9285953389449164, '1400000US12011080500': 0.9272923250085556, '1400000US12099001402': 0.926025310122151, '1400000US12081000105': 0.9219225496505039, '1400000US12021011400': 0.9213615324000457, '1400000US12011030500': 0.9159325637290437, '1400000US12086009102': 0.912804284298403, '1400000US12011100103': 0.9126335614590901, '1400000US12011100501': 0.91098436

In [13]:
def compare_urgency(sorted_dict, geo1, geo2):
    """Report which of two geographies is more urgent according to ``sorted_dict``.

    Parameters
    ----------
    sorted_dict : dict
        Mapping of geography -> score whose iteration order runs from most
        urgent to least urgent (the notebook builds it sorted by composite
        score, descending).
    geo1, geo2 : hashable
        Keys of the two geographies to compare.

    Returns
    -------
    str
        A sentence naming the more urgent geography, a sentence stating
        equal urgency (same key, or both keys map to the same score), or a
        "not found" message when either key is missing.
    """
    # Ensure both geographies are in the dictionary
    if geo1 not in sorted_dict or geo2 not in sorted_dict:
        return "One or both geographies not found in the dataset."

    # Tied scores share a rank; their relative position in the dict is just
    # an artifact of the sort, so it must not decide the comparison.
    if geo1 == geo2 or sorted_dict[geo1] == sorted_dict[geo2]:
        return f"{geo1} and {geo2} have equal urgency."

    # Whichever of the two keys is reached first in iteration order ranks
    # higher; the scan stops at that first hit instead of copying all keys.
    first_seen = next(key for key in sorted_dict if key == geo1 or key == geo2)
    if first_seen == geo1:
        return f"{geo1} is more urgent than {geo2}."
    return f"{geo2} is more urgent than {geo1}."
        
# Example: report which of two tract geographies ranks as more urgent
tract_a = '1400000US12011060115'
tract_b = '1400000US12011060124'
urgency_message = compare_urgency(sorted_geography_scores, tract_a, tract_b)
print(urgency_message)

1400000US12011060124 is more urgent than 1400000US12011060115.
