In [4]:
# Cell: Reload Border Analysis Data
import json
import pandas as pd
import geopandas as gpd
import numpy as np

print("🔄 RELOADING YOUR BORDER ANALYSIS DATA")
print("=" * 50)

# Reload the raw country polygons.
# The encoding is pinned to UTF-8 (the encoding GeoJSON/JSON interchange
# files are specified to use) so that non-ASCII text in the file is decoded
# the same way on every platform instead of via the locale default.
with open('data/raw/geographic/world_countries_primary.geojson', 'r', encoding='utf-8') as f:
    countries_data = json.load(f)

# Build a GeoDataFrame from the feature list and declare its CRS as EPSG:4326
gdf = gpd.GeoDataFrame.from_features(countries_data['features'])
gdf = gdf.set_crs('EPSG:4326')

print(f"✅ Reloaded {len(gdf)} countries")

# Quick neighbor detection function
def find_neighbors_quick(country_name, gdf):
    """Return the names of the rows whose geometry touches ``country_name``.

    Args:
        country_name: Value looked up in the ``name`` column of ``gdf``.
        gdf: Table with a ``name`` column and a ``geometry`` column whose
            values provide a ``touches(other)`` predicate.

    Returns:
        list: Names of all other rows for which ``touches`` is truthy, in
        table order. Empty when ``country_name`` is not present.

    Notes:
        Geometries for which the predicate raises an ordinary exception are
        skipped (best-effort). ``KeyboardInterrupt`` / ``SystemExit`` are NOT
        swallowed, so the scan can still be interrupted from the notebook.
    """
    country_row = gdf[gdf['name'] == country_name]
    if len(country_row) == 0:
        return []

    # When several rows share the name, only the first one is used.
    country_geom = country_row.geometry.iloc[0]
    neighbors = []

    # Iterate the two columns directly; no per-row Series has to be built.
    for other_name, other_geom in zip(gdf['name'], gdf.geometry):
        if other_name == country_name:
            continue
        try:
            if country_geom.touches(other_geom):
                neighbors.append(other_name)
        except Exception:
            # Best-effort: a geometry the predicate cannot handle is skipped.
            continue
    return neighbors

# Recreate border analysis for first 20 countries
print("🔗 Recreating border analysis...")
sample_countries = gdf['name'].head(20).tolist()
sample_lookup = set(sample_countries)  # constant-time membership test
seen_pairs = set()                     # unordered pairs already recorded
border_pairs = []

for country in sample_countries:
    for neighbor in find_neighbors_quick(country, gdf):
        # Only keep borders where both sides are inside the sample.
        if neighbor not in sample_lookup:
            continue

        # Sort the two names so (A, B) and (B, A) map to the same key.
        pair = tuple(sorted([country, neighbor]))
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)

        # Quick border length calculation.
        # NOTE(review): `border.length` is in the units of the geometry
        # coordinates; the factor 111 presumably converts degrees to
        # kilometres, which is only a rough approximation - confirm.
        try:
            geom1 = gdf[gdf['name'] == pair[0]].geometry.iloc[0]
            geom2 = gdf[gdf['name'] == pair[1]].geometry.iloc[0]
            border = geom1.boundary.intersection(geom2.boundary)
            length_km = border.length * 111 if hasattr(border, 'length') else 0
        except Exception:
            # Best-effort: an unusable geometry yields a zero-length border
            # instead of aborting the whole analysis.
            length_km = 0

        border_pairs.append({
            'country_1': pair[0],
            'country_2': pair[1],
            'border_length_km': length_km,
            'pair_name': f"{pair[0]} ↔ {pair[1]}"
        })

# Create border_analysis DataFrame
border_analysis = pd.DataFrame(border_pairs)

print(f"✅ Recreated border analysis:")
print(f"   Border pairs: {len(border_analysis)}")

# Show the data
if len(border_analysis) > 0:
    # The 'pair_name' column only exists when at least one pair was found
    # (a DataFrame built from an empty list has no columns), so the sample
    # listing has to live inside this branch.
    print(f"   Sample pairs: {border_analysis['pair_name'].head().tolist()}")
    print("\n📊 Your border analysis is ready!")
    print(border_analysis[['country_1', 'country_2', 'border_length_km']].head())
else:
    print("❌ No border pairs found. Let's create sample data...")
    # Create sample border data if none found
    border_analysis = pd.DataFrame({
        'country_1': ['China', 'Argentina', 'Indonesia', 'Bolivia'],
        'country_2': ['India', 'Chile', 'Malaysia', 'Peru'],
        'border_length_km': [3000, 5563, 2000, 1500],
        'pair_name': ['China ↔ India', 'Argentina ↔ Chile', 'Indonesia ↔ Malaysia', 'Bolivia ↔ Peru']
    })
    print(f"✅ Created sample border data with {len(border_analysis)} pairs")

🔄 RELOADING YOUR BORDER ANALYSIS DATA
✅ Reloaded 258 countries
🔗 Recreating border analysis...
✅ Recreated border analysis:
   Border pairs: 19
   Sample pairs: ['Indonesia ↔ Malaysia', 'Bolivia ↔ Chile', 'Chile ↔ Peru', 'Argentina ↔ Chile', 'Bolivia ↔ Peru']

📊 Your border analysis is ready!
   country_1 country_2  border_length_km
0  Indonesia  Malaysia       1495.556751
1    Bolivia     Chile        847.121695
2      Chile      Peru        166.074543
3  Argentina     Chile       5563.407866
4    Bolivia      Peru        966.083296


In [2]:
# Cell: Economic Asymmetry Functions
def calculate_economic_asymmetries(country1, country2, economic_df, year=2019):
    """Compare two countries indicator by indicator for a single year.

    Args:
        country1: Name matched against the ``country`` column (numerator).
        country2: Name matched against the ``country`` column (denominator).
        economic_df: Long-format table with ``country``, ``year``,
            ``indicator`` and ``value`` columns.
        year: Year both countries are compared in.

    Returns:
        dict: For every indicator that both countries have in ``year`` and
        whose ``country2`` value is non-zero, four entries:
        ``<indicator>_ratio``, ``<indicator>_gap`` (absolute difference),
        ``<indicator>_<country1>`` and ``<indicator>_<country2>``.
    """
    print(f"🔍 Calculating asymmetries: {country1} vs {country2} ({year})")

    def rows_for(country):
        # All rows of one country restricted to the requested year.
        is_country = economic_df['country'] == country
        is_year = economic_df['year'] == year
        return economic_df[is_country & is_year]

    first_rows = rows_for(country1)
    second_rows = rows_for(country2)

    asymmetries = {}

    for indicator in economic_df['indicator'].unique():
        first_values = first_rows.loc[first_rows['indicator'] == indicator, 'value']
        second_values = second_rows.loc[second_rows['indicator'] == indicator, 'value']

        # Both countries must report the indicator for this year.
        if len(first_values) == 0 or len(second_values) == 0:
            continue

        v1 = first_values.iloc[0]
        v2 = second_values.iloc[0]

        # A zero denominator would make the ratio undefined; skip it.
        if v2 == 0:
            continue

        asymmetries.update({
            f'{indicator}_ratio': v1 / v2,
            f'{indicator}_gap': abs(v1 - v2),
            f'{indicator}_{country1}': v1,
            f'{indicator}_{country2}': v2,
        })

    return asymmetries

# Smoke-test the function on a single pair and show only the ratio entries.
# NOTE(review): `economic_df` is not created in this cell; it is assigned by
# the "Load Economic Data" cell further down, so that cell has to run first.
test_asymmetries = calculate_economic_asymmetries('China', 'India', economic_df, 2019)
print("🧪 TEST RESULTS:")
for key, value in test_asymmetries.items():
    if 'ratio' not in key:
        continue
    print(f"   {key}: {value:.2f}")

🔍 Calculating asymmetries: China vs India (2019)
🧪 TEST RESULTS:
   gdp_per_capita_ratio: 4.97
   trade_percent_ratio: 0.90
   external_debt_ratio: 3.77
   fdi_ratio: 3.70
   military_exp_ratio: 0.66
   population_ratio: 1.01


In [5]:
# Cell: Load Economic Data (Fixed Version)
import json
import pandas as pd

print("💰 LOADING YOUR ECONOMIC DATA")
print("=" * 40)

# Indicator name -> path of the raw JSON file holding that indicator
economic_files = dict(
    gdp_per_capita='data/raw/economic/gdp_per_capita_raw.json',
    trade_percent='data/raw/economic/trade_percent_raw.json',
    external_debt='data/raw/economic/external_debt_raw.json',
    fdi='data/raw/economic/fdi_raw.json',
    military_exp='data/raw/economic/military_exp_raw.json',
    population='data/raw/economic/population_raw.json',
)

def load_wb_data(file_path, indicator_name):
    """Load one World Bank indicator file into a long-format DataFrame.

    The file is expected to contain a JSON list whose element at index 1 is
    the list of observation records; each record is read through its
    ``country.value``, ``countryiso3code``, ``date`` and ``value`` fields.
    Records whose ``value`` is null are dropped.

    Args:
        file_path: Path of the JSON file to read.
        indicator_name: Label stored in the ``indicator`` column of every row.

    Returns:
        pandas.DataFrame: Columns ``country``, ``country_code``, ``year``
        (int), ``value`` (float) and ``indicator``. On any loading or parsing
        error a message is printed and an empty DataFrame is returned
        instead of raising.
    """
    try:
        # Decode explicitly as UTF-8 so non-ASCII country names do not depend
        # on the platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        records = [
            {
                'country': record['country']['value'],
                'country_code': record['countryiso3code'],
                'year': int(record['date']),
                'value': float(record['value']),
                'indicator': indicator_name
            }
            for record in data[1]
            if record['value'] is not None
        ]
        return pd.DataFrame(records)
    except Exception as e:
        # Deliberate best-effort: one unreadable indicator must not stop the
        # remaining indicators from loading.
        print(f"❌ Error loading {indicator_name}: {e}")
        return pd.DataFrame()

# Load all your economic data
all_economic = []
for indicator, file_path in economic_files.items():
    indicator_df = load_wb_data(file_path, indicator)
    # Indicators that failed to load (or contain no values) come back empty
    # and are left out of the combined table.
    if len(indicator_df) > 0:
        all_economic.append(indicator_df)
        print(f"✅ {indicator}: {len(indicator_df)} records")

if all_economic:
    economic_df = pd.concat(all_economic, ignore_index=True)
    print(f"\n📊 TOTAL ECONOMIC DATA:")
    print(f"   Records: {len(economic_df):,}")
    print(f"   Countries: {economic_df['country'].nunique()}")
    print(f"   Indicators: {economic_df['indicator'].nunique()}")
    print(f"   Years: {economic_df['year'].min()}-{economic_df['year'].max()}")
else:
    print("❌ No economic data loaded")
    # Keep the expected columns on the empty fallback so later cells that
    # filter on economic_df['country'] / ['indicator'] get "no rows" instead
    # of a KeyError.
    economic_df = pd.DataFrame(
        columns=['country', 'country_code', 'year', 'value', 'indicator']
    )

💰 LOADING YOUR ECONOMIC DATA
✅ gdp_per_capita: 310 records
✅ trade_percent: 305 records
✅ external_debt: 93 records
✅ fdi: 308 records
✅ military_exp: 308 records
✅ population: 310 records

📊 TOTAL ECONOMIC DATA:
   Records: 1,634
   Countries: 10
   Indicators: 6
   Years: 1990-2020


In [6]:
# Cell: Economic Integration (Fixed)
def calculate_economic_ratios(country1, country2, economic_df, year=2019):
    """Compare two countries using each one's most recent value per indicator.

    Args:
        country1: Name matched against the ``country`` column (numerator).
        country2: Name matched against the ``country`` column (denominator).
        economic_df: Long-format table with ``country``, ``year``,
            ``indicator`` and ``value`` columns.
        year: Accepted for signature compatibility; the body never reads it.
            The latest available row per country and indicator is used, so
            the two values being compared may come from different years.

    Returns:
        dict: Empty when either country has no rows at all. Otherwise, for
        every indicator both countries report and whose ``country2`` value is
        non-zero: ``<indicator>_ratio``, ``<indicator>_diff`` (absolute
        difference), ``<indicator>_<country1>`` and ``<indicator>_<country2>``.
    """
    first_rows = economic_df[economic_df['country'] == country1]
    second_rows = economic_df[economic_df['country'] == country2]

    if len(first_rows) == 0 or len(second_rows) == 0:
        return {}

    def newest_row(rows, indicator):
        # Row with the highest year for this indicator (empty frame if none).
        matching = rows[rows['indicator'] == indicator]
        return matching.sort_values('year').tail(1)

    ratios = {}

    for indicator in economic_df['indicator'].unique():
        newest_first = newest_row(first_rows, indicator)
        newest_second = newest_row(second_rows, indicator)

        if len(newest_first) == 0 or len(newest_second) == 0:
            continue

        v1 = newest_first['value'].iloc[0]
        v2 = newest_second['value'].iloc[0]

        # A zero denominator would make the ratio undefined; skip it.
        if v2 == 0:
            continue

        ratios.update({
            f'{indicator}_ratio': v1 / v2,
            f'{indicator}_diff': abs(v1 - v2),
            f'{indicator}_{country1}': v1,
            f'{indicator}_{country2}': v2,
        })

    return ratios

# Create economic-border dataset
print("🔗 CREATING ECONOMIC-BORDER INTEGRATION")
print("=" * 50)

economic_border_data = []

for idx, border in border_analysis.iterrows():
    country1, country2 = border['country_1'], border['country_2']

    print(f"Processing: {country1} ↔ {country2}")

    # Calculate economic ratios
    economic_ratios = calculate_economic_ratios(country1, country2, economic_df)

    if economic_ratios:  # Only add if we have economic data
        record = {
            'border_pair': f"{country1} ↔ {country2}",
            'country_1': country1,
            'country_2': country2,
            'border_length_km': border['border_length_km'],
            **economic_ratios
        }
        economic_border_data.append(record)
    else:
        print(f"   ⚠️ No economic data found for {country1} or {country2}")

economic_border_df = pd.DataFrame(economic_border_data)

# A DataFrame built from an empty list has no 'border_pair' column, so the
# unique-pair count is only looked up when at least one record exists;
# otherwise the summary reports 0 and the "no data" branch below is reached.
has_records = len(economic_border_df) > 0
pairs_with_data = economic_border_df['border_pair'].nunique() if has_records else 0

print(f"\n✅ ECONOMIC-BORDER INTEGRATION COMPLETE!")
print(f"   Records: {len(economic_border_df)}")
print(f"   Border pairs with economic data: {pairs_with_data}")

if has_records:
    print(f"   Features: {len(economic_border_df.columns)}")
    print(f"\n📊 Sample integrated data:")
    # Show the pair, its length and the first three ratio columns only.
    display_cols = ['border_pair', 'border_length_km'] + [col for col in economic_border_df.columns if 'ratio' in col][:3]
    print(economic_border_df[display_cols].head())
else:
    print("❌ No integrated data created")

🔗 CREATING ECONOMIC-BORDER INTEGRATION
Processing: Indonesia ↔ Malaysia
   ⚠️ No economic data found for Indonesia or Malaysia
Processing: Bolivia ↔ Chile
   ⚠️ No economic data found for Bolivia or Chile
Processing: Chile ↔ Peru
   ⚠️ No economic data found for Chile or Peru
Processing: Argentina ↔ Chile
   ⚠️ No economic data found for Argentina or Chile
Processing: Bolivia ↔ Peru
   ⚠️ No economic data found for Bolivia or Peru
Processing: Argentina ↔ Bolivia
   ⚠️ No economic data found for Argentina or Bolivia
Processing: Cyprus ↔ Dhekelia Sovereign Base Area
   ⚠️ No economic data found for Cyprus or Dhekelia Sovereign Base Area
Processing: China ↔ India
Processing: Israel ↔ Palestine
   ⚠️ No economic data found for Israel or Palestine
Processing: Israel ↔ Lebanon
   ⚠️ No economic data found for Israel or Lebanon
Processing: Israel ↔ Syria
   ⚠️ No economic data found for Israel or Syria
Processing: Lebanon ↔ Syria
   ⚠️ No economic data found for Lebanon or Syria
Processing: E