In [1]:
import pandas as pd
import plotly.graph_objects as go
from geopy.geocoders import Nominatim
import time
from tqdm import tqdm

# 1. Load the raw shipment records
print("\n🔍 Preparing data...")
df = pd.read_csv("e_waste_dataset.csv")  # Use your actual file

# Lookup from collection city to the country it belongs to — extend as needed.
city_to_country = {
    "New York": "United States",
    "Berlin": "Germany",
    "Tokyo": "Japan",
    "London": "United Kingdom",
    "Paris": "France",
    "Mumbai": "India",
    "Shanghai": "China",
    "Sydney": "Australia",
    "Toronto": "Canada",
    "São Paulo": "Brazil",
}

# Cities missing from the lookup keep their original 'Location' value.
source_city = df["Location"]
df["Origin_Country"] = source_city.map(city_to_country).fillna(source_city)

# 2. Geocoder client plus an in-memory cache keyed by place name
print("\n🌍 Geocoding locations...")
geolocator = Nominatim(user_agent="ewaste_flow_analysis_numbered")
coordinate_cache = dict()

def get_coordinates(place):
    """Return the coordinates of ``place`` as ``{'lat': ..., 'lon': ...}``.

    Results are memoised in the module-level ``coordinate_cache`` so each
    place is sent to the geocoder at most once per successful answer.

    Args:
        place: Free-form place name passed to ``geolocator.geocode``.

    Returns:
        A dict with ``'lat'`` and ``'lon'`` keys. ``{'lat': 0, 'lon': 0}`` is
        returned when the geocoder finds no match or the request fails.
    """
    if place in coordinate_cache:
        return coordinate_cache[place]

    try:
        location = geolocator.geocode(place)
    except Exception as e:
        # Best-effort: report and fall back. Request errors are NOT cached so
        # that a transient failure can be retried on a later call.
        print(f"⚠️ Could not geocode {place}: {str(e)}")
        return {'lat': 0, 'lon': 0}
    finally:
        # Throttle after every request, including failed ones, so an error
        # is never followed immediately by another request.
        time.sleep(1.1)

    if location:
        coords = {'lat': location.latitude, 'lon': location.longitude}
    else:
        coords = {'lat': 0, 'lon': 0}

    # Cache "no match" answers too; otherwise every later call for the same
    # unknown place would repeat the request and the sleep.
    coordinate_cache[place] = coords
    return coords

# 3. Data aggregation
print("\n📊 Processing data...")
# One row per (origin city, export destination) pair. Total_Weight sums
# Weight_kg; Shipment_Count uses 'count', i.e. the number of non-null
# Weight_kg values in the group.
flow_data = df.groupby(['Location', 'Exported_To']).agg(
    Total_Weight=('Weight_kg', 'sum'),
    Shipment_Count=('Weight_kg', 'count')
).reset_index()

origin_list = flow_data['Location'].unique()
destination_list = flow_data['Exported_To'].unique()
# A place can be both an origin and a destination; dict.fromkeys drops the
# duplicates while keeping first-seen order, so each place is geocoded (and
# counted in the progress bar) exactly once.
all_places = list(dict.fromkeys([*origin_list, *destination_list]))

# Geocode all unique locations up front so later lookups hit the cache
print("⏳ Geocoding progress:")
for place in tqdm(all_places):
    get_coordinates(place)

# 4. Create visualization
print("\n🎨 Creating visualization...")
fig = go.Figure(layout=go.Layout(width=1400, height=800, autosize=False))
color_scale = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8']

# The heaviest flow sets the width scale. Computed once instead of on every
# iteration; a zero or NaN maximum would otherwise give NaN/inf widths.
max_weight = flow_data['Total_Weight'].max()
can_scale_width = bool(max_weight > 0)

# Draw one line per (origin, destination) flow.
# NOTE(review): get_coordinates returns lat=0/lon=0 for places it could not
# geocode, so such flows are drawn to/from that point — confirm this is wanted.
for flow_number, (_, row) in enumerate(flow_data.iterrows()):
    origin = get_coordinates(row['Location'])
    dest = get_coordinates(row['Exported_To'])

    # Base width 0.5, plus up to 5 more in proportion to the heaviest flow.
    if can_scale_width:
        line_width = 0.5 + (row['Total_Weight'] / max_weight * 5)
    else:
        line_width = 0.5

    fig.add_trace(go.Scattergeo(
        lon=[origin['lon'], dest['lon']],
        lat=[origin['lat'], dest['lat']],
        mode='lines',
        line=dict(
            width=line_width,
            # Cycle through the palette by position, not by index label.
            color=color_scale[flow_number % len(color_scale)]
        ),
        opacity=0.7,
        hoverinfo='text',
        text=(f"<b>From:</b> {row['Location']}<br>"
              f"<b>To:</b> {row['Exported_To']}<br>"
              f"<b>Total Weight:</b> {row['Total_Weight']:,.2f} kg<br>"
              f"<b>Shipments:</b> {row['Shipment_Count']}"),
        showlegend=False
    ))

# Resolve each location once. Looking up lon and lat in separate
# comprehensions would call get_coordinates twice per place, repeating the
# request and the throttle sleep for any place that is not in the cache.
origin_coords = [get_coordinates(loc) for loc in origin_list]
destination_coords = [get_coordinates(loc) for loc in destination_list]

# Add origin points with numbers (labels are "<n>. <name>", n starting at 1)
fig.add_trace(go.Scattergeo(
    lon=[coords['lon'] for coords in origin_coords],
    lat=[coords['lat'] for coords in origin_coords],
    mode='markers+text',
    marker=dict(size=12, color='#4285F4', line=dict(width=2, color='white')),
    text=[f"{i+1}. {loc}" for i, loc in enumerate(origin_list)],
    textposition="top center",
    hoverinfo='text',
    name='Collection Points'
))

# Add destination points with numbers (numbered independently of the origins)
fig.add_trace(go.Scattergeo(
    lon=[coords['lon'] for coords in destination_coords],
    lat=[coords['lat'] for coords in destination_coords],
    mode='markers+text',
    marker=dict(size=12, color='#EA4335', symbol='diamond', line=dict(width=2, color='white')),
    text=[f"{i+1}. {loc}" for i, loc in enumerate(destination_list)],
    textposition="top center",
    hoverinfo='text',
    name='Export Destinations'
))

# Figure-wide styling: centred title, world map appearance, margins,
# hover-label look and background colours.
fig.update_layout(
    title={
        'text': '<b>Global E-Waste Export Flows (Numbered)</b>',
        'font': {'size': 24, 'family': "Arial Black"},
        'x': 0.5,
        'xanchor': 'center',
    },
    geo={
        'scope': 'world',
        'projection_type': 'natural earth',
        'showland': True,
        'landcolor': 'rgb(240, 240, 240)',
        'countrycolor': 'rgb(200, 200, 200)',
        'showocean': True,
        'oceancolor': 'rgb(220, 240, 255)',
        'showcountries': True,
        'showframe': False,
    },
    margin={'l': 10, 'r': 10, 't': 80, 'b': 0},
    hoverlabel={
        'bgcolor': 'white',
        'font_size': 14,
        'font_family': 'Arial',
    },
    plot_bgcolor='rgb(250, 250, 250)',
    paper_bgcolor='rgb(250, 250, 250)',
)

# 5. Persist the interactive map as a standalone HTML file
print("\n💾 Saving visualization...")
fig.write_html("e_waste_export_flow_numbered.html")
print("✅ Saved as 'e_waste_export_flow_numbered.html'")

# Write the lookup table that maps each origin label number to its city and
# country; cities absent from city_to_country are listed under their own name.
print("\n📄 Saving label reference table...")
label_numbers = range(1, len(origin_list) + 1)
origin_countries = [city_to_country.get(city, city) for city in origin_list]
origin_labels = pd.DataFrame({
    'No.': label_numbers,
    'City': origin_list,
    'Country': origin_countries,
})
origin_labels.to_csv("origin_label_reference.csv", index=False)
print("✅ Saved as 'origin_label_reference.csv'")

# Finally render the figure in the active environment
print("\n🖥️ Displaying visualization...")
fig.show()



🔍 Preparing data...

🌍 Geocoding locations...

📊 Processing data...
⏳ Geocoding progress:


100%|██████████| 11/11 [00:19<00:00,  1.78s/it]



🎨 Creating visualization...

💾 Saving visualization...
✅ Saved as 'e_waste_export_flow_numbered.html'

📄 Saving label reference table...
✅ Saved as 'origin_label_reference.csv'

🖥️ Displaying visualization...
