In [1]:
!pip install --quiet pandas folium

In [2]:
import pandas as pd
import numpy as np
import json
from datetime import datetime
import folium
from IPython.display import display

# Haversine distance (km)
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in kilometres between two points given in decimal degrees.

    Applies the haversine formula on a sphere of radius 6371 km. Only NumPy
    ufuncs are used, so scalars and arrays are both accepted.
    """
    earth_radius_km = 6371.0
    # All trigonometry below works on radians.
    phi1, lam1, phi2, lam2 = (np.radians(v) for v in (lat1, lon1, lat2, lon2))
    half_dphi = (phi2 - phi1) / 2.0
    half_dlam = (lam2 - lam1) / 2.0
    hav = np.sin(half_dphi) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(half_dlam) ** 2
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return earth_radius_km * central_angle

# Suitability check: donation (row) and receiver (row)
def is_suitable(donation_row, receiver_row):
    """Return True when a donation satisfies every constraint of a receiver.

    Both arguments only need a dict-style ``.get(key, default)`` (a plain dict
    or a pandas Series row). A flag that is absent is treated as False.

    Rules (all must hold):
      * receiver requires halal         -> donation must be flagged halal
      * receiver requires allergen-free -> donation must not be flagged as
        containing allergens
      * receiver is for children / for elderly -> conservative policy: the
        donation must be explicitly flagged for that group; an unflagged
        (generic) donation is rejected
    """
    # Halal requirement.
    if receiver_row.get('is_halal_receiver', False) and not donation_row.get('is_halal_donor', False):
        return False

    # Allergen requirement. The donor flag is read from 'is_alergan_donor'
    # and, when that key is absent, from 'is_alergan' -- the name the donor
    # column listing printed further down in this notebook actually uses.
    # Without the fallback this rule could never reject a donation.
    donation_has_allergen = donation_row.get(
        'is_alergan_donor', donation_row.get('is_alergan', False)
    )
    if receiver_row.get('is_alergan_free', False) and donation_has_allergen:
        return False

    # Age-group requirements (conservative: an explicit donor flag is required).
    if receiver_row.get('is_for_child_receiver', False) and not donation_row.get('is_for_child_donor', False):
        return False
    if receiver_row.get('is_for_elderly_receiver', False) and not donation_row.get('is_for_elderly_donor', False):
        return False
    return True


In [11]:
from google.colab import files
import pandas as pd

# ✅ Upload CSV files manually
# `files` comes from the `google.colab` import above, so this cell is only
# expected to work inside Colab.
uploaded = files.upload()

# The uploaded files will be saved automatically in the current directory
# NOTE(review): the recorded output of this cell shows the upload being stored
# as "data_penerima (1).csv" (a suffix was added). Later cells read the
# un-suffixed names, so confirm they pick up the intended copy.
for fn in uploaded.keys():
    print(f"Uploaded file: {fn}")



Saving data_penerima.csv to data_penerima (1).csv
Uploaded file: data_penerima (1).csv


In [12]:
# Load the donor and receiver tables from the current working directory.
# NOTE(review): a later cell reloads the same two files from '/content/...';
# confirm both locations refer to the same data.
donor = pd.read_csv("data_donor.csv")
receiver = pd.read_csv("data_penerima.csv")

# Report (rows, columns) of each frame as a quick load check.
print("✅ Donor dataset loaded:", donor.shape)
print("✅ Receiver dataset loaded:", receiver.shape)


✅ Donor dataset loaded: (500, 10)
✅ Receiver dataset loaded: (500, 12)


In [14]:
# ✅ Standardize Donor Dataset Columns
def standardize_df_donor(df):
    """Rename the donor CSV's Indonesian column names to English ones.

    Returns the renamed frame produced by ``DataFrame.rename``. Columns that
    are not listed pass through unchanged, and listed names that are absent
    from ``df`` are ignored.
    """
    indonesian_to_english = {
        # identity / payload
        'id_penyumbang': 'donor_id',
        'makanan_disumbangkan': 'food_type',
        'jumlah_disumbangkan': 'quantity',
        # location
        'lokasi_lat_penyumbang': 'latitude',
        'lokasi_lon_penyumbang': 'longitude',
        # quality and dietary flags
        'kondisi_makanan': 'food_condition',
        'is_halal_donor': 'is_halal',
        'is_for_child_donor': 'is_for_child',
        'is_for_elderly_donor': 'is_for_elderly',
        'is_alergan': 'has_allergen',
    }
    return df.rename(columns=indonesian_to_english)


# ✅ Standardize Receiver Dataset Columns
def standardize_df_receiver(df):
    """Rename the receiver CSV's Indonesian column names to English ones.

    Returns the renamed frame produced by ``DataFrame.rename``. Columns that
    are not listed pass through unchanged, and listed names that are absent
    from ``df`` are ignored.

    The requested-food column is accepted under two spellings:
    ``makanan_dibutuhkan`` (the name shown in the receiver column listing
    printed later in this notebook) and ``kebutuhan_makanan`` (the name this
    function mapped originally). Both become ``food_requirement``; a frame
    carrying both spellings at once would end up with a duplicated column
    name, so supply only one of them.
    """
    return df.rename(columns={
        'id_penerima': 'receiver_id',
        'makanan_dibutuhkan': 'food_requirement',
        'kebutuhan_makanan': 'food_requirement',  # legacy spelling, kept for compatibility
        'jumlah_dibutuhkan': 'required_quantity',
        'lokasi_lat_penerima': 'latitude',
        'lokasi_lon_penerima': 'longitude',
        'is_halal_receiver': 'needs_halal',
        'is_for_child_receiver': 'needs_child_friendly',
        'is_for_elderly_receiver': 'needs_elderly_friendly',
        'is_alergan_free': 'needs_allergen_free',
        'status_penerima': 'urgency'
    })



In [15]:

import numpy as np
from datetime import datetime

def haversine(lat1, lon1, lat2, lon2):
    """Haversine distance in km between (lat1, lon1) and (lat2, lon2), given in degrees.

    Built from NumPy ufuncs only, so it accepts scalars as well as arrays.
    The Earth is modelled as a sphere of radius 6371 km.
    """
    lat1_rad, lon1_rad, lat2_rad, lon2_rad = [
        np.radians(deg) for deg in (lat1, lon1, lat2, lon2)
    ]
    sin_half_dlat = np.sin((lat2_rad - lat1_rad) / 2.0)
    sin_half_dlon = np.sin((lon2_rad - lon1_rad) / 2.0)
    # Haversine of the central angle between the two points.
    hav = sin_half_dlat ** 2 + np.cos(lat1_rad) * np.cos(lat2_rad) * sin_half_dlon ** 2
    angle = 2 * np.arcsin(np.sqrt(hav))
    return 6371.0 * angle

def _row_flag(row, *names):
    """Read a yes/no flag from ``row`` under the first of ``names`` holding a value.

    Missing keys, None and NaN count as "not set"; when no name yields a value
    the flag is False. Text is parsed ('true', '1', '1.0', 'yes' -> True,
    anything else -> False) so that a string such as 'False' is not mistaken
    for a truthy value.
    """
    for name in names:
        value = row.get(name, None)
        if value is None:
            continue
        if isinstance(value, str):
            return value.strip().lower() in ('true', '1', '1.0', 'yes')
        if pd.isna(value):
            continue
        return bool(value)
    return False


def suitable(d_row, r_row):
    """Decide whether donor row ``d_row`` may be matched to receiver row ``r_row``.

    Both rows only need a dict-style ``.get`` (pandas Series or dict). Each
    field is looked up under its standardized English name first and then
    under the raw CSV name, so the check is enforced whether or not the
    columns were renamed beforehand:

      receiver needs_halal / is_halal_receiver              vs donor is_halal / is_halal_donor
      receiver needs_allergen_free / is_alergan_free        vs donor has_allergen / is_alergan / is_alergan_donor
      receiver needs_child_friendly / is_for_child_receiver vs donor is_for_child / is_for_child_donor
      receiver needs_elderly_friendly / is_for_elderly_receiver vs donor is_for_elderly / is_for_elderly_donor
      donor food_condition / kondisi_makanan

    Returns False as soon as one rule is violated, True otherwise.
    """
    # Halal
    if _row_flag(r_row, 'needs_halal', 'is_halal_receiver') and not _row_flag(d_row, 'is_halal', 'is_halal_donor'):
        return False
    # Allergen
    if (_row_flag(r_row, 'needs_allergen_free', 'is_alergan_free')
            and _row_flag(d_row, 'has_allergen', 'is_alergan', 'is_alergan_donor')):
        return False
    # Child / elderly requirements (conservative: donor must be explicitly flagged)
    if (_row_flag(r_row, 'needs_child_friendly', 'is_for_child_receiver')
            and not _row_flag(d_row, 'is_for_child', 'is_for_child_donor')):
        return False
    if (_row_flag(r_row, 'needs_elderly_friendly', 'is_for_elderly_receiver')
            and not _row_flag(d_row, 'is_for_elderly', 'is_for_elderly_donor')):
        return False
    # Food condition: reject anything whose label contains a negation
    # ('tidak' also covers 'tidak_layak'); adapt to your actual values.
    cond = str(d_row.get('food_condition', d_row.get('kondisi_makanan', ''))).lower()
    if 'tidak' in cond or 'not' in cond:
        return False
    return True


In [16]:

import pandas as pd

def match_all(donor_df, receiver_df, max_km=10, top_n=5):
    """Pair every donor with its nearest suitable receivers.

    For each donor row, every receiver row with usable coordinates is scored
    with ``haversine``; receivers farther than ``max_km`` or rejected by
    ``suitable`` are dropped and the ``top_n`` closest survivors are kept.

    Parameters
    ----------
    donor_df, receiver_df : pandas.DataFrame
        Must carry ``latitude`` / ``longitude`` columns. Rows whose
        coordinates are missing are skipped. ``donor_id`` / ``receiver_id``
        are copied into the result when present (None otherwise). Index
        labels are converted with ``int()``.
    max_km : number
        Maximum donor-receiver distance in kilometres (inclusive).
    top_n : int
        Maximum number of receivers kept per donor.

    Returns
    -------
    pandas.DataFrame
        One row per match with the columns donor_index, donor_id,
        receiver_index, receiver_id, distance_km and matched_at (UTC,
        ISO-8601 with a trailing "Z"). The columns are present even when no
        match was found, so downstream exports keep a stable schema.

    Warns
    -----
    UserWarning
        When either frame lacks the ``latitude`` / ``longitude`` columns. The
        result is then empty, and without the warning that is
        indistinguishable from "nothing was close enough".
    """
    import warnings
    from datetime import datetime, timezone

    result_columns = ['donor_index', 'donor_id', 'receiver_index',
                      'receiver_id', 'distance_km', 'matched_at']

    for label, frame in (('donor_df', donor_df), ('receiver_df', receiver_df)):
        missing = [c for c in ('latitude', 'longitude') if c not in frame.columns]
        if missing:
            warnings.warn(
                f"match_all: {label} has no {missing} column(s); returning no matches. "
                "Create numeric 'latitude'/'longitude' columns before matching.",
                stacklevel=2,
            )
            return pd.DataFrame(columns=result_columns)

    # Receivers without coordinates can never match, so filter them once
    # instead of re-checking them for every donor.
    located_receivers = []
    for r_idx, r in receiver_df.iterrows():
        rlat = r.get('latitude')
        rlon = r.get('longitude')
        if pd.isna(rlat) or pd.isna(rlon):
            continue
        located_receivers.append((r_idx, r, rlat, rlon))

    results = []
    for d_idx, d in donor_df.iterrows():
        dlat = d.get('latitude')
        dlon = d.get('longitude')
        if pd.isna(dlat) or pd.isna(dlon):
            continue
        candidates = []
        for r_idx, r, rlat, rlon in located_receivers:
            dist = haversine(dlat, dlon, rlat, rlon)
            # distance first: `suitable` is only evaluated for receivers in range
            if dist <= max_km and suitable(d, r):
                candidates.append((r_idx, r, dist))
        # sort by distance and take top_n
        candidates = sorted(candidates, key=lambda x: x[2])[:top_n]
        for r_idx, r, dist in candidates:
            # Same text format as utcnow().isoformat() + "Z", without the
            # deprecated naive-UTC constructor.
            matched_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
            results.append({
                'donor_index': int(d_idx),
                'donor_id': d.get('donor_id', None),
                'receiver_index': int(r_idx),
                'receiver_id': r.get('receiver_id', None),
                'distance_km': float(dist),
                'matched_at': matched_at
            })
    return pd.DataFrame(results, columns=result_columns)

# run with default params
# NOTE(review): the recorded output of this cell is "Matches found: 0".
# match_all skips every row without 'latitude'/'longitude' values, and in this
# notebook those columns are created by cells that appear further down --
# re-run this cell after they exist.
matches_df = match_all(donor, receiver, max_km=10, top_n=5)
print("Matches found:", len(matches_df))
# Last expression of the cell: shows the first 10 matches as a rich table.
matches_df.head(10)


Matches found: 0


In [17]:

# Persist the match table in two formats (CSV without the index column, and
# JSON as a list of records with non-ASCII characters kept as-is).
matches_df.to_csv('waste2worth_matches.csv', index=False)
matches_df.to_json('waste2worth_matches.json', orient='records', force_ascii=False)
print("Saved: waste2worth_matches.csv and waste2worth_matches.json")

# build simple notification payloads (one per match)
payloads = matches_df.to_dict(orient='records')
# `json` is already imported in the first code cell; this repeat is harmless.
import json
with open('match_notifications.json','w',encoding='utf-8') as f:
    json.dump(payloads, f, ensure_ascii=False, indent=2)
print("Saved: match_notifications.json")


Saved: waste2worth_matches.csv and waste2worth_matches.json
Saved: match_notifications.json


In [19]:
# Run this cell to auto-detect and create standardized latitude/longitude columns
import pandas as pd

# Make sure donor and receiver are loaded already
# Print the raw column names of both frames so the coordinate columns picked
# by the auto-detection below can be checked by eye.
print("Donor columns:")
print(donor.columns.tolist())
print("\nReceiver columns:")
print(receiver.columns.tolist())

def find_coord_columns(cols):
    """Guess which entries of ``cols`` hold latitude and longitude.

    Each coordinate is resolved in two stages:
      1. exact, case-insensitive match against a prioritised list of known
         column names (first candidate that exists wins);
      2. otherwise the first column whose lowercase name contains a
         coordinate-like substring and does not contain 'note'.

    Returns a ``(lat_column, lon_column)`` tuple using the original spelling
    from ``cols``; an entry is None when nothing matched.
    """
    # candidate lists prioritized
    lat_candidates = ['lokasi_lat_penyumbang', 'lokasi_lat_penerima', 'lat', 'latitude', 'lokasi_lat', 'latitude_penyumbang', 'latitude_penerima', 'lat_penyumbang', 'lat_penerima']
    lon_candidates = ['lokasi_lon_penyumbang','lokasi_lon_penerima','lon','lng','longitude','lokasi_lon','longitude_penyumbang','longitude_penerima','lon_penyumbang','lon_penerima']
    lower_cols = [c.lower() for c in cols]

    def _resolve(candidates, looks_like_coord):
        # Stage 1: known names, in priority order.
        for cand in candidates:
            if cand in lower_cols:
                return cols[lower_cols.index(cand)]
        # Stage 2: substring heuristic over the columns, in column order.
        for position, name in enumerate(lower_cols):
            if looks_like_coord(name) and 'note' not in name:
                return cols[position]
        return None

    found_lat = _resolve(lat_candidates, lambda name: 'lat' in name)
    found_lon = _resolve(
        lon_candidates,
        lambda name: 'lon' in name or 'lng' in name or 'long' in name,
    )
    return found_lat, found_lon

# detect the coordinate columns of both frames
d_lat_col, d_lon_col = find_coord_columns(list(donor.columns))
r_lat_col, r_lon_col = find_coord_columns(list(receiver.columns))

print("\nAuto-detected columns:")
print(" Donor lat:", d_lat_col, " Donor lon:", d_lon_col)
print(" Receiver lat:", r_lat_col, " Receiver lon:", r_lon_col)

# create standardized columns if found
# NOTE: this adds 'latitude'/'longitude' to the global frames in place; values
# that cannot be parsed as numbers become NaN (errors='coerce').
if d_lat_col and d_lon_col:
    donor['latitude'] = pd.to_numeric(donor[d_lat_col], errors='coerce')
    donor['longitude'] = pd.to_numeric(donor[d_lon_col], errors='coerce')
    print("\nCreated donor['latitude'] and donor['longitude']")
else:
    # Detection failed: print a copy-paste template for mapping the columns by hand.
    print("\nCould NOT auto-detect donor lat/lon. If columns exist under other names, run this to map them manually:")
    print("donor['latitude'] = pd.to_numeric(donor['<YOUR_LAT_COL>'], errors='coerce')")
    print("donor['longitude'] = pd.to_numeric(donor['<YOUR_LON_COL>'], errors='coerce')")

if r_lat_col and r_lon_col:
    receiver['latitude'] = pd.to_numeric(receiver[r_lat_col], errors='coerce')
    receiver['longitude'] = pd.to_numeric(receiver[r_lon_col], errors='coerce')
    print("Created receiver['latitude'] and receiver['longitude']")
else:
    # Detection failed: print a copy-paste template for mapping the columns by hand.
    print("\nCould NOT auto-detect receiver lat/lon. If columns exist under other names, run this to map them manually:")
    print("receiver['latitude'] = pd.to_numeric(receiver['<YOUR_LAT_COL>'], errors='coerce')")
    print("receiver['longitude'] = pd.to_numeric(receiver['<YOUR_LON_COL>'], errors='coerce')")

# quick sanity checks
# Count of rows with a usable latitude; 0 is reported when the column was never created.
print("\nDonor latitude non-null count:", donor['latitude'].notna().sum() if 'latitude' in donor.columns else 0)
print("Receiver latitude non-null count:", receiver['latitude'].notna().sum() if 'latitude' in receiver.columns else 0)

# If both have zero non-null coords, show small sample of columns to help you pick names
if ('latitude' not in donor.columns or donor['latitude'].notna().sum()==0) and ('latitude' not in receiver.columns or receiver['latitude'].notna().sum()==0):
    print("\nBoth datasets lack usable coordinates. Show sample rows to help you pick the correct columns:")
    display(donor.head(3))
    display(receiver.head(3))


Donor columns:
['id_penyumbang', 'makanan_disumbangkan', 'jumlah_disumbangkan', 'lokasi_lat_penyumbang', 'lokasi_lon_penyumbang', 'kondisi_makanan', 'is_halal_donor', 'is_for_child_donor', 'is_for_elderly_donor', 'is_alergan']

Receiver columns:
['id_penerima', 'makanan_dibutuhkan', 'jumlah_dibutuhkan', 'lokasi_lat_penerima', 'lokasi_lon_penerima', 'frekuensi_menerima', 'kondisi_makanan_diterima', 'is_halal_receiver', 'is_for_child_receiver', 'is_for_elderly_receiver', 'is_alergan_free', 'status_penerima']

Auto-detected columns:
 Donor lat: lokasi_lat_penyumbang  Donor lon: lokasi_lon_penyumbang
 Receiver lat: lokasi_lat_penerima  Receiver lon: lokasi_lon_penerima

Created donor['latitude'] and donor['longitude']
Created receiver['latitude'] and receiver['longitude']

Donor latitude non-null count: 500
Receiver latitude non-null count: 500


In [9]:
# Load dataset first
# NOTE(review): this rebinds the global `donor` / `receiver`, discarding any
# columns added to the previous frames. The paths are absolute ('/content/...'),
# unlike the relative paths used by the earlier load cell -- confirm both
# point at the same files.
donor = pd.read_csv('/content/data_donor.csv')
receiver = pd.read_csv('/content/data_penerima.csv')

def standardize_df_donor(df):
    """Return a cleaned copy of the donor frame; the input is left untouched.

    Steps:
      1. strip and lowercase the column names;
      2. turn flag columns (name starts with ``is_`` or contains
         ``alerg`` / ``allerg``) into real booleans;
      3. coerce coordinate-like columns (name contains ``lat``, ``lon`` or
         ``lng``) to numbers, unparsable values becoming NaN.

    Columns are not renamed. Expected fields are id_penyumbang,
    makanan_disumbangkan, jumlah_disumbangkan, lokasi_lat_penyumbang,
    lokasi_lon_penyumbang, kondisi_makanan, is_halal_donor,
    is_for_child_donor, is_for_elderly_donor and is_alergan.
    """
    df = df.copy()
    # Lowercase column names
    df.columns = [c.strip().lower() for c in df.columns]

    # Normalize boolean-like columns. A value is True only when its text form
    # is one of the tokens below; everything else (false/0/no, blanks, NaN,
    # unknown text) is False. '1.0' is included because a 0/1 column that
    # contains missing values is read as float, and surrounding whitespace is
    # stripped so that 'Yes ' still counts.
    truthy_tokens = {'true', '1', '1.0', 'yes'}
    bool_cols = [c for c in df.columns if c.startswith('is_') or 'alerg' in c or 'allerg' in c]
    for c in bool_cols:
        df[c] = df[c].astype(str).str.strip().str.lower().isin(truthy_tokens)

    # parse numeric lat/lon if present
    # NOTE(review): this is a substring test, so any column whose name merely
    # contains 'lat' / 'lon' / 'lng' is coerced as well.
    for c in df.columns:
        if 'lat' in c or 'lon' in c or 'lng' in c:
            df[c] = pd.to_numeric(df[c], errors='coerce')
    return df

def standardize_df_receiver(df):
    """Return a cleaned copy of the receiver frame; the input is left untouched.

    Steps:
      1. strip and lowercase the column names;
      2. turn flag columns (name starts with ``is_`` or contains
         ``alerg`` / ``allerg``) into real booleans;
      3. coerce coordinate-like columns (name contains ``lat``, ``lon`` or
         ``lng``) to numbers, unparsable values becoming NaN.

    Columns are not renamed.
    """
    df = df.copy()
    df.columns = [c.strip().lower() for c in df.columns]

    # A flag is True only when its text form is one of these tokens; anything
    # else (false/0/no, blanks, NaN, unknown text) is False. '1.0' covers 0/1
    # columns that were read as float because of missing values, and
    # whitespace is stripped so that 'Yes ' still counts.
    truthy_tokens = {'true', '1', '1.0', 'yes'}
    bool_cols = [c for c in df.columns if c.startswith('is_') or 'alerg' in c or 'allerg' in c]
    for c in bool_cols:
        df[c] = df[c].astype(str).str.strip().str.lower().isin(truthy_tokens)

    # NOTE(review): substring test -- any column whose name merely contains
    # 'lat' / 'lon' / 'lng' is coerced to numeric as well.
    for c in df.columns:
        if 'lat' in c or 'lon' in c or 'lng' in c:
            df[c] = pd.to_numeric(df[c], errors='coerce')
    return df

# Replace the global frames with their cleaned copies (normalized column
# names, boolean flags, numeric coordinates).
donor = standardize_df_donor(donor)
receiver = standardize_df_receiver(receiver)

# Show the resulting column names for a visual check.
print("After standardize — donor columns:", donor.columns.tolist())
print("After standardize — receiver columns:", receiver.columns.tolist())


After standardize — donor columns: ['id_penyumbang', 'makanan_disumbangkan', 'jumlah_disumbangkan', 'lokasi_lat_penyumbang', 'lokasi_lon_penyumbang', 'kondisi_makanan', 'is_halal_donor', 'is_for_child_donor', 'is_for_elderly_donor', 'is_alergan']
After standardize — receiver columns: ['id_penerima', 'makanan_dibutuhkan', 'jumlah_dibutuhkan', 'lokasi_lat_penerima', 'lokasi_lon_penerima', 'frekuensi_menerima', 'kondisi_makanan_diterima', 'is_halal_receiver', 'is_for_child_receiver', 'is_for_elderly_receiver', 'is_alergan_free', 'status_penerima']


In [22]:
# Run this single cell to auto-detect columns, create standardized fields and draw map centered on India
import pandas as pd
import folium
from IPython.display import display

# --- 1) Show current columns so you can inspect quickly
# (uses the global `donor` / `receiver` frames as they are when this cell runs)
print("Donor columns:", donor.columns.tolist())
print("Receiver columns:", receiver.columns.tolist())

# --- 2) helper to find best matching column from candidates
def find_col(cols, candidates):
    """Pick the entry of ``cols`` that best matches a prioritised candidate list.

    Matching is case-insensitive and happens in two passes: first an exact
    name match, trying the candidates in order; then, if none matched
    exactly, the first column that contains a candidate as a substring
    (again trying candidates in order). The original spelling from ``cols``
    is returned, or None when nothing matches.
    """
    lowered = [name.lower() for name in cols]

    # Pass 1: exact name match.
    for cand in candidates:
        key = cand.lower()
        if key in lowered:
            return cols[lowered.index(key)]

    # Pass 2: substring match.
    for cand in candidates:
        key = cand.lower()
        hit = next((pos for pos, name in enumerate(lowered) if key in name), None)
        if hit is not None:
            return cols[hit]

    return None

# candidate lists
# Each list is ordered by priority: find_col tries exact names in this order
# and only then falls back to substring matching.
id_cands_donor = ['donor_id','id_penyumbang','id','idpenyumbang','id_penyumbang ']
id_cands_recv  = ['receiver_id','id_penerima','id','idpenerima','id_penerima ']
lat_cands = ['lokasi_lat_penyumbang','lokasi_lat_penerima','lat','latitude','lokasi_lat','latitude_penyumbang','latitude_penerima','lat_penyumbang','lat_penerima']
lon_cands = ['lokasi_lon_penyumbang','lokasi_lon_penerima','lon','lng','longitude','lokasi_lon','longitude_penyumbang','longitude_penerima','lon_penyumbang','lon_penerima']
food_cands = ['makanan_disumbangkan','food_type','food','kebutuhan_makanan','food_requirement','makanan']

# detect columns
# Every result is either an existing column name or None.
donor_id_col = find_col(list(donor.columns), id_cands_donor)
recv_id_col  = find_col(list(receiver.columns), id_cands_recv)
dlat_col = find_col(list(donor.columns), lat_cands)
dlon_col = find_col(list(donor.columns), lon_cands)
rlat_col = find_col(list(receiver.columns), lat_cands)
rlon_col = find_col(list(receiver.columns), lon_cands)
dfood_col = find_col(list(donor.columns), food_cands)
rfood_col = find_col(list(receiver.columns), food_cands)

# Report what was picked so a wrong guess can be spotted before it is used.
print("\nAuto-detected mapping:")
print(" donor id:", donor_id_col)
print(" receiver id:", recv_id_col)
print(" donor lat/lon:", dlat_col, dlon_col)
print(" receiver lat/lon:", rlat_col, rlon_col)
print(" donor food col:", dfood_col)
print(" receiver food col:", rfood_col)

# --- 3) create standardized columns with fallbacks
# String ids: taken from the detected id column, or from the row index when
# no id column was detected (the detected name is then None and not a column).
donor['donor_id_std'] = donor[donor_id_col].astype(str) if donor_id_col in donor.columns else donor.index.astype(str)
receiver['receiver_id_std'] = receiver[recv_id_col].astype(str) if recv_id_col in receiver.columns else receiver.index.astype(str)

# lat/lon numeric creation (try several fallbacks)
def create_latlon(df, lat_col, lon_col, side):
    """Write numeric ``latitude`` / ``longitude`` columns into ``df`` in place.

    ``lat_col`` / ``lon_col`` are used when both exist in ``df``. Otherwise
    the first column whose lowercase name contains 'lat', and the first one
    containing 'lon', 'lng' or 'long', are used instead and the chosen pair
    is printed. Values that cannot be parsed as numbers become NaN.

    ``side`` ('donor' or 'receiver') is only used in the printed messages.
    Returns True when the columns were written, otherwise prints a warning
    and returns False.
    """
    columns = df.columns
    if not (lat_col in columns and lon_col in columns):
        # Requested columns unavailable: search for any column name
        # containing 'lat' / 'lon'-like text and keep the first hit of each.
        lat_like = [c for c in columns if 'lat' in c.lower()]
        lon_like = [
            c for c in columns
            if 'lon' in c.lower() or 'lng' in c.lower() or 'long' in c.lower()
        ]
        found_lat = lat_like[0] if lat_like else None
        found_lon = lon_like[0] if lon_like else None
        if not (found_lat and found_lon):
            print(f"Warning: could not create standardized latitude/longitude for {side}. Please provide lat/lon columns.")
            return False
        df['latitude'] = pd.to_numeric(df[found_lat], errors='coerce')
        df['longitude'] = pd.to_numeric(df[found_lon], errors='coerce')
        print(f"  mapped {side} lat/lon from {found_lat}/{found_lon}")
        return True

    df['latitude'] = pd.to_numeric(df[lat_col], errors='coerce')
    df['longitude'] = pd.to_numeric(df[lon_col], errors='coerce')
    return True

# Add 'latitude'/'longitude' to both global frames; the flags record whether
# each attempt succeeded.
ok_dcoords = create_latlon(donor, dlat_col, dlon_col, 'donor')
ok_rcoords = create_latlon(receiver, rlat_col, rlon_col, 'receiver')

# food type fallback
# 'food_type_std' is always created: from the detected column, else from the
# first column whose name mentions food, else the literal 'unknown'.
if dfood_col in donor.columns:
    donor['food_type_std'] = donor[dfood_col].astype(str)
else:
    # try common alternatives
    fallback = None
    for c in donor.columns:
        if 'makan' in c.lower() or 'food' in c.lower():
            fallback = c; break
    if fallback: donor['food_type_std'] = donor[fallback].astype(str)
    else: donor['food_type_std'] = 'unknown'

# Same procedure for receivers, additionally accepting names containing 'kebutuhan'.
if rfood_col in receiver.columns:
    receiver['food_type_std'] = receiver[rfood_col].astype(str)
else:
    fallback = None
    for c in receiver.columns:
        if 'makan' in c.lower() or 'food' in c.lower() or 'kebutuhan' in c.lower():
            fallback = c; break
    if fallback: receiver['food_type_std'] = receiver[fallback].astype(str)
    else: receiver['food_type_std'] = 'unknown'

# quick sanity print
# NOTE(review): these lines index 'latitude' unconditionally, so they raise
# KeyError if create_latlon returned False and the column was never created.
print("\nCounts of usable coords:")
print(" donor latitude non-null:", donor['latitude'].notna().sum())
print(" receiver latitude non-null:", receiver['latitude'].notna().sum())

# --- 4) Create map centered on India and draw points + matches (safe: check columns exist)
# NOTE(review): the centre is hard-coded; confirm it suits where the data
# points actually lie.
india_center = [20.5937, 78.9629]
m = folium.Map(location=india_center, zoom_start=5)

# donors red
# One circle marker per donor row that has both coordinates; the tooltip
# shows the standardized id and food type.
if 'latitude' in donor.columns and 'longitude' in donor.columns:
    for _, d in donor.iterrows():
        lat = d['latitude']; lon = d['longitude']
        if pd.isna(lat) or pd.isna(lon): continue
        tooltip = f"Donor: {d.get('donor_id_std','')}, Food: {d.get('food_type_std','')}"
        folium.CircleMarker([lat,lon], radius=5, tooltip=tooltip, color='red', fill=True).add_to(m)
else:
    print("No donor coords to plot.")

# receivers blue
# Same as above for receivers.
if 'latitude' in receiver.columns and 'longitude' in receiver.columns:
    for _, r in receiver.iterrows():
        lat = r['latitude']; lon = r['longitude']
        if pd.isna(lat) or pd.isna(lon): continue
        tooltip = f"Receiver: {r.get('receiver_id_std','')}, Needs: {r.get('food_type_std','')}"
        folium.CircleMarker([lat,lon], radius=5, tooltip=tooltip, color='blue', fill=True).add_to(m)
else:
    print("No receiver coords to plot.")

# draw match lines if matches_df exists and has donor_index/receiver_index
# At most the first 200 matches are drawn. A match is skipped when its
# donor/receiver row cannot be looked up or lacks coordinates. Only lookup and
# conversion errors are tolerated, and the number of skipped matches is
# reported, so a systematic problem (e.g. matches computed on differently
# indexed frames) is not hidden.
if 'matches_df' in globals() and not matches_df.empty:
    skipped_matches = 0
    for _, row in matches_df.head(200).iterrows():
        try:
            drow = donor.loc[int(row['donor_index'])]
            rrow = receiver.loc[int(row['receiver_index'])]
            if pd.isna(drow['latitude']) or pd.isna(drow['longitude']) or pd.isna(rrow['latitude']) or pd.isna(rrow['longitude']):
                continue
            folium.PolyLine(locations=[[drow['latitude'], drow['longitude']], [rrow['latitude'], rrow['longitude']]], color='green', weight=1, opacity=0.6).add_to(m)
        except (KeyError, IndexError, ValueError, TypeError):
            # skip if indexing issues
            skipped_matches += 1
            continue
    if skipped_matches:
        print(f"Skipped {skipped_matches} match line(s): donor/receiver row could not be resolved.")

# legend
# Fixed-position HTML box in the bottom-left corner of the map explaining the
# marker colours used above (red donors, blue receivers, green match lines).
legend_html = """
<div style="
     position: fixed;
     bottom: 50px;
     left: 50px;
     width: 180px;
     height: 120px;
     background-color: white;
     border: 2px solid grey;
     z-index: 9999;
     font-size: 14px;
     padding: 10px;">
<b>Legend</b><br>
<span style="color:red;">&#9679;</span> Donor (Food Source)<br>
<span style="color:blue;">&#9679;</span> Receiver / NGO<br>
<span style="color:green;">&#8211;</span> Matched Food Route
</div>
"""
# Attach the legend markup to the map's root HTML, then render the map.
m.get_root().html.add_child(folium.Element(legend_html))

display(m)

# --- 5) If there were no coords, help the user by printing a sample of rows
# Triggered when EITHER frame has no usable latitude at all; shows the first
# rows of both frames plus a copy-paste template for mapping columns by hand.
if donor['latitude'].notna().sum()==0 or receiver['latitude'].notna().sum()==0:
    print("\nSample donor rows (first 3):")
    display(donor.head(3))
    print("\nSample receiver rows (first 3):")
    display(receiver.head(3))
    print("\nIf lat/lon are under different column names, copy the exact column names printed above and run:")
    print(" donor['latitude'] = pd.to_numeric(donor['<your_lat_col>'], errors='coerce')")
    print(" donor['longitude'] = pd.to_numeric(donor['<your_lon_col>'], errors='coerce')")
    print(" receiver['latitude'] = pd.to_numeric(receiver['<your_lat_col>'], errors='coerce')")
    print(" receiver['longitude'] = pd.to_numeric(receiver['<your_lon_col>'], errors='coerce')")


Donor columns: ['id_penyumbang', 'makanan_disumbangkan', 'jumlah_disumbangkan', 'lokasi_lat_penyumbang', 'lokasi_lon_penyumbang', 'kondisi_makanan', 'is_halal_donor', 'is_for_child_donor', 'is_for_elderly_donor', 'is_alergan', 'latitude', 'longitude']
Receiver columns: ['id_penerima', 'makanan_dibutuhkan', 'jumlah_dibutuhkan', 'lokasi_lat_penerima', 'lokasi_lon_penerima', 'frekuensi_menerima', 'kondisi_makanan_diterima', 'is_halal_receiver', 'is_for_child_receiver', 'is_for_elderly_receiver', 'is_alergan_free', 'status_penerima', 'latitude', 'longitude']

Auto-detected mapping:
 donor id: id_penyumbang
 receiver id: id_penerima
 donor lat/lon: lokasi_lat_penyumbang lokasi_lon_penyumbang
 receiver lat/lon: lokasi_lat_penerima lokasi_lon_penerima
 donor food col: makanan_disumbangkan
 receiver food col: makanan_dibutuhkan

Counts of usable coords:
 donor latitude non-null: 500
 receiver latitude non-null: 500
