# Thailand Election Data Processing
## Build CONST_RAW and PARTYLIST_RAW datasets for visualization

This notebook processes election66 and election69 data from JSON sources into JavaScript arrays used by the visualization.

### Setup: Import Libraries

In [2]:
import pandas as pd
import numpy as np
import json
import glob
import os
from pathlib import Path
from typing import Dict, List, Any

print("✓ Libraries imported successfully")

✓ Libraries imported successfully


### Configure Paths

In [3]:
# Define data directories
ELECTION66_JSON_DIR = Path.home() / "Documents/GitHub/th-election69-visualization/election66"
ELECTION69_CONST_DIR = Path.home() / "Documents/GitHub/election-69-OCR-result/data/matched/constituency"
ELECTION69_PL_DIR = Path.home() / "Documents/GitHub/election-69-OCR-result/data/matched/party_list"

# Check if paths exist
print(f"Election66 directory exists: {ELECTION66_JSON_DIR.exists()}")
print(f"Election69 constituency directory exists: {ELECTION69_CONST_DIR.exists()}")
print(f"Election69 party_list directory exists: {ELECTION69_PL_DIR.exists()}")

# List available files
if ELECTION66_JSON_DIR.exists():
    print(f"\nElection66 files:")
    for f in sorted(ELECTION66_JSON_DIR.glob("*.json")):
        print(f"  - {f.name}")

Election66 directory exists: True
Election69 constituency directory exists: True
Election69 party_list directory exists: True

Election66 files:
  - th_election66_info_constituency.json
  - th_election66_info_mp_candidate.json
  - th_election66_info_party_overview.json
  - th_election66_info_province.json
  - th_election66_stats_cons.json
  - th_election66_stats_party.json


### Region Mapping Dictionary

In [4]:
# Define region mappings (same as in your notebook)
NE_region = ["อำนาจเจริญ","บึงกาฬ","บุรีรัมย์","ชัยภูมิ","กาฬสินธุ์","ขอนแก่น","เลย",
             "มหาสารคาม","มุกดาหาร","นครพนม","นครราชสีมา","หนองบัวลำภู","หนองคาย",
             "ร้อยเอ็ด","สกลนคร","ศรีสะเกษ","สุรินทร์","อุบลราชธานี","อุดรธานี","ยโสธร"]
N_region  = ["เชียงใหม่","เชียงราย","ลำปาง","ลำพูน","แม่ฮ่องสอน","น่าน","พะเยา","แพร่","อุตรดิตถ์"]
W_region  = ["ตาก","กาญจนบุรี","ราชบุรี","เพชรบุรี","ประจวบคีรีขันธ์"]
E_region  = ["ฉะเชิงเทรา","จันทบุรี","ชลบุรี","ปราจีนบุรี","ระยอง","สระแก้ว","ตราด"]
C_region  = ["อุทัยธานี","อ่างทอง","ชัยนาท","พระนครศรีอยุธยา","ลพบุรี","นครปฐม","นนทบุรี",
             "ปทุมธานี","นครนายก","นครสวรรค์","สมุทรปราการ","สมุทรสาคร","สมุทรสงคราม",
             "สระบุรี","สิงห์บุรี","สุพรรณบุรี","สุโขทัย","พิษณุโลก","พิจิตร","กำแพงเพชร","เพชรบูรณ์"]
BKK       = ["กรุงเทพมหานคร"]
S_region  = ["ชุมพร","นครศรีธรรมราช","นราธิวาส","ปัตตานี","พัทลุง","สงขลา","สุราษฎร์ธานี",
             "ยะลา","กระบี่","พังงา","ภูเก็ต","ระนอง","สตูล","ตรัง"]

region_dict = {
    **{p: "02 ภาคอีสาน"        for p in NE_region},
    **{p: "01 ภาคเหนือ"        for p in N_region},
    **{p: "06 ภาคตะวันตก"      for p in W_region},
    **{p: "03 ภาคตะวันออก"     for p in E_region},
    **{p: "04 ภาคกลาง"         for p in C_region},
    **{p: "05 กรุงเทพมหานคร"   for p in BKK},
    **{p: "07 ภาคใต้"          for p in S_region},
}

print(f"✓ Region mapping defined: {len(region_dict)} provinces")

✓ Region mapping defined: 77 provinces


### Load Election69 Data from JSON

In [5]:
def load_json_folder(folder, ballot_type="constituency"):
    """Load election results from JSON folder (as per your notebook)"""
    rows = []
    
    json_files = sorted(glob.glob(os.path.join(folder, "*.json")))
    print(f"Found {len(json_files)} {ballot_type} files")
    
    for fpath in json_files:
        with open(fpath, encoding="utf-8") as f:
            d = json.load(f)

        province    = d["province_name_normalized"]
        cons_no     = d["constituency_number"]
        summary     = d["summary"]
        results     = d["results"]

        # Sort by votes descending 
        sorted_results = sorted(results, key=lambda x: x["votes"], reverse=True)
        winner   = sorted_results[0]  if len(sorted_results) > 0 else {"party": None, "votes": 0}
        runnerup = sorted_results[1]  if len(sorted_results) > 1 else {"party": None, "votes": 0}

        total_valid = summary["good_votes"]
        others = total_valid - winner["votes"] - runnerup["votes"]

        rows.append({
            "province":           province,
            "constituency_number": cons_no,
            "total_valid":        total_valid,
            "invalid_ballots":    summary["invalid_votes"],
            "no_votes":           summary["no_votes"],
            "voters_came":        summary["voters_came"],
            "winning_score":      winner["votes"],
            "winning_party":      winner["party"],
            "runnerUp_score":     runnerup["votes"],
            "runnerUp_party":     runnerup["party"],
            "others_score":       max(others, 0),
        })
    return pd.DataFrame(rows)

# Load data
if ELECTION69_CONST_DIR.exists():
    cons_df = load_json_folder(str(ELECTION69_CONST_DIR), "constituency")
    print(f"Loaded {len(cons_df)} constituency records")
    print(cons_df.head())
else:
    print("⚠ Election69 constituency data directory not found")
    cons_df = pd.DataFrame()

Found 387 constituency files
Loaded 387 constituency records
        province  constituency_number  total_valid  invalid_ballots  no_votes  \
0  กรุงเทพมหานคร                    1        77075             1146      4200   
1  กรุงเทพมหานคร                   10        91913             1521      6223   
2  กรุงเทพมหานคร                   11        94994             1207      4167   
3  กรุงเทพมหานคร                   12       103439             1601      5989   
4  กรุงเทพมหานคร                   13        90508             1315      5242   

   voters_came  winning_score winning_party  runnerUp_score runnerUp_party  \
0        82421          34167       ประชาชน           14813   ประชาธิปัตย์   
1        99657          41804       ประชาชน           19047       เพื่อไทย   
2       100367          38779       ประชาชน           24850       เพื่อไทย   
3       111029          49925       ประชาชน           16106      ภูมิใจไทย   
4        97065          44511       ประชาชน           15227   

In [6]:
if ELECTION69_PL_DIR.exists():
    pl_df = load_json_folder(str(ELECTION69_PL_DIR), "party_list")
    print(f"\nLoaded {len(pl_df)} party_list records")
    print(pl_df.head())
else:
    print("⚠ Election69 party_list data directory not found")
    pl_df = pd.DataFrame()

Found 386 party_list files

Loaded 386 party_list records
        province  constituency_number  total_valid  invalid_ballots  no_votes  \
0  กรุงเทพมหานคร                    1        78517             1726      2178   
1  กรุงเทพมหานคร                   10        94890             1779      2987   
2  กรุงเทพมหานคร                   11        96157             1769      2486   
3  กรุงเทพมหานคร                   12       106754             1499      2776   
4  กรุงเทพมหานคร                   13        93463             1380      2222   

   voters_came  winning_score winning_party  runnerUp_score runnerUp_party  \
0        82421          34215       ประชาชน           16471      ภูมิใจไทย   
1        99657          44757       ประชาชน           16002      ภูมิใจไทย   
2       100367          44980       ประชาชน           18501      ภูมิใจไทย   
3       111029          52294       ประชาชน           19124      ภูมิใจไทย   
4        97065          44989       ประชาชน           17977      

### Merge Election69 Data (Constituency + Party List)

In [7]:
# Merge on province + constituency number
if not cons_df.empty and not pl_df.empty:
    voters69 = pd.merge(
        cons_df.rename(columns=lambda c: f"const_{c}" if c not in ("province","constituency_number") else c),
        pl_df.rename(columns=lambda c: f"party_{c}" if c not in ("province","constituency_number") else c),
        on=["province", "constituency_number"]
    )
    
    # Calculate ballot difference
    voters69["voters_diff_const_PL"] = (
        (voters69["const_total_valid"] + voters69["const_invalid_ballots"] + voters69["const_no_votes"]) -
        (voters69["party_total_valid"] + voters69["party_invalid_ballots"] + voters69["party_no_votes"])
    )
    
    # Add region
    voters69["region"] = voters69["province"].map(region_dict)
    
    print(f"✓ Merged {len(voters69)} records")
    print(f"\nData structure:")
    print(voters69.info())
    print(f"\nSample:")
    print(voters69.head())
else:
    voters69 = pd.DataFrame()
    print("⚠ Cannot merge - one or both dataframes are empty")

✓ Merged 385 records

Data structure:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 385 entries, 0 to 384
Data columns (total 22 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   province               385 non-null    object
 1   constituency_number    385 non-null    int64 
 2   const_total_valid      385 non-null    int64 
 3   const_invalid_ballots  385 non-null    int64 
 4   const_no_votes         385 non-null    int64 
 5   const_voters_came      385 non-null    int64 
 6   const_winning_score    385 non-null    int64 
 7   const_winning_party    385 non-null    object
 8   const_runnerUp_score   385 non-null    int64 
 9   const_runnerUp_party   385 non-null    object
 10  const_others_score     385 non-null    int64 
 11  party_total_valid      385 non-null    int64 
 12  party_invalid_ballots  385 non-null    int64 
 13  party_no_votes         385 non-null    int64 
 14  party_voters_came      385 non-null 

### Transform to CONST_RAW Format

In [8]:
if not voters69.empty:
    # Build CONST_RAW - constituency MP data
    CONST_RAW = []
    
    for idx, row in voters69.iterrows():
        const_invalid = row["const_invalid_ballots"]
        const_voters = row["const_voters_came"]
        const_invalid_pct = (const_invalid / const_voters * 100) if const_voters > 0 else 0
        const_margin = row["const_winning_score"] - row["const_runnerUp_score"]
        
        record = {
            "prov_id": row["province"][:3].upper(),
            "province_thai": row["province"],
            "province_eng": row["province"],  # You'd map this properly
            "cons_no": row["constituency_number"],
            "invalid_votes": 0,  # From election66
            "percent_invalid": 0,  # From election66
            "turn_out": 0,  # From election66
            "invalid_2026": const_invalid,
            "turnout_2026": const_voters,
            "pct_turnout_2026": (const_voters / (const_voters + row["const_no_votes"]) * 100) if (const_voters + row["const_no_votes"]) > 0 else 0,
            "invalid_change": 0,  # Will calculate with election66
            "invalid_pct_2026": const_invalid_pct,
            "invalid_pct_change": 0,  # Will calculate with election66
            "winner_party": "Unknown",  # From election66
            "margin_votes": 0,  # From election66
            "margin_2569": const_margin,
            "runnerup_votes": row["const_runnerUp_score"],
            "winner_votes": row["const_winning_score"],
            "winner_party_2569": row["const_winning_party"],
            "winner_votes_2569": row["const_winning_score"],
        }
        CONST_RAW.append(record)
    
    print(f"✓ Created CONST_RAW with {len(CONST_RAW)} records")
    print(f"Sample record:")
    print(json.dumps(CONST_RAW[0], ensure_ascii=False, indent=2))
else:
    CONST_RAW = []
    print("⚠ Cannot create CONST_RAW - voters69 is empty")

✓ Created CONST_RAW with 385 records
Sample record:
{
  "prov_id": "กรุ",
  "province_thai": "กรุงเทพมหานคร",
  "province_eng": "กรุงเทพมหานคร",
  "cons_no": 1,
  "invalid_votes": 0,
  "percent_invalid": 0,
  "turn_out": 0,
  "invalid_2026": 1146,
  "turnout_2026": 82421,
  "pct_turnout_2026": 95.15129125731634,
  "invalid_change": 0,
  "invalid_pct_2026": 1.3904223438201428,
  "invalid_pct_change": 0,
  "winner_party": "Unknown",
  "margin_votes": 0,
  "margin_2569": 19354,
  "runnerup_votes": 14813,
  "winner_votes": 34167,
  "winner_party_2569": "ประชาชน",
  "winner_votes_2569": 34167
}


### Transform to PARTYLIST_RAW Format

In [9]:
if not voters69.empty:
    # Build PARTYLIST_RAW - party list MP data
    PARTYLIST_RAW = []
    
    for idx, row in voters69.iterrows():
        pl_invalid = row["party_invalid_ballots"]
        pl_voters = row["party_voters_came"]
        pl_invalid_pct = (pl_invalid / pl_voters * 100) if pl_voters > 0 else 0
        pl_margin = row["party_winning_score"] - row["party_runnerUp_score"]
        
        record = {
            "prov_id": row["province"][:3].upper(),
            "province_thai": row["province"],
            "province_eng": row["province"],
            "cons_no": row["constituency_number"],
            "invalid_votes": 0,  # From election66
            "percent_invalid": 0,  # From election66
            "turn_out": 0,  # From election66
            "invalid_2026": pl_invalid,
            "turnout_2026": pl_voters,
            "pct_turnout_2026": (pl_voters / (pl_voters + row["party_no_votes"]) * 100) if (pl_voters + row["party_no_votes"]) > 0 else 0,
            "invalid_change": 0,  # Will calculate with election66
            "invalid_pct_2026": pl_invalid_pct,
            "invalid_pct_change": 0,  # Will calculate with election66
            "winner_party": "Unknown",
            "margin_votes": 0,
            "margin_2569": pl_margin,
            "runnerup_votes": row["party_runnerUp_score"],
            "winner_votes": row["party_winning_score"],
            "winner_party_2569": row["party_winning_party"],
            "winner_votes_2569": row["party_winning_score"],
        }
        PARTYLIST_RAW.append(record)
    
    print(f"✓ Created PARTYLIST_RAW with {len(PARTYLIST_RAW)} records")
    print(f"Sample record:")
    print(json.dumps(PARTYLIST_RAW[0], ensure_ascii=False, indent=2))
else:
    PARTYLIST_RAW = []
    print("⚠ Cannot create PARTYLIST_RAW - voters69 is empty")

✓ Created PARTYLIST_RAW with 385 records
Sample record:
{
  "prov_id": "กรุ",
  "province_thai": "กรุงเทพมหานคร",
  "province_eng": "กรุงเทพมหานคร",
  "cons_no": 1,
  "invalid_votes": 0,
  "percent_invalid": 0,
  "turn_out": 0,
  "invalid_2026": 1726,
  "turnout_2026": 82421,
  "pct_turnout_2026": 97.42550148346906,
  "invalid_change": 0,
  "invalid_pct_2026": 2.094126496887929,
  "invalid_pct_change": 0,
  "winner_party": "Unknown",
  "margin_votes": 0,
  "margin_2569": 17744,
  "runnerup_votes": 16471,
  "winner_votes": 34215,
  "winner_party_2569": "ประชาชน",
  "winner_votes_2569": 34215
}


### Export as JavaScript

In [10]:
# Create JavaScript file content
js_content = f"""// Thailand Election Data - Auto-generated
// Exported from Jupyter notebook

// Constituency MP (ส.ส. เขต) - 400 constituencies
const CONST_RAW = {json.dumps(CONST_RAW, ensure_ascii=False, indent=2)};

// Party List MP (บส. รายชื่อ)
const PARTYLIST_RAW = {json.dumps(PARTYLIST_RAW, ensure_ascii=False, indent=2)};
"""

# Save to file
output_path = Path.home() / "Documents/GitHub/th-election69-visualization/election_data_generated.js"
with open(output_path, 'w', encoding='utf-8') as f:
    f.write(js_content)

print(f"✓ Exported to: {output_path}")
print(f"✓ CONST_RAW records: {len(CONST_RAW)}")
print(f"✓ PARTYLIST_RAW records: {len(PARTYLIST_RAW)}")
print(f"\nFile size: {len(js_content) / 1024:.1f} KB")

✓ Exported to: /home/ronnie-rattan/Documents/GitHub/th-election69-visualization/election_data_generated.js
✓ CONST_RAW records: 385
✓ PARTYLIST_RAW records: 385

File size: 437.0 KB


### Integration Instructions

To use the generated data in your visualization:

1. **Option A: Replace RAW data** - Copy the generated JavaScript and replace the `const RAW = [...]` in your index.html with the new data

2. **Option B: Both datasets** - Update your index.html to:
   - Keep `CONST_RAW` with constituency data
   - Update `PARTYLIST_RAW` with the party list data

3. **Update the toggle function** in your `switchDataset()` function to use the correct raw data:
   ```javascript
   const selectedRaw = dataset === 'partylist' ? PARTYLIST_RAW : CONST_RAW;
   ```