**Full logic to scrape all the 2025 local body election data**


In [None]:
import requests
import pandas as pd
import time
import urllib3
import json
import os

# --- 1. CONFIGURATION ---
# Every request in this notebook is sent with verify=False (TLS certificate
# checks are skipped), so silence the InsecureRequestWarning that urllib3
# would otherwise report for those calls.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Endpoints of the 2025 results site: landing page, district-level listing
# and the per-body / per-ward detail service.
URL_HOME = "https://lbtrend.kerala.gov.in/"
URL_LIST = "https://lbtrend.kerala.gov.in/includes/stateView2_ajax.php"
URL_DETAILS = "https://lbtrend.kerala.gov.in/includes/lb_ajax2.php"

# Headers attached to the scraping session.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/115.0.0.0 Safari/537.36"
    ),
    "X-Requested-With": "XMLHttpRequest",
    "Referer": "https://lbtrend.kerala.gov.in/",
}

# All 14 districts, keyed by the two-digit code used to build district IDs.
DISTRICTS = {
    "01": "Thiruvananthapuram",
    "02": "Kollam",
    "03": "Pathanamthitta",
    "04": "Alappuzha",
    "05": "Kottayam",
    "06": "Idukki",
    "07": "Ernakulam",
    "08": "Thrissur",
    "09": "Palakkad",
    "10": "Malappuram",
    "11": "Kozhikode",
    "12": "Wayanad",
    "13": "Kannur",
    "14": "Kasaragod",
}

# --- 2. HELPER FUNCTIONS ---

def deep_search_for_list(data, min_length=1):
    """Collect lists stored as dict values anywhere inside a JSON-like tree.

    A list that is a dict value and has at least ``min_length`` items is
    returned as a candidate and is not searched further. Shorter lists, nested
    dicts and the members of any list being walked are searched recursively.
    Lists that sit directly inside another list are only walked, never
    returned themselves. Candidates come back in traversal order.
    """
    if isinstance(data, dict):
        children = data.values()
        parent_is_dict = True
    elif isinstance(data, list):
        children = data
        parent_is_dict = False
    else:
        return []

    found = []
    for child in children:
        qualifies = (
            parent_is_dict
            and isinstance(child, list)
            and len(child) >= min_length
        )
        if qualifies:
            found.append(child)
        elif isinstance(child, (dict, list)):
            found += deep_search_for_list(child, min_length)
    return found

def get_candidates_level_4(session, ward_full_id):
    """Return the leading candidate of one ward as ``(party, name, votes)``.

    Posts a ``_p="can"`` query for ``ward_full_id`` to ``URL_DETAILS``, treats
    the longest list found in the JSON reply as the candidate table and picks
    the row with the highest vote count.

    Args:
        session: ``requests.Session`` used to send the POST.
        ward_full_id: Ward identifier sent as the ``_w`` form field.

    Returns:
        ``(row[0], row[3], int(row[4]))`` for the row with the most votes, or
        ``("Unknown", "Unknown", 0)`` when the request fails, the reply is not
        JSON, or no row carries a usable vote count.
    """
    fallback = ("Unknown", "Unknown", 0)
    payload_can = {"_p": "can", "_w": ward_full_id, "_t": "P", "_s": "L"}

    try:
        # requests never times out by default; bound the wait so a stalled
        # server cannot hang the whole scrape.
        resp_can = session.post(
            URL_DETAILS, data=payload_can, verify=False, timeout=30
        )
        can_json = resp_can.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a reply body that is not valid JSON.
        print(f"      -> Candidate lookup failed for ward {ward_full_id}: {exc}")
        return fallback

    can_lists = deep_search_for_list(can_json, min_length=1)
    if not can_lists:
        return fallback

    best_candidate = None
    max_v = -1
    for can in max(can_lists, key=len):
        try:
            # Index 4 is Votes; indexes 0 and 3 are returned as party and name.
            votes = int(can[4])
            party, name = can[0], can[3]
        except (IndexError, KeyError, TypeError, ValueError):
            # Row is too short, not indexable, or has a non-numeric vote count.
            continue
        if votes > max_v:
            max_v = votes
            best_candidate = (party, name)

    if best_candidate is None:
        return fallback
    return best_candidate[0], best_candidate[1], max_v

# --- 3. MAIN SCRAPER (2025 UNIVERSAL REBUILD) ---

def _identify_standard_body(item):
    """Pick ``(lb_id, lb_name)`` out of one row of a P/B/D district listing."""
    # The ID is the first column whose text is longer than four characters and
    # starts with G, B or D followed by a digit; otherwise fall back to column 0.
    lb_id = None
    for col in item:
        s_col = str(col)
        if len(s_col) > 4 and s_col[0] in ('G', 'B', 'D') and s_col[1].isdigit():
            lb_id = s_col
            break
    if not lb_id:
        lb_id = str(item[0])

    # The name is the first string column, other than the ID, that is longer
    # than three characters.
    lb_name = "Unknown"
    for col in item:
        if isinstance(col, str) and col != lb_id and len(col) > 3:
            lb_name = col
            break
    return lb_id, lb_name


def _collect_urban_bodies(urban_list):
    """Map urban body ID to name, skipping "Total" rows and district ('D...') IDs."""
    unique_urban = {}
    for body in urban_list:
        lb_id = str(body[0])
        if "Total" in lb_id or lb_id.startswith("D"):
            continue

        lb_name = str(body[1]).strip()
        if lb_name == "Unknown" or len(lb_name) < 2:
            # Column 1 was not usable; take the first longer string column.
            for col in body:
                if isinstance(col, str) and len(col) > 3 and col != lb_id:
                    lb_name = col
                    break

        unique_urban[lb_id] = lb_name
    return unique_urban


def scrape_2025_complete_rebuild():
    """Scrape every district in ``DISTRICTS`` and write one CSV per district.

    For each district the function lists the Grama, Block and District
    Panchayats (``_l`` codes P, B, D) and then the urban bodies (``_l="C"``)
    through ``URL_LIST``, and hands every body found to ``process_body``, which
    appends the ward rows to the district's row list. A district that yields
    at least one row is saved as ``Kerala_<code>_<name>_2025.csv`` in the
    current working directory.

    Failures while listing one body type are printed and do not stop the
    remaining types or districts.
    """
    print("--- STARTING FULL 2025 REBUILD (ALL 14 DISTRICTS) ---")

    # requests never times out by default; bound every call so one stalled
    # connection cannot hang the whole run.
    request_timeout = 30

    standard_types = [
        {"code": "P", "name": "Grama Panchayat"},
        {"code": "B", "name": "Block Panchayat"},
        {"code": "D", "name": "District Panchayat"},
    ]

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        session.headers.update(HEADERS)

        # Best-effort visit of the landing page before the AJAX calls
        # (presumably to pick up session cookies - confirm).
        try:
            session.get(URL_HOME, verify=False, timeout=request_timeout)
        except requests.RequestException as exc:
            print(f"Warning: could not load {URL_HOME}: {exc}")

        # --- OUTER LOOP: DISTRICTS ---
        for dist_code, dist_name in DISTRICTS.items():
            print("\n==================================================")
            print(f"PROCESSING DISTRICT: {dist_name} ({dist_code})")
            print("==================================================")

            district_data = []
            district_id = f"D{dist_code}001"

            # ---------------------------------------------------------
            # PART A: STANDARD TYPES (P, B, D)
            # ---------------------------------------------------------
            for b_type in standard_types:
                type_code = b_type['code']
                type_name = b_type['name']

                print(f"   >>> Scanning {type_name} ('{type_code}')...")

                payload_list = {"_p": "dv", "_l": type_code, "_d": district_id, "_s": "L"}
                try:
                    resp = session.post(
                        URL_LIST, data=payload_list, verify=False,
                        timeout=request_timeout,
                    )
                    data_list = resp.json()
                    found_lists = deep_search_for_list(data_list, min_length=1)

                    if found_lists:
                        # The longest list in the reply is taken as the table of bodies.
                        raw_list = max(found_lists, key=len)

                        # Deduplication
                        seen_ids = set()
                        for item in raw_list:
                            lb_id, lb_name = _identify_standard_body(item)
                            if lb_id and lb_id not in seen_ids:
                                seen_ids.add(lb_id)
                                # PROCESS BODY
                                process_body(session, lb_id, lb_name, type_name, dist_name, district_data)

                        print(f"      -> Finished {type_name}")
                    else:
                        print("      -> No bodies found.")

                except Exception as e:
                    # Boundary handler: report and move on to the next body type.
                    print(f"      -> Error: {e}")

            # ---------------------------------------------------------
            # PART B: URBAN BODIES (HYBRID SCAN)
            # ---------------------------------------------------------
            print("   >>> Scanning Urban Bodies (Hybrid Logic)...")

            payload_urban = {"_p": "dv", "_l": "C", "_d": district_id, "_s": "L"}
            try:
                resp = session.post(
                    URL_LIST, data=payload_urban, verify=False,
                    timeout=request_timeout,
                )
                u_json = resp.json()

                # STRATEGY 1: CHECK 'PAYLOAD' (Critical for TVM, etc.)
                urban_list = u_json.get('payload', [])

                # STRATEGY 2: DEEP SEARCH (Fallback for others)
                if not urban_list:
                    found_lists = deep_search_for_list(u_json, min_length=1)
                    if found_lists:
                        urban_list = max(found_lists, key=len)

                if urban_list:
                    unique_urban = _collect_urban_bodies(urban_list)
                    print(f"      -> Found {len(unique_urban)} Valid Urban Bodies.")

                    for uid, uname in unique_urban.items():
                        process_body(session, uid, uname, "Municipality/Corporation", dist_name, district_data)
                else:
                    print("      -> No Urban bodies found.")

            except Exception as e:
                # Boundary handler: report and continue with saving / next district.
                print(f"      -> Urban Error: {e}")

            # ---------------------------------------------------------
            # SAVE FILE IMMEDIATELY
            # ---------------------------------------------------------
            # Writing per district keeps finished districts on disk even if a
            # later district fails.
            if district_data:
                filename = f"Kerala_{dist_code}_{dist_name}_2025.csv"
                df = pd.DataFrame(district_data)
                df.to_csv(filename, index=False, encoding='utf-8-sig')
                print(f"   -> [SAVED] {filename} with {len(df)} rows.")

def process_body(session, lb_id, lb_name, type_name, dist_name, district_data):
    """Fetch the wards of one local body and append one result row per ward.

    Posts a ``_p="wv"`` query for ``lb_id`` to ``URL_DETAILS``. The ward table
    is the reply's ``payload`` entry when present and non-empty, otherwise the
    longest list found anywhere in the reply. For every ward the leading
    candidate is looked up with ``get_candidates_level_4``.

    Args:
        session: ``requests.Session`` used for the HTTP calls.
        lb_id: Local body identifier, sent as ``_w`` and stored in each row.
        lb_name: Local body name stored in each row.
        type_name: Local body type label stored in each row.
        dist_name: District name stored in each row.
        district_data: List that the row dicts are appended to (mutated in place).

    Returns:
        None. A failed request or a malformed ward row is reported with
        ``print`` and skipped instead of raising.
    """
    payload_w = {"_p": "wv", "_w": lb_id, "_t": "P", "_s": "L"}
    try:
        # requests never times out by default; bound the wait so a stalled
        # server cannot hang the whole scrape.
        resp_w = session.post(URL_DETAILS, data=payload_w, verify=False, timeout=30)
        w_json = resp_w.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a reply body that is not valid JSON.
        print(f"      -> Skipping {lb_name} ({lb_id}): {exc}")
        return

    # Check payload first for wards
    w_list = w_json.get('payload', []) if isinstance(w_json, dict) else []
    if not w_list:
        w_deep = deep_search_for_list(w_json, min_length=1)
        if w_deep:
            w_list = max(w_deep, key=len)

    for w in w_list:
        try:
            ward_id = str(w[0])
            front = str(w[1])
            # Ward name comes from column 5 when the row has one, else column 2.
            w_name = str(w[5]) if len(w) > 5 else str(w[2])
        except (IndexError, KeyError, TypeError) as exc:
            # Skip only this row so the remaining wards are still collected.
            print(f"      -> Skipping malformed ward row in {lb_name} ({lb_id}): {exc}")
            continue

        # The ward number is the last three characters of the ward ID.
        try:
            w_num = str(int(ward_id[-3:]))
        except ValueError:
            w_num = "0"

        party, c_name, votes = get_candidates_level_4(session, ward_id)
        if party == "Unknown":
            # No candidate detail available: fall back to the front from the ward row.
            party = front

        district_data.append({
            "District": dist_name,
            "Local Body Type": type_name,
            "Local Body ID": lb_id,
            "Local Body": lb_name,
            "Ward Number": w_num,
            "Ward Name": w_name,
            "Candidate": c_name,
            "Party": party,
            "Front": front,
            "Votes": votes,
            "Year": 2025
        })

# Run the full scrape only when this code is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    scrape_2025_complete_rebuild()

--- STARTING FULL 2025 REBUILD (ALL 14 DISTRICTS) ---

PROCESSING DISTRICT: Thiruvananthapuram (01)
   >>> Scanning Grama Panchayat ('P')...
      -> Finished Grama Panchayat
   >>> Scanning Block Panchayat ('B')...
      -> Finished Block Panchayat
   >>> Scanning District Panchayat ('D')...
      -> Finished District Panchayat
   >>> Scanning Urban Bodies (Hybrid Logic)...
      -> Found 5 Valid Urban Bodies.
   -> [SAVED] Kerala_01_Thiruvananthapuram_2025.csv with 1837 rows.

PROCESSING DISTRICT: Kollam (02)
   >>> Scanning Grama Panchayat ('P')...
      -> Finished Grama Panchayat
   >>> Scanning Block Panchayat ('B')...
      -> Finished Block Panchayat
   >>> Scanning District Panchayat ('D')...
      -> Finished District Panchayat
   >>> Scanning Urban Bodies (Hybrid Logic)...
      -> Found 5 Valid Urban Bodies.
   -> [SAVED] Kerala_02_Kollam_2025.csv with 1698 rows.

PROCESSING DISTRICT: Pathanamthitta (03)
   >>> Scanning Grama Panchayat ('P')...
      -> Finished Grama Panch

**COMBINING ALL 2025 DISTRICT FILES**

In [None]:
import pandas as pd
import glob
import os

def combine_2025_master():
    """Merge the per-district 2025 CSV files into one master CSV.

    Reads every ``Kerala_*_2025.csv`` and ``Kerala_*_2025 (1).csv`` file in the
    current working directory, concatenates them, drops rows that are exact
    duplicates, sorts by whichever of ``District`` / ``Local Body Type`` /
    ``Local Body`` are present, and writes
    ``Kerala_LocalBodies_Election_2025_MASTER.csv``.

    Files that cannot be read are reported and skipped. Nothing is written
    when no file could be read.
    """
    print("--- COMBINING ALL 2025 DISTRICT FILES ---")

    # Pattern to match your files. Sorted so the merge order (and the log) is
    # reproducible; glob itself returns files in arbitrary order.
    all_files = sorted(
        glob.glob("Kerala_*_2025.csv") + glob.glob("Kerala_*_2025 (1).csv")
    )

    if not all_files:
        print("No 2025 files found!")
        return

    print(f"Found {len(all_files)} files.")

    df_list = []
    for filename in all_files:
        try:
            df = pd.read_csv(filename)
        except (OSError, ValueError) as e:
            # ValueError covers pandas parser / empty-file errors and bad encodings.
            print(f"   -> Error reading {filename}: {e}")
            continue
        # Optional: Add a 'Source File' column if you want to track origin
        # df['Source'] = filename
        df_list.append(df)
        print(f"   -> Merged: {filename} ({len(df)} rows)")

    if not df_list:
        return

    master_df = pd.concat(df_list, ignore_index=True)

    # A district present both as "X.csv" and "X (1).csv" would otherwise be
    # counted twice; identical rows carry no extra information.
    rows_before = len(master_df)
    master_df = master_df.drop_duplicates()
    dropped = rows_before - len(master_df)
    if dropped:
        print(f"   -> Dropped {dropped} duplicate rows.")

    # Sort for neatness (District -> Local Body Type -> Local Body), using only
    # the columns that exist. A stable sort keeps the scraped ward order
    # within each local body.
    sort_columns = [
        column
        for column in ('District', 'Local Body Type', 'Local Body')
        if column in master_df.columns
    ]
    if sort_columns:
        master_df = master_df.sort_values(by=sort_columns, kind="mergesort")

    output_file = "Kerala_LocalBodies_Election_2025_MASTER.csv"
    master_df.to_csv(output_file, index=False, encoding='utf-8-sig')

    print(f"\n[SUCCESS] Master File Created: {output_file}")
    print(f"Total Rows: {len(master_df)}")
    if 'District' in master_df.columns:
        print(f"Districts Covered: {master_df['District'].nunique()}")

# Build the master CSV from the district files in the current working directory.
combine_2025_master()

--- COMBINING ALL 2025 DISTRICT FILES ---
Found 14 files.
   -> Merged: Kerala_03_Pathanamthitta_2025.csv (1099 rows)
   -> Merged: Kerala_04_Alappuzha_2025.csv (1666 rows)
   -> Merged: Kerala_08_Thrissur_2025.csv (2204 rows)
   -> Merged: Kerala_09_Palakkad_2025.csv (2116 rows)
   -> Merged: Kerala_13_Kannur_2025.csv (1812 rows)
   -> Merged: Kerala_01_Thiruvananthapuram_2025.csv (1837 rows)
   -> Merged: Kerala_14_Kasaragod_2025.csv (955 rows)
   -> Merged: Kerala_05_Kottayam_2025.csv (1611 rows)
   -> Merged: Kerala_11_Kozhikode_2025.csv (1903 rows)
   -> Merged: Kerala_07_Ernakulam_2025.csv (2219 rows)
   -> Merged: Kerala_12_Wayanad_2025.csv (587 rows)
   -> Merged: Kerala_06_Idukki_2025.csv (1036 rows)
   -> Merged: Kerala_10_Malappuram_2025.csv (2788 rows)
   -> Merged: Kerala_02_Kollam_2025.csv (1698 rows)

[SUCCESS] Master File Created: Kerala_LocalBodies_Election_2025_MASTER.csv
Total Rows: 23531
Districts Covered: 14
