In [1]:
import pandas as pd
import os
import glob
import re
import json

## Utility functions

In [2]:
def write_json(data_dict,filename,data_write_path):
    '''
    Write ``data_dict`` as pretty-printed JSON (4-space indent) to the file
    ``filename`` inside the directory ``data_write_path``.

    Parameters
    ----------
    data_dict : JSON-serialisable object (typically a dict) to store.
    filename : name of the output file, e.g. ``'results.json'``.
    data_write_path : directory to write into; a trailing path separator
        is optional.

    Raises
    ------
    TypeError / ValueError from ``json.dumps`` if ``data_dict`` cannot be
    serialised, OSError if the destination cannot be opened for writing.
    '''

    # Serialise first: if the data is not JSON-serialisable we fail here,
    # before any existing file at the destination is truncated.
    json_object = json.dumps(data_dict, indent=4)

    # Join directory and file name properly instead of concatenating the two
    # strings, so './data' and './data/' both resolve to './data/<filename>'.
    target_path = os.path.join(data_write_path, filename)

    # Opening in "w" mode overwrites any existing file at target_path.
    with open(target_path, "w", encoding="utf-8") as outfile:
        outfile.write(json_object)

## Make nested JSON from all the yearly results data

In [3]:
# 'parliament_code_digits' is read through the `str` converter in every file so
# the codes stay text (e.g. '001') instead of being parsed as numbers.
code_as_text = {'parliament_code_digits': str}

ge15_candidates_clean_df = pd.read_csv(
    './data/cleaned/malaysiakini_newslab/ge15_candidates_clean.csv',
    converters=code_as_text,
)
ge15_incumbents_clean_df = pd.read_csv(
    './data/cleaned/malaysiakini_newslab/ge15_incumbents_clean.csv',
    converters=code_as_text,
)

# Stack the combined results with the GE15 incumbents and candidates (in that
# order), then replace every missing value with an empty string.
combined_df = pd.concat([
    pd.read_csv(
        './data/cleaned/combined/combined_results_parliament.csv',
        converters=code_as_text,
    ),
    ge15_incumbents_clean_df,
    ge15_candidates_clean_df,
]).fillna('')

In [4]:
# Preview the combined frame; the notebook shows only its first and last rows.
combined_df

Unnamed: 0.1,year,name,coalition,party_code,votes,vote_share,parliament_code_digits,constituency,state,winner,total_votes,Unnamed: 0,gender,results_added
0,2004,DATUK SERI AZMI KHALID,BN,UMNO,18322,66.917458,001,PADANG BESAR,Perlis,1,27380,,,
1,2004,WAN KHARIZAL WAN KHAZIM,BA,PAS,9058,33.082542,001,PADANG BESAR,Perlis,0,27380,,,
2,2004,DATUK RADZI SHEIKH AHMAD,BN,UMNO,22498,69.335552,002,KANGAR,Perlis,1,32448,,,
3,2004,ISHAR SAAD,BA,PAS,9950,30.664448,002,KANGAR,Perlis,0,32448,,,
4,2004,DATUK SERI SYED RAZLAN SYED PUTRA JAMALULLAIL,BN,UMNO,17367,55.149090,003,ARAU,Perlis,1,31491,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
940,2022,LIDANG DISEN,GPS,PDP,0,0.000000,192-000,MAS GADING,Sarawak,0,0,940,F,0
941,2022,MORDI BIMOL,PH,DAP,0,0.000000,192-000,MAS GADING,Sarawak,0,0,941,F,0
942,2022,CHONG CHIENG JEN,PH,DAP,0,0.000000,196-000,STAMPIN,Sarawak,0,0,942,F,0
943,2022,LO KHERE CHANG,GPS,SUPP,0,0.000000,196-000,STAMPIN,Sarawak,0,0,943,F,0


In [5]:
# List the column labels available in the combined frame.
combined_df.columns

Index(['year', 'name', 'coalition', 'party_code', 'votes', 'vote_share',
       'parliament_code_digits', 'constituency', 'state', 'winner',
       'total_votes', 'Unnamed: 0', 'gender', 'results_added'],
      dtype='object')

In [6]:
# Distinct coalition labels, ordered by how often each one occurs. The empty
# string stands for rows whose coalition was missing before fillna('').
combined_df['coalition'].value_counts().index

Index(['BN', '', 'PH', 'PR', 'PN', 'GS', 'BA', 'GTA', 'GPS', 'USA', 'GRS'], dtype='object')

In [7]:
# Every distinct party code found in the combined data, in ascending order.
parties = sorted(combined_df['party_code'].value_counts().index.to_list())
parties

['AMANAH',
 'ANAK NEGERI',
 'BERJASA',
 'BERSAMA',
 'BERSATU',
 'BERSEKUTU',
 'BN',
 'DAP',
 'DIRECT',
 'GERAKAN',
 'IKATAN',
 'IMAN',
 'IND',
 'INDEPENDENT',
 'IPF',
 'KDM',
 'KIMMA',
 'KITA',
 'LDP',
 'MCA',
 'MDP',
 'MIC',
 'MMSP',
 'MUDA',
 'MUP',
 'MUPP',
 'MYPPP',
 'PAP',
 'PAS',
 'PASOK',
 'PBB',
 'PBDS',
 'PBDSB',
 'PBK',
 'PBM',
 'PBRS',
 'PBS',
 'PCM',
 'PCS',
 'PDP',
 'PEACE',
 'PEJUANG',
 'PFP',
 'PHRS',
 'PKR',
 'PPBM',
 'PPP',
 'PPRS',
 'PRM',
 'PRS',
 'PSB',
 'PSM',
 'PUR',
 'PUTRA',
 'SAPP',
 'SEDAR',
 'SETIA',
 'SNAP',
 'SPDP',
 'STAR',
 'SUPP',
 'SWP',
 'UMNO',
 'UPKO',
 'VACANT',
 'WARISAN']

In [8]:
# Index the rows by (year, parliament_code_digits) so one district's rows for one
# year can be selected with a tuple key.
# The index is sorted because tuple lookups on an unsorted, non-unique
# MultiIndex make pandas emit "PerformanceWarning: indexing past lexsort depth
# may impact performance" on every .loc call.
# NOTE(review): sorting orders rows by year, then code; rows that share a key
# are expected to keep their original relative order - confirm on your pandas version.
combined_df_year_district = combined_df.set_index(['year','parliament_code_digits']).sort_index()

In [9]:
# Distinct (year, parliament_code_digits) index keys as a plain Python list.
year_district_pairs = combined_df_year_district.index.unique().to_list()

In [10]:
# Start from an empty mapping; it is filled as year -> district -> details.
nested_year_district = dict()

In [11]:
# Columns describing the district as a whole, and columns describing each candidate row.
district_columns = ['total_votes','constituency','state']
result_columns = ['name','party_code','coalition','votes','vote_share','winner','gender','results_added']

for year_district in year_district_pairs:
  year, district = year_district

  # All rows of this district in this year; looked up once and reused below.
  district_rows = combined_df_year_district.loc[(year,district),:]

  # District-level info is taken from the first row of the group.
  district_info = district_rows[district_columns].to_dict('records')[0]

  # One record per candidate row.
  candidate_records = district_rows[result_columns].to_dict('records')

  # First record flagged winner == 1, or '' when no row is flagged.
  winner = next((record for record in candidate_records if record['winner'] == 1), '')

  nested_year_district.setdefault(year, {})[district] = {
    'info': district_info,
    'results': candidate_records,
    'winner': winner,
  }

    

  return self._getitem_tuple(key)


In [12]:
# Save the nested year -> district structure next to the combined CSV.
write_json(
    data_dict=nested_year_district,
    filename='nested_results_parliament.json',
    data_write_path='./data/cleaned/combined/',
)