In [1]:
import os 
import pandas as pd 
import numpy as np 



In [2]:
def get_election_df(type):
    if type == 'AE':
        election_df = pd.read_csv('data/Chhattisgarh_AE.csv')
    # if type == 'GA':
    #     election_df = pd.read_csv('data/Maharashtra_GA.csv')
        
    return election_df
        

In [3]:

def get_parties(year,AC):
    top_3 = [1,2,3]
    # Define conditions
    condition_general = (
        (election_df['Year'] == year) &
        (election_df['Constituency_No'] == AC) &
        (~election_df['Candidate'].isin(['None of the Above', 'NOTA']))
    )

    condition_inc = (
        (election_df['Party'] == 'INC')
    )

    # Use bitwise OR to combine conditions
    filtered_election_df = election_df[
        (condition_general & election_df['Position'].isin(top_3)) |
        (condition_inc & condition_general)
    ]
    
    return filtered_election_df.sort_values(by='Position', ascending=True)['Party'].tolist()


In [4]:
def intermediate_table(df, year):
    constituency = df.iloc[0]['Constituency']
    year = df.iloc[0]['Year']

    parties_ordered = get_parties(year, constituency)
    
    # Filter the columns that contains parties_ordered and col_ and 'NOTA' 
    col_columns = [col for col in df.columns if col in parties_ordered or col.startswith('col_') or col == 'NOTA']

    cols = pd.Series(df.columns)
    for dup in cols[cols.duplicated()].unique(): 
        cols[cols[cols == dup].index.values.tolist()] = [dup + '_' + str(i) if i != 0 else dup for i in range(sum(cols == dup))]
    df.columns = cols
    
    # Add INC Status Column
    max_votes_column = df[col_columns].idxmax(axis=1)
    df['INC_Status'] = max_votes_column.map(lambda x: 'WON' if x == 'INC' else 'LOSS')

    
    top_parties = parties_ordered[:3] 

    for party in top_parties:
        df[f'{party} Share%'] = ((df[party] / df['Total']) * 100).round(2)

    # Prepare the columns for the final DataFrame
    final_columns = ['SN', 'Constituency', 'Year'] + [item for party in top_parties for item in (party, f'{party} Share%')] + ['Total','INC_Status']
    final_df = df[final_columns]
    
    # delete rows where SN is null
    final_df = final_df[final_df['SN'].notnull()]
    
    return final_df, constituency


In [5]:

import os 


def create_intermediate_tables_and_dump_excel_files(excel_dir, output_dir, log_file_name, year):
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(excel_dir):
        if filename.endswith('.xlsx'):
            try: 
                df = pd.read_excel(os.path.join(excel_dir, filename))
                intermediate_df, constituency = intermediate_table(df, year)
                output_file_path = os.path.join(output_dir, f"{constituency}.xlsx")
                intermediate_df.to_excel(output_file_path, index=False)
            except Exception as exc: 
                with open(log_file_name, 'a') as f: 
                    f.write(f"Error processing file: {filename} - {exc}\n")
                continue
                

In [6]:

excel_dir = 'results/cleaned_election_data/CH/AE_2018'
output_dir = 'results/intermediate_tables/CH/AE_2018'
log_file_name = "logs/CH_AE_election_intermediate_2018_log.txt"
year = 2018
type = 'AE'
election_df = get_election_df(type)

create_intermediate_tables_and_dump_excel_files(excel_dir, output_dir, log_file_name, year)

In [12]:
# excel_dir = 'results/cleaned_election_data/MH/GE_2014'
# output_dir = 'results/intermediate_tables/MH/GE_2014'
# log_file_name = "logs/maharshtra_loksabha_election_intermediate_2014_cleaning_log.txt"
# year = 2014

# excel_dir = 'cleaned_election_data/KA/2018'
# output_dir = 'intermediate_tables/KA/2018'
# log_file_name = "logs/karnataka_assembly_election_intermediate_2018_cleaning_log.txt"
# year = 2018

# excel_dir = 'cleaned_election_data/KA/2014'
# output_dir = 'intermediate_tables/KA/2014'
# log_file_name = "logs/karnataka_assembly_election_intermediate_2014_cleaning_log.txt"
# year = 2014

# excel_dir = 'results/cleaned_election_data/RA/2018'
# output_dir = 'results/intermediate_tables/RA/2018'
# log_file_name = "logs/rajasthan_election_intermediate_2018_log.txt"

# excel_dir = 'results/cleaned_election_data/MH/2019_AE'
# output_dir = 'results/intermediate_tables/MH/2019_AE'
# log_file_name = "logs/mh_ae_election_intermediate_2019_log.txt"
# year = 2019
# type = 'AE'
# election_df = get_election_df(type)

# excel_dir = 'results/cleaned_election_data/MH/2019_GA'
# output_dir = 'results/intermediate_tables/MH/2019_GA'
# log_file_name = "logs/mh_ga_election_intermediate_2019_log.txt"
# year = 2019
# type = 'GA'
# election_df = get_election_df(type)

# create_intermediate_tables_and_dump_excel_files(excel_dir, output_dir, log_file_name, year)


  max_votes_column = df[col_columns].idxmax(axis=1)
  max_votes_column = df[col_columns].idxmax(axis=1)
  max_votes_column = df[col_columns].idxmax(axis=1)
  max_votes_column = df[col_columns].idxmax(axis=1)


In [15]:
# file_path = 'cleaned_election_data/KA/2014/196.xlsx'
# file_path = 'cleaned_election_data/KA/2018/187.xlsx'
# file_path = 'results/cleaned_election_data/RA/2018/130.xlsx'
file_path = 'results/cleaned_election_data/MH/2019_GA/101.xlsx'

df = pd.read_excel(file_path)
print(df.shape)
intermediate_df, constituency = intermediate_table(df, 2019)

print(intermediate_df.shape)


(93, 29)
(93, 11)


In [16]:
df.size

3069

In [14]:
top_3 = [1,2,3]
year = 2018 
AC = 16
# Define conditions
condition_general = (
    (election_df['Year'] == year) &
    (election_df['Constituency_No'] == AC) &
    (~election_df['Candidate'].isin(['None of the Above', 'NOTA']))
)

condition_inc = (
    (election_df['Party'] == 'INC')
)

# Use bitwise OR to combine conditions
filtered_election_df = election_df[
    (condition_general & election_df['Position'].isin(top_3)) |
    (condition_inc & condition_general)
]

filtered_election_df[['Position','Candidate','Party']].head(10)

Unnamed: 0,Position,Candidate,Party
209,1,SUMIT GODARA,BJP
210,2,VIRENDRA BENIWAL,INC
211,3,PRABHUDAYAL,IND
