In [3]:
import pandas as pd
import numpy as np

In [4]:
# Specify the path to the Excel file
file_path = 'fulldata/scraped/elections_wiki_data.xlsx'

# Read the entire Excel file
excel_data = pd.ExcelFile(file_path)
# List of years to process
years = [1971, 1977, 1980, 1984, 1989, 1991, 1996, 2001, 2006, 2011, 2016]

# Dictionary to store DataFrames for each year's sheets
yearly_data = {}
all_sheets = excel_data.sheet_names

# Loop through each year and find matching sheets
for year in years:
    # Filter sheets matching the current year
    year_sheets = [sheet for sheet in all_sheets if f"Year_{year}" in sheet]
    
    # Load each matching sheet into a dictionary for the year
    yearly_data[year] = {}
    for sheet in year_sheets:
        try:
            yearly_data[year][sheet] = pd.read_excel(file_path, sheet_name=sheet)
            print(f"Loaded sheet: {sheet} for year {year}")
        except Exception as e:
            print(f"Error reading sheet {sheet} for year {year}: {e}")

Loaded sheet: Year_1971_Wiki_1 for year 1971
Loaded sheet: Year_1971_Wiki_2 for year 1971
Loaded sheet: Year_1971_Wiki_3 for year 1971
Loaded sheet: Year_1971_Wiki_4 for year 1971
Loaded sheet: Year_1971_Wiki_5 for year 1971
Loaded sheet: Year_1971_Wiki_6 for year 1971
Loaded sheet: Year_1971_Wiki_7 for year 1971
Loaded sheet: Year_1971_Wiki_8 for year 1971
Loaded sheet: Year_1971_Wiki_9 for year 1971
Loaded sheet: Year_1971_Wiki_10 for year 1971
Loaded sheet: Year_1971_Wiki_11 for year 1971
Loaded sheet: Year_1977_Wiki_1 for year 1977
Loaded sheet: Year_1977_Wiki_2 for year 1977
Loaded sheet: Year_1977_Wiki_3 for year 1977
Loaded sheet: Year_1977_Wiki_4 for year 1977
Loaded sheet: Year_1977_Wiki_5 for year 1977
Loaded sheet: Year_1977_Wiki_6 for year 1977
Loaded sheet: Year_1977_Wiki_7 for year 1977
Loaded sheet: Year_1977_Wiki_8 for year 1977
Loaded sheet: Year_1977_Wiki_9 for year 1977
Loaded sheet: Year_1977_Wiki_10 for year 1977
Loaded sheet: Year_1977_Wiki_11 for year 1977
Loaded

Leaders Table

In [5]:
def Leaders():
    def process_year_data(year, sheet_name, drop_first_row=False, four_party_format=False):
        df = yearly_data[year].get(sheet_name)
        
        if df is None:
            print(f"Data for {year} not found.")
            return None

        # Drop NaN rows
        df.dropna(inplace=True)

        # Drop first row if needed
        if drop_first_row:
            df = df.iloc[1:]

        # Drop unnecessary columns
        if "Unnamed: 3" in df.columns:
            df = df.drop(["Unnamed: 3"], axis=1)

        # If it's a four-party format, split and merge data
        if four_party_format:
            df_1 = df.iloc[:9, :]
            df_2 = df.iloc[9:, :]
            
            # Drop 'Unnamed: 3' from both parts if it exists
            df_1 = df_1.drop(["Unnamed: 3"], axis=1, errors="ignore")
            df_2 = df_2.drop(["Unnamed: 3"], axis=1, errors="ignore")
            
            # Rename columns for proper merging
            rename_dict = {'First party': 'Third party', 'Second party': 'Fourth party'}
            df_2.rename(columns=rename_dict, inplace=True)
            
            # Merge the two DataFrames
            df = df_1.merge(df_2, on="Unnamed: 0", how="right")
        
        return df

    # Define years and formats
    years = [
        (1971, "Year_1971_Wiki_3", False, False),
        (1977, "Year_1977_Wiki_3", False, True),
        (1980, "Year_1980_Wiki_3", True, False),
        (1984, "Year_1984_Wiki_4", False, False),
        (1989, "Year_1989_Wiki_3", False, True),
        (1991, "Year_1991_Wiki_3", True, False),
        (1996, "Year_1996_Wiki_3", True, False),
        (2001, "Year_2001_Wiki_3", True, False),
        (2006, "Year_2006_Wiki_3", True, False),
        (2011, "Year_2011_Wiki_3", True, False),
        (2016, "Year_2016_Wiki_3", True, False),
    ]

    # List to store processed DataFrames
    processed_data_list = []

    for year, sheet, drop_row, four_party in years:
        df = process_year_data(year, sheet, drop_row, four_party)
        if df is not None:
            df['Year'] = year  # Add year column
            processed_data_list.append(df)

    # Combine all processed data into one DataFrame
    if processed_data_list:
        final_df = pd.concat(processed_data_list, ignore_index=True)
        print("Final dataset created!")
        return final_df
    else:
        print("No valid data found!")
        return None

Alliance Table

In [6]:
# Define the Alliance function
def Alliance(yearly_data):
    """Processes Tamil Nadu election data, assigns alliances, and returns a cleaned DataFrame."""

    # Define alliances for each year
    years_alliances = {
        1971: {"DMK+": ["Year_1971_Wiki_5"], "INC(O)+": ["Year_1971_Wiki_6"]},
        1977: {"ADMK+": ["Year_1977_Wiki_5"], "DMK": ["Year_1977_Wiki_6"], "INC+": ["Year_1977_Wiki_7"], "JP": ["Year_1977_Wiki_8"]},
        1980: {"ADMK+": ["Year_1980_Wiki_5"], "DMK+": ["Year_1980_Wiki_6"]},
        1989: {"DMK+": ["Year_1989_Wiki_5"], "ADMK+(jayalalitha)": ["Year_1989_Wiki_6"], "ADMK+(janaki)": ["Year_1989_Wiki_7"], "INC+": ["Year_1989_Wiki_8"]},
        1991: {"ADMK+": ["Year_1991_Wiki_5"], "DMK+": ["Year_1991_Wiki_6"]},
        1996: {"DMK+": ["Year_1996_Wiki_5"], "ADMK+": ["Year_1996_Wiki_6"], "MDMK+": ["Year_1996_Wiki_7"], "PMK+": ["Year_1996_Wiki_8"]},
        2006: {"DMK+": ["Year_2006_Wiki_5"], "ADMK+": ["Year_2006_Wiki_6"], "DMDK+": ["Year_2006_Wiki_7"], "BJP+": ["Year_2006_Wiki_8"]},
        2011: {"ADMK+": ["Year_2011_Wiki_5"], "DMK+": ["Year_2011_Wiki_6"], "BJP+": ["Year_2011_Wiki_7"]},
        2016: {"all alliance": ["Year_2016_Wiki_7"]}
    }

    # Function to clean and transform data
# Function to transform data
    def transform_data(df, year):
        df.columns = df.columns.str.strip()

        # Fix column name inconsistencies for 1971
        if year == 1971:
            if "Party.1" in df.columns and "Party" not in df.columns:
                df.rename(columns={"Party.1": "Party"}, inplace=True)
            if "Leader.1" in df.columns and "Leader" not in df.columns:
                df.rename(columns={"Leader.1": "Leader"}, inplace=True)

        # Apply transformations for different years
        if year in range(1971, 2002):  # 1971-2001
            df = df.drop(columns=["Party", "Election Symbol"], errors="ignore")
            df.rename(columns={"Party.1": "Party"}, inplace=True)
        
        elif year == 2006:
            df = df.drop(columns=["Unnamed: 0", "Election Symbol", "Seats Contested.1", "Seats Contested.2", "Seats Contested.3"], errors="ignore")
            df.rename(columns={'Seats Contested': 'Seats'}, inplace=True)
            if "No." not in df.columns:
                df.insert(0, "No.", range(1, len(df) + 1))
        
        elif year == 2011:
            df = df.drop(columns=["Election Symbol", "Party", "Ref.", "Unnamed: 7", "Seats Contested.1"], errors="ignore")
            df.rename(columns={"S.No": "No.", "Party.1": "Party", 'Seats Contested': 'Seats'}, inplace=True)

            # Ensure "No." column exists
            if "No." not in df.columns:
                df.insert(0, "No.", range(1, len(df) + 1))
        
        elif year == 2016:
            df = df.drop(columns=["Party/Alliance", "Party/Alliance.2", "Symbol"], errors="ignore")
            df.rename(columns={'Party/Alliance.1': 'Alliance', 'Party/Alliance.3': 'Party', "Contesting seats": "Seats"}, inplace=True)

            # Fix missing "No."
            if "No." not in df.columns:
                df.insert(0, "No.", range(1, len(df) + 1))


        # Remove rows with missing "Seats" values
        df.dropna(subset=["Seats"], inplace=True)
        df = df[~df["Seats"].astype(str).str.contains("Unregistered", na=False)]

        # Convert Seats to integer
        df["Seats"] = pd.to_numeric(df["Seats"], errors="coerce").fillna(0).astype(int)

        # Add Year column
        df["Year"] = year

        return df
    
    final_df = pd.DataFrame(columns=["No.", "Party", "Leader", "Alliance", "Seats", "Year"])


    # Process each year and alliance
    df_list = []
    for year, alliances in years_alliances.items():
        sheets = yearly_data.get(year, {})

        for alliance, sheet_names in alliances.items():
            for sheet_name in sheet_names:
                if sheet_name in sheets:
                    df = sheets[sheet_name]
                    df = transform_data(df, year)

                    # ‚úÖ Assign correct alliance name
                    df["Alliance"] = alliance  

                    # Collect data
                    df_list.append(df)

    # Concatenate all data
    final_df = pd.concat(df_list, ignore_index=True)
    final_df = final_df[["No.", "Party", "Leader", "Alliance", "Seats", "Year"]]
    print(final_df.head())
    # Step 1: Filter only 2016 data
    mask_2016 = final_df["Year"] == 2016
    df_2016 = final_df[mask_2016].copy()  # Copy to avoid modifying original DataFrame

    # Step 2: Identify rows where 'Seats' is a 3-digit number
    df_2016["Seats"] = df_2016["Seats"].astype(str)  # Convert to string for regex matching
    three_digit_rows = df_2016[df_2016["Seats"].str.match(r"^\d{3}$")].index  # Get indices of 3-digit rows

    # Step 3: Define alliance mapping for first four 3-digit rows
    alliance_order = ["AIADMK+", "DMK+", "DMDK+", "BJP+"]
    alliance_dict = {idx: alliance_order[i] for i, idx in enumerate(three_digit_rows[:4])}  # Assign alliances

    # Step 4: Propagate alliance names downward
    current_alliance = None
    for idx in df_2016.index:
        if idx in alliance_dict:
            current_alliance = alliance_dict[idx]  # Set new alliance from 3-digit row
        if current_alliance:
            df_2016.at[idx, "Alliance"] = current_alliance  # Assign alliance

    # Step 5: Convert 'Seats' back to integers
    df_2016["Seats"] = pd.to_numeric(df_2016["Seats"], errors="coerce").fillna(0).astype(int)

    # Step 6: Update final_df with modified 2016 data
    final_df.update(df_2016)

    return final_df

Breif_Result Table

In [7]:
def Breif_Result():
    
    # Define the years and their corresponding table keys
    yearly_keys = {
        1971: "Year_1971_Wiki_8",
        1977: "Year_1977_Wiki_9",
        1980: "Year_1984_Wiki_7",
        1984: "Year_1984_Wiki_6",
        1989: "Year_1989_Wiki_9",
        1991: "Year_1991_Wiki_7",
        1996: "Year_1996_Wiki_9",
        2001: "Year_2001_Wiki_6",
        2006: "Year_2006_Wiki_12",
        2011: "Year_2011_Wiki_11",
        2016: "Year_2016_Wiki_10"
    }



    # Define columns to drop for specific years
    drop_columns = {
        1971: ["Alliances","Party","Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
        1977: ["Alliance/Party.1","Unnamed: 7", "Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
        1980: ["Unnamed: 7", "Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
        1984: ["Unnamed: 7", "Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
        1989: ["Unnamed: 7", "Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
    #  
        1991: ["Unnamed: 1", "Unnamed: 7", "Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
        1996: ["Unnamed: 1", "Unnamed: 7", "Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
        2001: ["Unnamed: 1", "Unnamed: 7", "Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
        2006: ["Unnamed: 1","Adj. %‚Ä°"],
        2011: ["Unnamed: 1", "Unnamed: 7", "Unnamed: 8", "Unnamed: 9","Adj. %‚Ä°"],
        2016: ["Party/Alliance_Party/Alliance", "Party/Alliance_Party/Alliance.1", "Party/Alliance_Party/Alliance.2","Unnamed: 9_level_0_Unnamed: 9_level_1"]
    }

    # Column rename mappings
    rename_columns = {
        1971: {'Party.1': 'Alliance/Party'},
        1989: {'Change‚Ä†': 'Change'},  # Ensure ‚Ä† is removed
        2011: {'Popular vote': 'Popular Vote'},
        2016: {
            'Party/Alliance_Party/Alliance.3': 'Alliance/Party',
            "Votes_Votes": "Popular Vote",
            "%_%": "Vote %",
            "Seats_Contested": "Seats contested",
            "Seats_Won": "Seats won",
            "Seats_+/-": "Change"
        }
    }

    # Row limits for specific years
    row_limits = {2011: 19, 2016: 24}
    
    # List to store processed DataFrames
    df_list = []

    # Process each year based on yearly_keys
    for year, key in yearly_keys.items():
        df = yearly_data.get(year, {}).get(key)
        
        if df is not None:
            # Strip all column names to remove extra spaces
            df.columns = df.columns.str.strip()
            df.columns = df.columns.str.replace(r'\s+', ' ', regex=True)
            df.columns = df.columns.str.replace('‚Ä†', '', regex=False)  # Remove special character

            # Debugging: Print actual column names
            print(f"üîç Columns before renaming for {year}: {df.columns.tolist()}")

            # Drop unnecessary columns
            df.drop(columns=[col for col in drop_columns.get(year, []) if col in df.columns], errors='ignore', inplace=True)

            # Rename columns based on year
            if year in rename_columns:
                rename_dict = rename_columns[year]
                df.rename(columns={col: rename_dict[col] for col in df.columns if col in rename_dict}, inplace=True)

            # Trim rows if applicable
            if year in row_limits:
                df = df.iloc[:row_limits[year]]

            # Add 'Year' column
            df["Year"] = year

            # Append processed DataFrame
            df_list.append(df)
            print(f"‚úÖ Processed {key} for {year}")

    # Concatenate all DataFrames into a single final DataFrame
    final_df = pd.concat(df_list, ignore_index=True)
    
    # Define the correct order of columns
    desired_columns = [ "Alliance/Party", "Seats contested", "Seats won","Change","Popular Vote", "Vote %",  "Year"]

    # Ensure only available columns are selected (avoiding KeyErrors)
    final_df = final_df.drop("Unnamed: 1",axis=True)
    final_df = final_df[[col for col in desired_columns if col in final_df.columns]]
    
    return final_df
    

Total_Result Table

In [8]:
def Total_Result():
   
    # Define the years and their corresponding table keys
    yearly_sheets = {
        1971: "Year_1971_Wiki_9",
        1977: "Year_1977_Wiki_10",
        1980: "Year_1980_Wiki_8",
        1984: "Year_1984_Wiki_7",
        1989: "Year_1989_Wiki_10",
        1991: "Year_1991_Wiki_8",
        1996: "Year_1996_Wiki_10",
        2001: "Year_2001_Wiki_7",
        2006: "Year_2006_Wiki_13",
        2011: "Year_2011_Wiki_12",
        2016: "Year_2016_Wiki_11",
    }



    # Define columns to drop for each year
    columns_to_drop = {
        1971: ["Winner_Party", "Runner Up_Party"],
        1977: ["Winner_Party", "Runner Up_Party"],
        1980: ["Winner_Party", "Runner Up_Party"],
        1984: ["Winner_Party", "Runner Up_Party"],
        1989: ["Party", "Party.2"],
        1991: [],
        1996: ["Party", "Party.2"],
        2001: ["Party"],
        2006: ["Party"],
        2011: ["Winner_Party", "Runner Up_Party"],
        2016: ["Winner_Party", "Runner Up_Party"]
    }
    # Function to rename columns based on year
    def rename_columns(df, year):
        rename_dict = {
            1971: {'Assembly Constituency_#k': 'Assembly Constituency_#k', 'Assembly Constituency_Name': 'Assembly Constituency_Name', 'Winner_Candidate': 'Winner_Candidate', 'Winner_Party.1': 'Winner_Party', 'Winner_Votes': 'Winner_Votes', 'Winner_%': 'Winner_%', 'Runner Up_Candidate': 'Runner Up_Candidate', 'Runner Up_Party.1': 'Runner Up_Party', 'Runner Up_Votes': 'Runner Up_Votes', 'Runner Up_%': 'Runner Up_%', 'Margin_Margin': 'Margin'},
            1977: {'Assembly Constituency_#k': 'Assembly Constituency_#k', 'Assembly Constituency_Name': 'Assembly Constituency_Name', 'Winner_Candidate': 'Winner_Candidate', 'Winner_Party.1': 'Winner_Party', 'Winner_Votes': 'Winner_Votes', 'Winner_%': 'Winner_%', 'Runner Up_Candidate': 'Runner Up_Candidate', 'Runner Up_Party.1': 'Runner Up_Party', 'Runner Up_Votes': 'Runner Up_Votes', 'Runner Up_%': 'Runner Up_%', 'Margin_Margin': 'Margin'},
            1980: {'Assembly Constituency_#k': 'Assembly Constituency_#k', 'Assembly Constituency_Name': 'Assembly Constituency_Name', 'Winner_Candidate': 'Winner_Candidate', 'Winner_Party.1': 'Winner_Party', 'Winner_Votes': 'Winner_Votes', 'Winner_%': 'Winner_%', 'Runner Up_Candidate': 'Runner Up_Candidate', 'Runner Up_Party.1': 'Runner Up_Party', 'Runner Up_Votes': 'Runner Up_Votes', 'Runner Up_%': 'Runner Up_%', 'Margin_Margin': 'Margin'},
            1984: {'Assembly Constituency_#k': 'Assembly Constituency_#k', 'Assembly Constituency_Name': 'Assembly Constituency_Name', 'Winner_Candidate': 'Winner_Candidate', 'Winner_Party.1': 'Winner_Party', 'Winner_Votes': 'Winner_Votes', 'Winner_%': 'Winner_%', 'Runner Up_Candidate': 'Runner Up_Candidate', 'Runner Up_Party.1': 'Runner Up_Party', 'Runner Up_Votes': 'Runner Up_Votes', 'Runner Up_%': 'Runner Up_%', 'Margin_Margin': 'Margin'},
            1989: {'Assembly Constituency': 'Assembly Constituency_Name', 'Winner': 'Winner_Candidate', 'Party.1': 'Winner_Party', 'Runner Up': 'Runner Up_Candidate', 'Party.3': 'Runner Up_Party', 'Margin': 'Margin'},
            1991: {'Assembly Constituency': 'Assembly Constituency_Name', 'Winner': 'Winner_Candidate', 'Party': 'Winner_Party', 'Runner-up': 'Runner Up_Candidate', 'Party': 'Runner Up_Party', 'Margin': 'Margin'},
            1996: {'Assembly Constituency': 'Assembly Constituency_Name', 'Winner': 'Winner_Candidate', 'Party.1': 'Winner_Party', 'Runner-up': 'Runner Up_Candidate', 'Party.3': 'Runner Up_Party', 'Margin': 'Margin'},
            2001: {'Assembly constituency': 'Assembly Constituency_Name', 'Party.1': 'Winner_Party', 'Winner': 'Winner_Candidate', 'Runner-up': 'Runner Up_Candidate', 'Party.2': 'Runner Up_Party', 'Margin': 'Margin'},
            2006: {'#': 'Assembly Constituency_#k', 'Assembly Constituency': 'Assembly Constituency_Name', 'Party.1': 'Winner_Party', 'Winner': 'Winner_Candidate', 'Runner-up': 'Runner Up_Candidate', 'Party.2': 'Runner Up_Party'}
        }
        return df.rename(columns=rename_dict.get(year, {}))
    # Function to process the DataFrame
    def process_dataframe(df, year, sheet_name):
        # Check and adjust the first row as header if necessary
        if df.columns.isnull().any():
            df.columns = df.iloc[0]  # Set first row as header
            df = df.drop(index=0).reset_index(drop=True)  # Drop first row

        # Drop unwanted columns
        drop_cols = columns_to_drop.get(year, [])
        df = df.drop(columns=[col for col in drop_cols if col in df.columns], errors="ignore")

        # Ensure "Assembly Constituency_#k" exists
        if "Assembly Constituency_#k" not in df.columns:
            df["Assembly Constituency_#k"] = df.reset_index().index + 1  

        # Add Election Year and Unique ID
        df["Election Year"] = year
        df["Election Unique ID"] = df["Election Year"].astype(str) + "_" + df["Assembly Constituency_#k"].astype(str).str.zfill(3)

        return df

    # Define final required columns, including Election Year and Election Unique ID
    desired_columns = [
        'Election Year', 'Election Unique ID', 'Assembly Constituency_#k', 
        'Assembly Constituency_Name', 'Winner_Candidate', 'Winner_Party', 
        'Winner_Votes', 'Winner_%', 'Runner Up_Candidate', 'Runner Up_Party', 
        'Runner Up_Votes', 'Runner Up_%', 'Margin'
    ]
    # Main loop for processing each sheet
    df_list = []  # List to store processed DataFrames
    for year, sheet_name in yearly_sheets.items():
        df = yearly_data[year].get(sheet_name)

        if df is not None:
            print(f"\nProcessing: {sheet_name} for year {year}")  # Debugging log

            # Rename columns based on year
            df = rename_columns(df, year)

            # Print columns before processing
            print(f"Columns before processing for {sheet_name} ({year}):\n{df.columns.tolist()}")

            # Process DataFrame
            df = process_dataframe(df, year, sheet_name)

            # Print columns after processing
            print(f"Columns after processing for {sheet_name} ({year}):\n{df.columns.tolist()}")

            # Append the processed DataFrame to the list
            df_list.append(df)

    # Concatenate final DataFrame
    final_df = pd.concat(df_list, ignore_index=True)
    # Ensure final_df contains only the desired columns
    final_df = final_df[[col for col in desired_columns if col in final_df.columns]]
          
    
    return final_df 
    

In [9]:
# Run Leaders() to process data
final_leaders_df = Leaders()
final_alliance_df = Alliance(yearly_data)
final_brief_result_df = Breif_Result()
final_total_result_df = Total_Result()



Final dataset created!
  No.                       Party               Leader Alliance  Seats  Year
0   1   Dravida Munnetra Kazhagam       M. Karunanidhi     DMK+    203  1971
1   2    Communist Party of India         Tha. Pandian     DMK+     10  1971
2   3      All India Forward Bloc  P.K. Mookiah Thevar     DMK+      9  1971
3   4       Praja Socialist Party                  NaN     DMK+      4  1971
4   5  Indian Union Muslim League      Muhammed Ismail     DMK+      2  1971
üîç Columns before renaming for 1971: ['Alliances', 'Party', 'Party.1', 'Popular Vote', 'Vote %', 'Seats contested', 'Seats won', 'Change', 'Unnamed: 8', 'Unnamed: 9']
‚úÖ Processed Year_1971_Wiki_8 for 1971
üîç Columns before renaming for 1977: ['Alliance/Party', 'Alliance/Party.1', 'Seats won', 'Change', 'Popular Vote', 'Vote %', 'Adj. %‚Ä°', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9']
‚úÖ Processed Year_1977_Wiki_9 for 1977
üîç Columns before renaming for 1984: ['Alliance/Party', 'Unnamed: 1', 'Seats won',

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Year'] = year  # Add year column
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Year'] = year  # Add year column
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Year'] = year  # Add year column
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,

In [10]:
display(final_leaders_df)

Unnamed: 0.1,Unnamed: 0,First party,Second party,Year,Third party,Fourth party
0,Leader,M. Karunanidhi,K. Kamaraj,1971,,
1,Party,DMK,INC(O),1971,,
2,Leader's¬†seat,Saidapet,Did Not Contest[a],1971,,
3,Seats¬†won,205,21,1971,,
4,Seat¬†change,61[1],1[1],1971,,
...,...,...,...,...,...,...
72,Seats¬†won,136,98,2016,0,
73,Seat¬†change,67,67,2016,29,
74,Popular¬†vote,17617060,17175374,2016,2621297,
75,Percentage,40.88%,39.85%,2016,6.10%,


In [11]:
display(final_alliance_df)

Unnamed: 0,No.,Party,Leader,Alliance,Seats,Year
0,1,Dravida Munnetra Kazhagam,M. Karunanidhi,DMK+,203,1971
1,2,Communist Party of India,Tha. Pandian,DMK+,10,1971
2,3,All India Forward Bloc,P.K. Mookiah Thevar,DMK+,9,1971
3,4,Praja Socialist Party,,DMK+,4,1971
4,5,Indian Union Muslim League,Muhammed Ismail,DMK+,2,1971
...,...,...,...,...,...,...
118,20,Tamil Maanila Congress,G. K. Vasan,DMDK+,26,2016
119,21,Viduthalai Chiruthaigal Katchi,Thol. Thirumavalavan,DMDK+,25,2016
120,22,Bharatiya Janata Party,Tamilisai Soundararajan,BJP+,165,2016
121,23,Akila Indhiya Makkal Munnetra Kazhagam,,BJP+,24,2016


In [12]:
display(final_brief_result_df)

Unnamed: 0,Alliance/Party,Seats contested,Seats won,Change,Popular Vote,Vote %,Year
0,Dravida Munnetra Kazhagam,203,184,47,7654935,48.58%,1971
1,Communist Party of India,10,8,6,364803,2.32%,1971
2,All India Forward Bloc,9,7,6,268721,1.71%,1971
3,Praja Socialist Party,4,4,‚Äî,147985,0.94%,1971
4,Indian Union Muslim League,2,2,1,69634,0.44%,1971
...,...,...,...,...,...,...,...
167,Bahujan Samaj Party,,0,,97823.0,0.23%,2016
168,Social Democratic Party of India,,0,,65978.0,0.15%,2016
169,Independents,234,0,,617907.0,1.44%,2016
170,None of the above,234,‚Äì,‚Äì,565077.0,1.31%,2016


In [13]:
display(final_total_result_df)

Unnamed: 0,Election Year,Election Unique ID,Assembly Constituency_#k,Assembly Constituency_Name,Winner_Candidate,Winner_Party,Winner_Votes,Winner_%,Runner Up_Candidate,Runner Up_Party,Runner Up_Votes,Runner Up_%,Margin
0,1971,1971_001,1,Washermanpet,M. Vedachalam,,38989.0,54.04,Ananthan,,32231.0,44.68,6758.0
1,1971,1971_002,2,Harbour,A M Mohideen,,29225.0,49.44,Umapathy G,,28739.0,48.62,486.0
2,1971,1971_003,3,Basin Bridge,M. R. Kannan,,48959.0,56.73,K. Ramadoss,,33174.0,38.44,15785.0
3,1971,1971_004,4,Park Town,H. V. Hande,,30743.0,54.70,A. V. P. Asaithambi,,25456.0,45.30,5287.0
4,1971,1971_005,5,Perambur,Sathyavani Muthu,,49070.0,56.37,D Sulochana,,37047.0,42.56,12023.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2560,2016,2016_230,230,Nagercoil,N. Suresh Rajan,,67369.0,38.87,Gandhi M.R,,46413.0,26.78,
2561,2016,2016_231,231,Colachel,J. G. Prince,,67195.0,40.19,Ramesh P,,41167.0,24.62,
2562,2016,2016_232,232,Padmanabhapuram,Mano Thangaraj,,76249.0,47.20,Rajendra Prasad K P,,35344.0,21.88,
2563,2016,2016_233,233,Vilavancode,S. Vijayadharani,,68789.0,42.43,Dharmaraj C,,35646.0,21.98,


In [14]:
# Save the consolidated data to an Excel file
output_path = 'fulldata/cleaned/Cleaned_Wikipedia_Data.xlsx'
with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
    final_leaders_df.to_excel(writer, sheet_name="Leaders Table", index=False)
    final_alliance_df.to_excel(writer, sheet_name="Alliance Table", index=False)
    final_brief_result_df.to_excel(writer, sheet_name="Brief Result Table", index=False)
    final_total_result_df.to_excel(writer, sheet_name="Total Result Table", index=False)

print(f"Data has been successfully saved to {output_path}")

Data has been successfully saved to fulldata/cleaned/Cleaned_Wikipedia_Data.xlsx
