# Merge Election and Cabinet Data
This notebook merges each election's rows with the first cabinet of the same country that took office after the election date. Rows are matched on `partyID`, and all cabinet variables are kept.

In [7]:
import pandas as pd

# Input workbooks, resolved relative to the notebook's working directory
cabinet_file = "Cabinet_data.xlsx"
election_file = "election_data_weights.xlsx"

cabinet_df = pd.read_excel(cabinet_file)
election_df = pd.read_excel(election_file)

# Parse the date columns and order each table by country, then chronologically
cabinet_df = cabinet_df.assign(
    gov_start_date=pd.to_datetime(cabinet_df["gov_start_date"])
).sort_values(by=["countryID", "gov_start_date"])
election_df = election_df.assign(
    election_date=pd.to_datetime(election_df["election_date"])
).sort_values(by=["countryID", "election_date"])

# Elections excluded from the merge: Austria 2002, Denmark up to and
# including 2007, and Finland 2007
is_at_2002 = (election_df['countryID'] == 'AT') & (election_df['year'] == 2002)
is_dk_until_2007 = (election_df['countryID'] == 'DK') & (election_df['year'] <= 2007)
is_fi_2007 = (election_df['countryID'] == 'FI') & (election_df['year'] == 2007)
election_df = election_df[~(is_at_2002 | is_dk_until_2007 | is_fi_2007)]

In [8]:
# Merge logic: pair every election with the first cabinet of the same country
# whose start date is strictly later than the election date, then attach that
# cabinet's variables to the election's rows via partyID.
# NOTE(review): the matched cabinet is not required to start before the
# following election, so two elections could be paired with the same cabinet
# if no cabinet is recorded between them -- confirm this is intended.
merge_kwargs = dict(on="partyID", how="left", suffixes=("", "_cabinet"))

# Split the cabinet table by country once, instead of re-filtering the whole
# table for every election inside the loop.
cabinets_by_country = {
    country: cabinets for country, cabinets in cabinet_df.groupby("countryID")
}
no_cabinets = cabinet_df.iloc[0:0]  # fallback for countries without any cabinet row

merged_rows = []

for (country, election_date), group in election_df.groupby(["countryID", "election_date"]):
    cabinets_country = cabinets_by_country.get(country, no_cabinets)
    next_cabinet_date = cabinets_country.loc[
        cabinets_country["gov_start_date"] > election_date, "gov_start_date"
    ].min()

    # No cabinet on record after this election: the election is left out.
    if pd.isna(next_cabinet_date):
        continue

    matching_cabinet = cabinets_country[cabinets_country["gov_start_date"] == next_cabinet_date]

    # Left merge keeps every election row; rows whose partyID is absent from
    # the cabinet get missing values in the cabinet columns. Cabinet columns
    # that clash with election columns receive the "_cabinet" suffix.
    merged = pd.merge(group, matching_cabinet, **merge_kwargs)

    merged_rows.append(merged)

if merged_rows:
    final_df = pd.concat(merged_rows, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty frame that
    # still carries the merged column layout.
    final_df = pd.merge(election_df.iloc[0:0], cabinet_df.iloc[0:0], **merge_kwargs)

In [9]:
# Add a column 'F' holding None in every row.
# NOTE(review): nothing in this notebook fills or reads 'F' -- presumably a
# placeholder expected by a downstream consumer of the Excel file; confirm.
final_df = final_df.assign(**{'F': None})

In [10]:

# Seat and voting-weight totals of the government parties (cab_pos > 0),
# computed per election (election_date, countryID)
gov_parties_df = final_df[final_df['cab_pos'] > 0]
group_cols = ['election_date', 'countryID']
gov_totals = gov_parties_df.groupby(group_cols).agg(
    total_gov_seats=('seats', 'sum'),
    total_gov_voting_weight=('voting_weight', 'sum')
).reset_index()

# Attach the per-election totals to every row
df = final_df.merge(gov_totals, on=group_cols, how='left')

# Shares are defined for government parties only; every other row (including
# rows with a missing cab_pos) gets NaN.
is_gov = df['cab_pos'] > 0

# Mask zero totals so the division yields NaN instead of an error or inf.
seat_total = df['total_gov_seats'].where(df['total_gov_seats'] != 0)
weight_total = df['total_gov_voting_weight'].where(df['total_gov_voting_weight'] != 0)

# Column-wise division replaces the row-by-row apply(axis=1).
df['gov_seat_share'] = (df['seats'] / seat_total).where(is_gov)
df['gov_weight_share'] = (df['voting_weight'] / weight_total).where(is_gov)

# The totals were only needed to compute the shares
df = df.drop(columns=['total_gov_seats', 'total_gov_voting_weight'])

In [11]:
# Save to Excel
output_file = "Final_merge.xlsx"
df.to_excel(output_file, index=False)
# Build the message from the path actually written, so the two cannot drift apart.
print(f"✅ Merging complete! Output saved as '{output_file}'")

✅ Merging complete! Output saved as 'full_merged_election_cabinet_data.xlsx'
