In [13]:
import os
import pandas as pd
from datetime import datetime

In [14]:
def process_delegates_and_acc_data(acc_data, delegates, acc_delegate_address, filename,
                                   seasons=(5, 6, 7)):
    """
    Redistribute the ACC delegate's voting power equally among the members of the
    season that is active on the file date, then drop the ACC delegate row.

    Neither input dataframe is modified; all work is done on copies.

    Parameters:
    - acc_data (pd.DataFrame): ACC membership data with the columns 'address',
      'season', 'start_date' and 'end_date' (dates formatted as DD-MM-YYYY).
    - delegates (pd.DataFrame): Delegate data with the columns 'delegate' and
      'voting_power'.
    - acc_delegate_address (str): Address of the ACC delegate (matched case-insensitively).
    - filename (str): Name (or path) of the delegates file; its stem must be a
      date formatted as YYYY-MM-DD.
    - seasons (iterable): Season numbers to consider, checked in the given order.
      Defaults to (5, 6, 7).

    Returns:
    - pd.DataFrame: A new dataframe with lower-cased addresses, the ACC power
      distributed, the ACC delegate removed, sorted by voting power (descending).
      The original `delegates` object is returned unchanged when the ACC delegate
      is absent, when the file date is outside every season, or when the active
      season has no usable member address.

    Raises:
    - ValueError: If the filename stem or an ACC date does not match its format.
    """
    # Accept a bare file name or a full path, and strip whatever extension is present.
    file_stem = os.path.splitext(os.path.basename(filename))[0]
    file_date = datetime.strptime(file_stem, '%Y-%m-%d')

    # Parse the season windows into local Series so the caller's frame stays untouched.
    start_dates = pd.to_datetime(acc_data['start_date'], format='%d-%m-%Y')
    end_dates = pd.to_datetime(acc_data['end_date'], format='%d-%m-%Y')

    # Check if ACC delegate address is present in the delegates data
    acc_address = acc_delegate_address.lower()
    delegate_addresses = delegates['delegate'].str.lower()
    acc_delegate_row = delegates[delegate_addresses == acc_address]
    if acc_delegate_row.empty:
        print("ACC delegate not found in the delegates data. Returning unmodified dataframe.")
        return delegates

    # Determine the active season. Each season's window is read from its first row;
    # seasons that do not appear in the ACC data are skipped instead of raising.
    active_season = None
    for season in seasons:
        in_season = acc_data['season'] == season
        if not in_season.any():
            continue
        season_start = start_dates[in_season].iloc[0]
        season_end = end_dates[in_season].iloc[0]
        if season_start <= file_date <= season_end:
            active_season = acc_data[in_season]
            break

    if active_season is None:
        print(f"File date {file_date} does not fall within any season. Returning unmodified dataframe.")
        return delegates

    # Unique, lower-cased member addresses. Duplicates and blanks are dropped so the
    # number of shares equals the number of rows that actually receive a share.
    member_addresses = active_season['address'].str.lower().dropna().drop_duplicates()
    if member_addresses.empty:
        print(f"No member addresses found for the active season. Returning unmodified dataframe.")
        return delegates

    # Work on a copy with normalised addresses and a clean positional index.
    updated_delegates = delegates.copy()
    updated_delegates['delegate'] = delegate_addresses
    updated_delegates = updated_delegates.reset_index(drop=True)

    # Add season members that are not yet delegates, starting from zero voting power.
    missing_members = member_addresses[~member_addresses.isin(updated_delegates['delegate'])]
    if not missing_members.empty:
        missing_rows = pd.DataFrame({'delegate': missing_members.to_numpy(), 'voting_power': 0})
        updated_delegates = pd.concat([updated_delegates, missing_rows], ignore_index=True)

    # Split the ACC delegate's voting power equally among the season members.
    acc_delegate_voting_power = acc_delegate_row['voting_power'].iloc[0]
    voting_power_per_member = acc_delegate_voting_power / len(member_addresses)
    is_member = updated_delegates['delegate'].isin(member_addresses)
    current_power = updated_delegates['voting_power']
    # `where` builds a new column, so an integer column is upcast cleanly to float.
    updated_delegates['voting_power'] = current_power.where(
        ~is_member, current_power + voting_power_per_member
    )

    # Remove the ACC delegate from the dataset
    updated_delegates = updated_delegates[updated_delegates['delegate'] != acc_address]

    # Sort the dataframe by voting power in descending order
    sorted_data = updated_delegates.sort_values(by='voting_power', ascending=False).reset_index(drop=True)

    print(f"Processed {filename}: Distributed ACC delegate power and removed the delegate.")
    return sorted_data


In [15]:
# Process all delegate files in a folder
def process_all_delegate_files(acc_file_path, delegates_folder_path, acc_delegate_address, output_folder):
    """
    Process every delegate CSV in a folder and write the updated files to an output folder.

    Each output file keeps the name of its input file. A file whose loading or
    processing raises a ValueError (for example an empty or malformed CSV, or a
    file name that is not a date) is reported and skipped; the batch continues.

    Parameters:
    - acc_file_path (str): Path to the ACC data CSV file (read as latin1).
    - delegates_folder_path (str): Path to the folder containing delegate data files.
    - acc_delegate_address (str): Address of the ACC delegate.
    - output_folder (str): Path to the folder where updated files will be saved;
      created if it does not exist.
    """
    # Load the ACC data
    acc_data = pd.read_csv(acc_file_path, encoding='latin1')

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so runs are reproducible.
    for file_name in sorted(os.listdir(delegates_folder_path)):
        if not file_name.endswith(".csv"):
            continue

        file_path = os.path.join(delegates_folder_path, file_name)
        print(f"Processing file: {file_name}")

        try:
            # Loading is inside the try block: pandas signals empty or unparsable
            # CSV files with ValueError subclasses, and one bad file should not
            # abort the whole batch.
            delegates = pd.read_csv(file_path)

            # Hand over a copy of the ACC data so that nothing the per-file step
            # may do to its inputs can leak into the next file.
            updated_delegates = process_delegates_and_acc_data(
                acc_data.copy(), delegates, acc_delegate_address, file_name
            )

            # Save the updated data to the output folder
            output_file_path = os.path.join(output_folder, file_name)
            updated_delegates.to_csv(output_file_path, index=False)
            print(f"Updated file saved: {output_file_path}")
        except ValueError as e:
            print(f"Skipping file {file_name} due to error: {e}")

In [None]:
# Example usage: redistribute the ACC delegate's voting power in every delegate
# snapshot found in the input folder and write the results to a separate folder.
acc_delegate_address = "0x3eee61b92c36e97be6319bf9096a1ac3c04a1466"
acc_file_path = "../Data/Councils_and_Committees_Data/Anticapture_Commission.csv"
delegates_folder_path = "../Data/Delegates_Data"
output_folder = "../Data/Delegates_Data_with_ACC_Members"

process_all_delegate_files(
    acc_file_path=acc_file_path,
    delegates_folder_path=delegates_folder_path,
    acc_delegate_address=acc_delegate_address,
    output_folder=output_folder,
)