# In [1]:
import pandas as pd
import os

# Root directory of the data set: it is scanned for the input folders by
# load_verifier_data() and also receives the cleaned CSV written at the end.
data_dir = "data"

def load_verifier_data(data_dir):
    """Load and combine the verifier machine data of every folder in *data_dir*.

    Each entry of ``data_dir`` that contains a ``verifier-machines.csv`` file
    contributes its rows. Every row receives a ``Year`` column (the part of the
    folder name before the first underscore, kept as a string) and a
    ``Registered Voters`` column looked up by ``FIPS code`` in the folder's
    ``verifier-jurisdictions.csv``. Entries without a machines file are skipped.

    Args:
        data_dir: Directory whose sub-folders hold the CSV files.

    Returns:
        One DataFrame holding the rows of all folders, in sorted folder order,
        with a fresh 0..n-1 index.

    Raises:
        ValueError: If no entry of ``data_dir`` contains a
            ``verifier-machines.csv`` file.
    """
    frames = []

    for folder in sorted(os.listdir(data_dir)):
        folder_path = os.path.join(data_dir, folder)
        machines_path = os.path.join(folder_path, "verifier-machines.csv")
        jurisdictions_path = os.path.join(folder_path, "verifier-jurisdictions.csv")

        if not os.path.isfile(machines_path):
            continue

        # skiprows=1 drops the single line that precedes the header row.
        # NOTE(review): an earlier comment here spoke of *two* descriptive
        # rows -- confirm against the actual file layout.
        # 'FIPS code' is read as text (presumably to keep leading zeros).
        machines = pd.read_csv(
            machines_path,
            skiprows=1,
            index_col=False,
            converters={'FIPS code': str},
        )
        machines['Year'] = folder.split('_')[0]  # Extract the year from folder name

        if os.path.isfile(jurisdictions_path):
            jurisdictions = pd.read_csv(
                jurisdictions_path,
                skiprows=1,
                index_col=False,
                converters={'FIPS code': str},
            )
            # Left join: every machine row is kept, voters are NaN when the
            # FIPS code has no match in the jurisdictions file.
            machines = pd.merge(
                machines,
                jurisdictions[['FIPS code', 'Registered Voters']],
                on='FIPS code',
                how='left',
            )
        else:
            # No jurisdictions file for this folder: keep the machine rows and
            # leave the voter count missing, exactly as an unmatched FIPS code
            # would be after the left join above.
            machines['Registered Voters'] = float("nan")

        frames.append(machines)

    if not frames:
        # pd.concat([]) would raise a ValueError with an opaque message.
        raise ValueError(
            f"No 'verifier-machines.csv' files found in the folders of {data_dir!r}"
        )

    return pd.concat(frames, ignore_index=True)

# Read every folder's export into a single frame
df = load_verifier_data(data_dir)

# Cast both year columns in one pass: 'Year' becomes a plain integer and
# 'First Year in Use' the nullable 'Int64' dtype.
df = df.astype({'Year': int, 'First Year in Use': 'Int64'})

# Create a new column to identify new equipment

def is_new_equipment(row):
    """Compare the row's 'First Year in Use' with its 'Year' and return the result."""
    first_year = row['First Year in Use']
    data_year = row['Year']
    return first_year == data_year

# Normalise the sign first: some first-use years are recorded as negative
# numbers. Doing this before any derived column is computed keeps
# 'New Equipment' and 'Years in Use' consistent with each other (a row whose
# corrected first-use year equals its data year is new AND has 0 years in use).
df['First Year in Use'] = df['First Year in Use'].abs()

# Row-wise flag: was the equipment first used in the row's own data year?
df['New Equipment'] = df.apply(is_new_equipment, axis=1)
df['Years in Use'] = df['Year'] - df['First Year in Use']

# Nullable integer dtype, so missing voter counts stay missing instead of
# forcing the column to float.
df['Registered Voters'] = df['Registered Voters'].astype('Int64')

df.sort_values(by=["FIPS code", "Equipment Type", "Year", "Manufacturer", "Model"], inplace=True)

# Flag, for every (FIPS code, Manufacturer, Model) combination, the row of the
# last year in which that combination appears -- unless that year is the most
# recent year in the data, in which case the equipment presumably is still in
# use and no final year is known yet.
df["Final Year in Use"] = False

# Most recent data year, derived from the data instead of being hard-coded.
latest_year = df["Year"].max()

# The frame is ordered by Equipment Type before Year, so the last row of a
# group is not necessarily the row of its latest year; idxmax() looks for the
# latest year explicitly. idxmax() returns the first row on ties, so the frame
# is scanned in reverse to make ties resolve to the row that comes last in the
# current order. Rows with a missing Manufacturer or Model are left out by
# groupby and therefore stay False.
final_rows = (
    df.iloc[::-1]
    .groupby(["FIPS code", "Manufacturer", "Model"])["Year"]
    .idxmax()
)
final_years = df.loc[final_rows.to_numpy(), "Year"]
retired_rows = final_years[final_years != latest_year].index
df.loc[retired_rows, "Final Year in Use"] = True

# Target position of each column; the moves are applied one after another,
# top to bottom.
reorder = [
    ('Registered Voters', 3),
    ('Year', 6),
    ('First Year in Use', 7),
    ('Years in Use', 8),
    ('New Equipment', 9),
    ('Final Year in Use', 10),
]
for col, new_position in reorder:
    # Detach the column, then put it back under the same name at its position
    df.insert(new_position, col, df.pop(col))

# Write the cleaned table into the data directory, without the index column
output_file = os.path.join(data_dir, "cleaned_verifier_data.csv")
df.to_csv(output_file, index=False)
