# ODD Data Processing for Any Department

This notebook processes ODD data for a specified department. It's divided into several sections, each performing a specific task in the data processing pipeline.

## Import Libraries and Define Helper Functions

This cell imports necessary libraries and defines helper functions for data processing.

In [17]:
def process_odd_data(department, odd_folder, output_dir, mapping_dir, final_dir):
    """Filter, clean, and zone-tag the ODD rows of a single department.

    The combined ODD table is loaded from ``odd_folder``, restricted to rows
    whose ``DEPTO`` equals ``department`` (exact, case-sensitive match), and
    written out as CSV after each processing stage:

    * ``<department>_odd_filtered.csv`` -- rows of the department, untouched.
    * ``<department>_odd_cleaned.csv``  -- ``CONVOCATORIA`` renamed to
      ``PARTIDO`` and every value passed through ``clean_party_name``.
    * ``<department>_odd_no_acto.csv``  -- only when an ``ACTO`` column
      exists; same data without that column.
    * ``<department>_odd_final.csv``    -- with a ``ZONA`` column derived
      from ``SERIES`` (written to ``final_dir``).

    File names use the lower-cased department name.

    Args:
        department: Department name as it appears in the ``DEPTO`` column.
        odd_folder: Folder handed to ``load_odd_data``.
        output_dir: Existing directory for the intermediate CSV files.
        mapping_dir: Directory holding ``mapping_<department>.json``, the
            lookup passed to ``get_zone`` (presumably series -> zone;
            verify against ``get_zone``).
        final_dir: Existing directory for the final CSV file.

    Returns:
        None. Results are written to disk and a summary is printed. Nothing
        is written when no data could be loaded or when no row matches
        ``department``.

    Raises:
        FileNotFoundError: If the department's mapping JSON does not exist.
    """
    # Load data
    combined_data = load_odd_data(odd_folder)
    if combined_data is None:
        print("No valid data found in Excel files.")
        return

    # Filter data for the specified department
    department_odd_data = combined_data[combined_data['DEPTO'] == department].copy()
    if department_odd_data.empty:
        # A misspelled or differently-cased name would otherwise produce a
        # set of silently empty CSV files.
        print(f"No ODD rows found for department '{department}'.")
        return

    # Build every output path once so the files written and the summary
    # printed below always agree (and use the platform's path separator).
    prefix = department.lower()
    filtered_path = os.path.join(output_dir, f'{prefix}_odd_filtered.csv')
    cleaned_path = os.path.join(output_dir, f'{prefix}_odd_cleaned.csv')
    no_acto_path = os.path.join(output_dir, f'{prefix}_odd_no_acto.csv')
    final_path = os.path.join(final_dir, f'{prefix}_odd_final.csv')
    saved_paths = [filtered_path, cleaned_path]

    department_odd_data.to_csv(filtered_path, index=False, encoding='utf-8')

    # Clean and transform data
    department_odd_data = department_odd_data.rename(columns={'CONVOCATORIA': 'PARTIDO'})
    department_odd_data['PARTIDO'] = department_odd_data['PARTIDO'].apply(clean_party_name)
    department_odd_data.to_csv(cleaned_path, index=False, encoding='utf-8')

    if 'ACTO' in department_odd_data.columns:
        department_odd_data = department_odd_data.drop(columns=['ACTO'])
        department_odd_data.to_csv(no_acto_path, index=False, encoding='utf-8')
        saved_paths.append(no_acto_path)

    # Add zone information
    mapping_file = os.path.join(mapping_dir, f'mapping_{prefix}.json')
    with open(mapping_file, 'r', encoding='utf-8') as f:
        series_to_zona = json.load(f)
    department_odd_data['ZONA'] = department_odd_data['SERIES'].apply(
        lambda series: get_zone(series, series_to_zona)
    )

    # Save final data
    department_odd_data.to_csv(final_path, index=False, encoding='utf-8')
    saved_paths.append(final_path)

    print(f"Processed ODD data for {department}:")
    print(department_odd_data.head())
    print("\nSaved processed data to:")
    for path in saved_paths:
        print(f"- {path}")

## Process ODD Data for a Specific Department

Change the department name, the input folder, the mapping directory, or the output directories in the cell below to process data for a different department.

In [18]:
import os

# Locate the project root: two directory levels above the current working
# directory. '__file__' is deliberately a quoted literal (the real __file__
# is not defined inside a notebook), so abspath() anchors it at the working
# directory and three dirname() calls strip the placeholder name plus two
# parent folders.
project_root = os.path.abspath('__file__')
for _ in range(3):
    project_root = os.path.dirname(project_root)

department = "Colonia"  # Change this to process different departments

# Input locations
odd_folder = os.path.join(project_root, "data", "raw", "results", "ODD")
mapping_dir = os.path.join(project_root, "maps", "zonasxseries")

# Output locations (one sub-folder per department)
output_dir = os.path.join(project_root, "data", "processed", "ODD", department)
final_dir = os.path.join(project_root, "data", "final", "odd", department)

# Create the output folders up front; existing ones are left untouched
for _target_dir in (output_dir, final_dir):
    os.makedirs(_target_dir, exist_ok=True)

process_odd_data(
    department=department,
    odd_folder=odd_folder,
    output_dir=output_dir,
    mapping_dir=mapping_dir,
    final_dir=final_dir,
)

Processed ODD data for Colonia:
               PARTIDO    DEPTO CIRCUITO SERIES     ESCRUTINIO  HOJA  \
1234  Asamblea Popular  Colonia        6    NAB  Departamental  1326   
1235  Asamblea Popular  Colonia       11    NAB  Departamental  1326   
1236  Asamblea Popular  Colonia       13    NAB  Departamental  1326   
1237  Asamblea Popular  Colonia       16    NAB  Departamental  1326   
1238  Asamblea Popular  Colonia       47    NAD  Departamental  1326   

     CNT_VOTOS                                   ZONA  
1234         1  Pueblo Nuevo - Colonia del Sacramento  
1235         1  Pueblo Nuevo - Colonia del Sacramento  
1236         1  Pueblo Nuevo - Colonia del Sacramento  
1237         1  Pueblo Nuevo - Colonia del Sacramento  
1238         1                     Real de San Carlos  

Saved processed data to:
- c:\Users\trico\Desktop\Internas 2024\data\processed\ODD\Colonia/colonia_odd_filtered.csv
- c:\Users\trico\Desktop\Internas 2024\data\processed\ODD\Colonia/colonia_odd_clea

  return pd.concat(all_data, ignore_index=True) if all_data else None
