In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from jinja2 import Environment, FileSystemLoader

# Load the aggregated data
csv_file_path = 'C:/Proyectos/Elecciones-2024/data/Agregados/Presidencial/TODO_pres_Seccion_aggregated.csv'
aggregated_data = pd.read_csv(csv_file_path)

# Ensure the plots directory exists
plots_dir = 'C:/Proyectos/Elecciones-2024/plots/Entidades/Participacion'
os.makedirs(plots_dir, exist_ok=True)

# (y-column, colour, legend label) for every series drawn on each figure
COALITION_SERIES = [
    ('MORENA_COALITION_PERCENTAGE', 'darkred', 'MORENA Coalition'),
    ('OPPOSITION_COALITION_PERCENTAGE', 'blue', 'Opposition Coalition'),
    ('OTHERS_PERCENTAGE', 'orange', 'Others'),
]


def save_participation_plot(data, title, output_path, scatter_alpha):
    """Draw vote share vs. participation for each coalition and save it as a PNG.

    Args:
        data: DataFrame holding 'PARTICIPATION_PERCENTAGE', 'TOTAL_VOTOS_CALCULADOS',
            'LISTA_NOMINAL' and every y-column listed in COALITION_SERIES.
        title: Figure title.
        output_path: Destination file for the saved figure.
        scatter_alpha: Opacity of the scatter markers.
    """
    total_votos_calculados_sum = data['TOTAL_VOTOS_CALCULADOS'].sum()
    lista_nominal_sum = data['LISTA_NOMINAL'].sum()
    # Guard against an empty or zero nominal list instead of dividing by zero.
    if lista_nominal_sum:
        participation_percentage = (total_votos_calculados_sum / lista_nominal_sum) * 100
    else:
        participation_percentage = float('nan')

    fig, ax = plt.subplots(figsize=(14, 8))

    for y_column, color, label in COALITION_SERIES:
        # regplot drops NaN but not +/-inf; a single infinite value would turn the
        # whole regression fit into NaN, so keep only finite (x, y) pairs.
        finite_pairs = (
            data[['PARTICIPATION_PERCENTAGE', y_column]]
            .replace([float('inf'), float('-inf')], float('nan'))
            .dropna()
        )
        if finite_pairs.empty:
            continue  # nothing to fit or draw for this series
        sns.regplot(x='PARTICIPATION_PERCENTAGE', y=y_column, data=finite_pairs,
                    scatter_kws={'color': color, 'alpha': scatter_alpha},
                    line_kws={'color': color}, label=label, ax=ax)

    # Setting labels and title
    ax.set_xlabel('Participation Percentage')
    ax.set_ylabel('Vote Percentage')
    ax.set_title(title)
    ax.set_xlim(50, 100)  # Start the x-axis at 50
    ax.set_ylim(0, 100)  # Ensure the y-axis ranges from 0 to 100

    # Annotate with the participation rate and the nominal-list total (comma-separated thousands)
    ax.text(55, 90, f'Participation: {participation_percentage:.2f}%', fontsize=12, bbox=dict(facecolor='white', alpha=0.5))
    ax.text(55, 85, f'Sum of LISTA_NOMINAL: {lista_nominal_sum:,.0f}', fontsize=12, bbox=dict(facecolor='white', alpha=0.5))

    ax.legend()
    ax.grid(True)

    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)  # release the figure so looping over many entities does not pile up memory


# Generate and save the overall plot
overall_plot_path = os.path.join(plots_dir, 'Vote_Percentage_vs_Participation_All_Data_Estado.png')
save_participation_plot(
    aggregated_data,
    'Vote Percentage vs Participation Percentage by Coalition for All Data',
    overall_plot_path,
    scatter_alpha=0.1,
)

print(f'Saved overall plot at {overall_plot_path}')

# Always defined, so the HTML step can run even when the per-entity plots are skipped
plot_file_paths = [{'id_entidad': 'All', 'entidad_name': 'All Data', 'file_path': overall_plot_path}]

# Ensure ENTIDAD is in the data
if 'ENTIDAD' in aggregated_data.columns:
    # Skip missing entity labels: comparing against NaN would select no rows
    unique_entidades = aggregated_data['ENTIDAD'].dropna().unique()

    for entidad in unique_entidades:
        subset_data = aggregated_data[aggregated_data['ENTIDAD'] == entidad]
        entidad_name = entidad  # Assuming the name is the same within the same ID

        # Save the plot
        plot_file_path = os.path.join(plots_dir, f'Vote_Percentage_vs_Participation_{entidad}.png')
        save_participation_plot(
            subset_data,
            f'Vote Percentage vs Participation Percentage by Coalition\nin {entidad_name}',
            plot_file_path,
            scatter_alpha=0.12,
        )

        plot_file_paths.append({
            'id_entidad': entidad,
            'entidad_name': entidad_name,
            'file_path': plot_file_path
        })

        print(f'Saved plot for ENTIDAD {entidad} at {plot_file_path}')

else:
    print("Column 'ENTIDAD' not found in the dataset.")

# Destination of the rendered gallery page
output_html_path = 'C:/Proyectos/Elecciones-2024/plots/index.html'

# Load 'template.html' from the current working directory and fill it with the plot list
env = Environment(loader=FileSystemLoader('.'))
template = env.get_template('template.html')
html_content = template.render(plots=plot_file_paths)

# Write the page explicitly as UTF-8 so the output encoding does not depend on the platform default
with open(output_html_path, mode='w', encoding='utf-8') as html_file:
    html_file.write(html_content)

print(f'Saved HTML file at {output_html_path}')


  y *= step
  return function_base._ureduce(a,
  diff_b_a = subtract(b, a)


Saved overall plot at C:/Proyectos/Elecciones-2024/plots/Entidades/Participacion\Vote_Percentage_vs_Participation_All_Data_Estado.png
Saved plot for ENTIDAD AGUASCALIENTES at C:/Proyectos/Elecciones-2024/plots/Entidades/Participacion\Vote_Percentage_vs_Participation_AGUASCALIENTES.png
Saved plot for ENTIDAD BAJA CALIFORNIA at C:/Proyectos/Elecciones-2024/plots/Entidades/Participacion\Vote_Percentage_vs_Participation_BAJA CALIFORNIA.png
Saved plot for ENTIDAD BAJA CALIFORNIA SUR at C:/Proyectos/Elecciones-2024/plots/Entidades/Participacion\Vote_Percentage_vs_Participation_BAJA CALIFORNIA SUR.png
Saved plot for ENTIDAD CAMPECHE at C:/Proyectos/Elecciones-2024/plots/Entidades/Participacion\Vote_Percentage_vs_Participation_CAMPECHE.png
Saved plot for ENTIDAD COAHUILA at C:/Proyectos/Elecciones-2024/plots/Entidades/Participacion\Vote_Percentage_vs_Participation_COAHUILA.png
Saved plot for ENTIDAD COLIMA at C:/Proyectos/Elecciones-2024/plots/Entidades/Participacion\Vote_Percentage_vs_Partici

In [2]:
# Display the column labels of the aggregated dataset loaded above
aggregated_data.columns


Index(['ID_ENTIDAD', 'ENTIDAD', 'ID_DISTRITO_FEDERAL', 'DISTRITO_FEDERAL',
       'SECCION', 'PVEM', 'PT', 'MORENA', 'PVEM_PT_MORENA', 'PVEM_PT',
       'PVEM_MORENA', 'PT_MORENA', 'PAN', 'PRI', 'PRD', 'PAN_PRI_PRD',
       'PAN_PRD', 'PAN_PRI', 'PRI_PRD', 'MC', 'CANDIDATO/A NO REGISTRADO/A',
       'VOTOS NULOS', 'TOTAL_VOTOS_CALCULADOS', 'LISTA_NOMINAL',
       'MORENA_COALITION_VOTES', 'OPPOSITION_COALITION_VOTES', 'OTHERS_VOTES',
       'MORENA_COALITION_PERCENTAGE', 'OPPOSITION_COALITION_PERCENTAGE',
       'OTHERS_PERCENTAGE', 'PARTICIPATION_PERCENTAGE'],
      dtype='object')

In [None]:
#correlation plots
