In [9]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from jinja2 import Environment, FileSystemLoader
import os

# Input CSV and figure output folder used throughout the notebook.
csv_file_path = 'C:/Proyectos/Elecciones-2024/CDMX_pres_aggregated_coalition.csv'  # Update with the correct file path
plots_dir = 'C:/Proyectos/Elecciones-2024/plots'

# Read the aggregated CSV into a DataFrame.
aggregated_data = pd.read_csv(filepath_or_buffer=csv_file_path)

# Create the output folder; an already existing folder is not an error.
os.makedirs(name=plots_dir, exist_ok=True)


In [1]:
# Display the column labels of the loaded DataFrame.
# NOTE(review): this cell's execution count ([1]) precedes that of the loading
# cell ([9]), and the recorded NameError shows `aggregated_data` was not defined
# when it ran. Run the data-loading cell first in a fresh kernel.
aggregated_data.columns


NameError: name 'aggregated_data' is not defined

In [12]:

# Overall figure: every row of the dataset on a single chart.

# Turnout over the whole dataset: summed computed votes as a percentage of the
# summed nominal list.
total_votos_calculados_sum = aggregated_data['TOTAL_VOTOS_CALCULADOS'].sum()
lista_nominal_sum = aggregated_data['LISTA_NOMINAL'].sum()
participation_percentage = (total_votos_calculados_sum / lista_nominal_sum) * 100

fig, ax = plt.subplots(figsize=(14, 8))

# (vote-share column, colour, legend label) for each scatter + regression line.
overall_series = (
    ('MORENA_COALITION_PERCENTAGE', 'darkred', 'Coalición Morena'),
    ('OPPOSITION_COALITION_PERCENTAGE', 'blue', 'Coalición Opositora'),
    ('OTHERS_PERCENTAGE', 'orange', 'MC y Otros'),
)
for vote_column, series_color, series_label in overall_series:
    sns.regplot(
        x='PARTICIPATION_PERCENTAGE',
        y=vote_column,
        data=aggregated_data,
        scatter_kws={'color': series_color, 'alpha': 0.1},
        line_kws={'color': series_color},
        label=series_label,
        ax=ax,
    )

# Axis labels, title and fixed axis ranges (x starts at 50, y spans 0-100).
ax.set_xlabel('% de Participación')
ax.set_ylabel('% Voto')
ax.set_title('% de Voto vs. % de Participación')
ax.set_xlim(50, 100)
ax.set_ylim(0, 100)

# Annotate the chart with the turnout and the nominal-list total
# (the latter with comma-separated thousands).
ax.text(55, 90, f'Participación: {participation_percentage:.2f}%', fontsize=12,
        bbox={'facecolor': 'white', 'alpha': 0.5})
ax.text(55, 85, f'Lista Nominal: {lista_nominal_sum:,.0f}', fontsize=12,
        bbox={'facecolor': 'white', 'alpha': 0.5})

ax.legend()
ax.grid(True)

# Write the figure to disk, then release it.
overall_plot_path = os.path.join(plots_dir, 'Vote_Percentage_vs_Participation_All_Data.png')
fig.savefig(overall_plot_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print(f'Saved overall plot at {overall_plot_path}')

# Per-district figures: one chart per ID_DISTRITO_FEDERAL value, plus the list of
# records (id, name, file path) that the HTML index is rendered from.

# Create the record list before checking the columns, seeded with the overall
# figure, so it always exists. It used to be defined only inside the `if`, so a
# dataset without the district columns made the later template rendering fail
# with a NameError instead of producing an index with just the overall figure.
plot_file_paths = [{'id_distrito': 'All', 'distrito_name': 'All Data', 'file_path': overall_plot_path}]

# Ensure ID_DISTRITO_FEDERAL and DISTRITO_FEDERAL are in the data
if 'ID_DISTRITO_FEDERAL' in aggregated_data.columns and 'DISTRITO_FEDERAL' in aggregated_data.columns:
    # Drop missing IDs before taking the unique values: a missing ID never compares
    # equal to itself, so its subset would be empty and `.iloc[0]` below would raise.
    unique_id_distritos = aggregated_data['ID_DISTRITO_FEDERAL'].dropna().unique()

    # (vote-share column, colour, legend label) for each scatter + regression line.
    district_series = (
        ('MORENA_COALITION_PERCENTAGE', 'darkred', 'MORENA Coalition'),
        ('OPPOSITION_COALITION_PERCENTAGE', 'blue', 'Opposition Coalition'),
        ('OTHERS_PERCENTAGE', 'orange', 'Others'),
    )

    for id_distrito in unique_id_distritos:
        subset_data = aggregated_data[aggregated_data['ID_DISTRITO_FEDERAL'] == id_distrito]
        distrito_name = subset_data['DISTRITO_FEDERAL'].iloc[0]  # Assuming the name is the same within the same ID

        # District turnout: summed TOTAL_VOTOS_CALCULADOS as a percentage of the
        # summed LISTA_NOMINAL for this district's rows.
        total_votos_calculados_sum = subset_data['TOTAL_VOTOS_CALCULADOS'].sum()
        lista_nominal_sum = subset_data['LISTA_NOMINAL'].sum()
        participation_percentage = (total_votos_calculados_sum / lista_nominal_sum) * 100

        fig, ax = plt.subplots(figsize=(14, 8))

        for vote_column, series_color, series_label in district_series:
            sns.regplot(
                x='PARTICIPATION_PERCENTAGE',
                y=vote_column,
                data=subset_data,
                scatter_kws={'color': series_color, 'alpha': 0.12},
                line_kws={'color': series_color},
                label=series_label,
                ax=ax,
            )

        # Axis labels, title and fixed axis ranges (x starts at 50, y spans 0-100).
        ax.set_xlabel('Participation Percentage')
        ax.set_ylabel('Vote Percentage')
        ax.set_title(f'Vote Percentage vs Participation Percentage by Coalition\nin Distrito Federal {distrito_name} (ID: {id_distrito})')
        ax.set_xlim(50, 100)
        ax.set_ylim(0, 100)

        # Annotate with the district turnout and nominal-list total
        # (the latter with comma-separated thousands).
        ax.text(55, 90, f'Participation: {participation_percentage:.2f}%', fontsize=12,
                bbox={'facecolor': 'white', 'alpha': 0.5})
        ax.text(55, 85, f'Sum of LISTA_NOMINAL: {lista_nominal_sum:,.0f}', fontsize=12,
                bbox={'facecolor': 'white', 'alpha': 0.5})

        ax.legend()
        ax.grid(True)

        # Save the figure, then close it so figures do not accumulate across the loop.
        plot_file_path = os.path.join(plots_dir, f'Vote_Percentage_vs_Participation_{id_distrito}.png')
        fig.savefig(plot_file_path, dpi=300, bbox_inches='tight')
        plt.close(fig)

        plot_file_paths.append({
            'id_distrito': id_distrito,
            'distrito_name': distrito_name,
            'file_path': plot_file_path
        })

        print(f'Saved plot for ID_DISTRITO_FEDERAL {id_distrito} at {plot_file_path}')

else:
    print("Columns 'ID_DISTRITO_FEDERAL' or 'DISTRITO_FEDERAL' not found in the dataset.")

# Render the HTML index of the generated figures.
#
# `template.html` is looked up relative to the current working directory ('.'),
# so the notebook has to be run from the folder that contains the template.
env = Environment(loader=FileSystemLoader('.'))
template = env.get_template('template.html')

# The template receives the list of plot records (dicts with the keys
# 'id_distrito', 'distrito_name' and 'file_path') under the name `plots`.
html_content = template.render(plots=plot_file_paths)

# Write the page into the figures folder. The location is derived from `plots_dir`,
# like the PNG paths, instead of repeating the directory literal, so the index
# always lands in the folder that was actually created for the outputs.
output_html_path = os.path.join(plots_dir, 'index.html')
with open(output_html_path, 'w', encoding="utf-8") as f:
    f.write(html_content)

print(f'Saved HTML file at {output_html_path}')



Saved overall plot at C:/Proyectos/Elecciones-2024/plots\Vote_Percentage_vs_Participation_All_Data.png
Saved plot for ID_DISTRITO_FEDERAL 1.0 at C:/Proyectos/Elecciones-2024/plots\Vote_Percentage_vs_Participation_1.0.png
Saved plot for ID_DISTRITO_FEDERAL 2.0 at C:/Proyectos/Elecciones-2024/plots\Vote_Percentage_vs_Participation_2.0.png
Saved plot for ID_DISTRITO_FEDERAL 3.0 at C:/Proyectos/Elecciones-2024/plots\Vote_Percentage_vs_Participation_3.0.png
Saved plot for ID_DISTRITO_FEDERAL 4.0 at C:/Proyectos/Elecciones-2024/plots\Vote_Percentage_vs_Participation_4.0.png
Saved plot for ID_DISTRITO_FEDERAL 5.0 at C:/Proyectos/Elecciones-2024/plots\Vote_Percentage_vs_Participation_5.0.png
Saved plot for ID_DISTRITO_FEDERAL 6.0 at C:/Proyectos/Elecciones-2024/plots\Vote_Percentage_vs_Participation_6.0.png
Saved plot for ID_DISTRITO_FEDERAL 7.0 at C:/Proyectos/Elecciones-2024/plots\Vote_Percentage_vs_Participation_7.0.png
Saved plot for ID_DISTRITO_FEDERAL 8.0 at C:/Proyectos/Elecciones-2024/