In [10]:
import os
import pandas as pd
import matplotlib.pyplot as plt

path_to_folder = 'D:\\python_scripts\\map_votes'

'''
Open votes information as a pandas DataFrame
'''

## Data was download from 
## 'https://www.datosabiertos.gob.pe/dataset/resultados-por-mesa-de-las-elecciones-presidenciales-2021-segunda-vuelta-oficina-nacional-de'
path_to_vote_file = os.path.join(path_to_folder, 'resultados.csv')
votes_df = pd.read_csv(path_to_vote_file, sep=',', encoding='utf-8', index_col=False) #To avoid error ' byte 0xbf' encode with 'latin-1'
votes_df = votes_df.rename(columns={'ï»¿UBIGEO':'UBIGEO'}) # correct some column names
votes_df = votes_df.fillna(0) # Avoid nan values

# 'UBIGEO' >= 910101 aren't inside Peru
votes_df = votes_df[votes_df['UBIGEO'] < 910101]

# information
# P1:Peru Libre
# P2:Fuerza Popular

# Group by district (DISTRITO) and sort alphabetically
votes_by_district = votes_df.groupby(['UBIGEO', 'DISTRITO']).sum()
votes_by_district = votes_by_district.reset_index ()
votes_by_district = votes_by_district.sort_values(by=['DISTRITO'])
votes_by_district = votes_by_district.reset_index ()
votes_by_district = votes_by_district.drop(['index'], axis=1)


'''
Equalize district names in both dataframes
'''

# In votes_by_districts
votes_by_district = votes_by_district.drop([951,1136])


# Adding 1 column to votes_by_district with pertentual rate of votes 
# -1 -> +1 
# -1 : 100% for Fuerza Popular
# +1 : 100% for Peru Libre
votes_by_district['rate'] = round(2*(-.5+votes_by_district['VOTOS_P1']/(votes_by_district['VOTOS_P1']+votes_by_district['VOTOS_P2'])),2)
votes_by_district['absenteeism'] = round(votes_by_district['N_CVAS']/votes_by_district['N_ELEC_HABIL'], 2)


In [None]:
# Usaremos el dataframe creado con el script votes.py
# Con ello tendremos el dataframe votes_by_district

In [11]:
print(votes_by_district.head()); print(votes_by_district.columns)

   UBIGEO                 DISTRITO  MESA_DE_VOTACION  TIPO_OBSERVACION  \
0   30101                  ABANCAY            634260               0.0   
1   20302  ABELARDO PARDO LEZAMETA              1373               0.0   
2   40402                    ACARI             99654               0.0   
3   22001                     ACAS              7479               0.0   
4   71002                    ACCHA            164865               0.0   

    N_CVAS  N_ELEC_HABIL  VOTOS_P1  VOTOS_P2  VOTOS_VB  VOTOS_VN  VOTOS_VI  \
0  36463.0         47150   25534.0    8799.0     239.0    1891.0       0.0   
1    123.0           229      72.0      35.0       1.0      15.0       0.0   
2   2307.0          3325     842.0    1374.0      22.0      69.0       0.0   
3    166.0           322      87.0      76.0       0.0       3.0       0.0   
4   1933.0          2804    1622.0     188.0      32.0      91.0       0.0   

   Column1  rate  absenteeism  
0      0.0  0.49         0.77  
1      0.0  0.35      

In [12]:
# De este solo necesitamos algunas columnas:
# VOTOS_P1 (Perú Libre), VOTOS_P2 (Fuerza Popular), N_CVAS (votantes que asistieron)
# VOTOS_VB (blanco), VOTOS_VN (nulos), rate y absenteeism (ausentismo)

In [13]:
datos_elecciones = votes_by_district[['absenteeism', 'VOTOS_VB', 'VOTOS_VN']]
y = votes_by_district['rate']

In [14]:
# importando librería de ML
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso

In [15]:
X_train, X_test, y_train, y_test = train_test_split(datos_elecciones, y)

In [17]:
# construyendo el modelo lineal básico
lr = LinearRegression().fit(X_train, y_train)

In [18]:
print(lr.coef_, lr.intercept_)

[ 2.38015776e-02  3.17628411e-05 -5.60554415e-05] 0.37340974313551323


In [21]:
print('La eficiencia de este modelo de regresión lineal con los datos de entrenamiento es {}'.format(round(lr.score(X_train, y_train), 2)))

La eficiencia de este modelo de regresión lineal con los datos de entrenamiento es 0.07


In [22]:
# con ello evidenciamos, de manera muy superficial, que las columnas elegidas no pueden
# explicar las votaciones para ambos candidatos

# Se procederá a ajustar el modelo lineal con Ridge y Lasso.
# ...