In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm     

***Cargamos el dataframe obtenido en la etapa de pre-procesamieno***

In [2]:
def load_data(file):
    
    df = pd.read_csv(file)
    df.drop(columns = ['Unnamed: 0'], inplace = True)
    
    return df
df = load_data('dataframe_limpio2323.csv')

***Definimos las variables X e Y***

In [3]:
X = df[['por_consenso_federal_paso', 'por_fit_paso',
       'por_MAS_paso', 'por_Frente_todos_paso', 'por_frente_patriota_paso',
       'por_juntos_cambio_paso', 'por_accion_vecinal_paso',
       'por_autonomista_paso', 'por_unite_paso', 'por_NOS_paso',
       'por_votos_blanco_paso']]

y = df[['por_consenso_federal_gen', 'por_fit_gen', 'por_juntos_cambio_gen',
       'por_frente_todos_gen', 'por_NOS_gen', 'por_Unite_gen',
       'por_blancos_gen']]

***Creamos una función para obtener las regresiones***

In [4]:
r2 = []

def get_ols_regression(y,column_name,X):
    model = sm.OLS(y[column_name], X).fit()
    r_squared = model.rsquared
    r2.append(r_squared)
    model_summary = model.summary()
    results_as_html = model_summary.tables[1].as_html()
    table = pd.read_html(results_as_html, header=0, index_col=0)[0]
    table.reset_index(inplace = True)
    table.rename(columns = {'coef':column_name, 'index': 'Features'}, inplace = True)
    return table

In [5]:
results_JxC = get_ols_regression(y, 'por_juntos_cambio_gen', X)
results_frente_todos = get_ols_regression(y, 'por_frente_todos_gen', X)
results_consenso_federal = get_ols_regression(y, 'por_consenso_federal_gen', X)
results_unite = get_ols_regression(y, 'por_Unite_gen', X)
results_fit = get_ols_regression(y, 'por_fit_gen', X)
results_blancos = get_ols_regression(y, 'por_blancos_gen', X)
results_nos = get_ols_regression(y, 'por_NOS_gen', X)

In [6]:
r2

[0.9012713289770744,
 0.8971813479842055,
 0.4685221401646211,
 0.2101543774035468,
 0.20418830224213202,
 0.14583136783601514,
 0.4193946344645316]

In [7]:
results_blancos

Unnamed: 0,Features,por_blancos_gen,std err,t,P>|t|,[0.025,0.975]
0,por_consenso_federal_paso,-0.0043,0.001,-3.452,0.001,-0.007,-0.002
1,por_fit_paso,0.0371,0.003,11.691,0.0,0.031,0.043
2,por_MAS_paso,0.0604,0.007,8.059,0.0,0.046,0.075
3,por_Frente_todos_paso,0.0098,0.0,44.855,0.0,0.009,0.01
4,por_frente_patriota_paso,0.019,0.01,1.847,0.065,-0.001,0.039
5,por_juntos_cambio_paso,0.0092,0.0,24.805,0.0,0.009,0.01
6,por_accion_vecinal_paso,0.1112,0.013,8.4,0.0,0.085,0.137
7,por_autonomista_paso,-0.0384,0.016,-2.439,0.015,-0.069,-0.008
8,por_unite_paso,-0.0408,0.004,-9.569,0.0,-0.049,-0.032
9,por_NOS_paso,-0.0053,0.002,-2.16,0.031,-0.01,-0.0


***Creamos la matriz de transferencia***

In [8]:
def features_repetidas():
    features = ['Consenso Federal', 'FIT', 'MAS',
       'Frente de Todos', 'Frente Patriota', 'Juntos por el Cambio',
       'Acción Vecinal', 'Partido Autonomista', 'Unite',
       'Frente Nos', 'Votos en Blanco']
    
    total_votos_paso = [df['Votos_consenso_federal_paso'].sum(), df['Votos_fit_paso'].sum(),
                        df['Votos_MAS_paso'].sum(), df['Votos_Frente_todos_paso'].sum(),
                        df['Votos_frente_patriota_paso'].sum(), df['Votos_juntos_cambio_paso'].sum(), df['Votos_accion_vecinal_paso'].sum(),
                       df['Votos_autonomista_paso'].sum(), df['Votos_unite_paso'].sum(), df['Votos_NOS_paso'].sum(), df['votos_blanco_paso'].sum()]
    
    features = pd.DataFrame(features, columns = ['Features'])
    total_votos_paso = pd.DataFrame(total_votos_paso , columns = ['Total_votos_paso'])
    
    features_votos_paso = features.join(total_votos_paso)
    features_votos_paso = pd.concat([features_votos_paso]*7 , ignore_index = True)
    
    features_votos_paso.reset_index(drop = True , inplace = True)
    
    features_votos_paso['Coefficients'] = pd.Series(dtype=int)
    
    features_votos_paso.reset_index(drop = True , inplace=True)
    return features_votos_paso

def y_repetidas():
    
    ys_rep = ['Juntos por el Cambio',
       'Frente de Todos', 'Consenso Federal', 'Unite',
       'FIT', 'Frente Nos','Votos en blanco']
    
    total_votos_gen = [df['Votos_juntos_cambio_gen'].sum(), df['Votos_Frente_todos_gen'].sum(),
                        df['Votos_consenso_federal_gen'].sum(), df['Votos_unite_gen'].sum(),
                        df['Votos_fit_gen'].sum(), df['Votos_NOS_gen'].sum(), df['votos_blanco_gen'].sum()]
    

    ys_rep = pd.Series(ys_rep)
    ys_rep = ys_rep.repeat(11)
    
    total_votos_gen = pd.Series(total_votos_gen)
    total_votos_gen = total_votos_gen.repeat(11)
    
    ys_rep = pd.DataFrame(ys_rep , columns = ['ys_rep'])
    ys_rep.reset_index(drop = True, inplace = True)
    
    total_votos_gen = pd.DataFrame(total_votos_gen , columns = ['Total_votos_gen'])
    total_votos_gen.reset_index(drop = True, inplace = True)
    
    
    ys_total = ys_rep.join(total_votos_gen)
    
    ys_total = ys_total.reset_index(drop = True)
    
    return ys_total

features_votos_paso = features_repetidas()
ys_total = y_repetidas()

results_table = features_votos_paso.join(ys_total)
results_table.ys_rep.unique()


array(['Juntos por el Cambio', 'Frente de Todos', 'Consenso Federal',
       'Unite', 'FIT', 'Frente Nos', 'Votos en blanco'], dtype=object)

In [9]:
def insert_coefficients(slice1, slice2, df, column_name):
    
    results_table.loc[slice1 : slice2, 'Coefficients'] = df[column_name].values.reshape(-1,1)

    return results_table

results_table = insert_coefficients(0,10, results_JxC, 'por_juntos_cambio_gen')
results_table = insert_coefficients(11,21, results_frente_todos, 'por_frente_todos_gen')
results_table = insert_coefficients(22,32, results_consenso_federal, 'por_consenso_federal_gen')
results_table = insert_coefficients(33,43, results_unite, 'por_Unite_gen')
results_table = insert_coefficients(44,54, results_fit, 'por_fit_gen')
results_table = insert_coefficients(55,65, results_nos, 'por_NOS_gen')
results_table = insert_coefficients(66,76, results_blancos, 'por_blancos_gen')

***Ya tenemos la Tabla que nos permitirá crear el diagrama Sankey***

In [11]:
# results_table.to_csv(r'C:\Users\Administrador\Desktop\Carrera de Data Science\DS-Cor\Transferencia votos\dataframe_tabla_resultados_transferncia.csv')

In [13]:
results_table

Unnamed: 0,Features,Total_votos_paso,Coefficients,ys_rep,Total_votos_gen
0,Consenso Federal,1251064.0,0.3762,Juntos por el Cambio,6748101.0
1,FIT,365498.0,0.2507,Juntos por el Cambio,6748101.0
2,MAS,97762.0,0.0054,Juntos por el Cambio,6748101.0
3,Frente de Todos,6653143.0,0.0145,Juntos por el Cambio,6748101.0
4,Frente Patriota,34598.0,0.2165,Juntos por el Cambio,6748101.0
...,...,...,...,...,...
72,Acción Vecinal,29203.0,0.1112,Votos en blanco,197306.0
73,Partido Autonomista,22435.0,-0.0384,Votos en blanco,197306.0
74,Unite,345802.0,-0.0408,Votos en blanco,197306.0
75,Frente Nos,450918.0,-0.0053,Votos en blanco,197306.0


***Creamos una matriz de transferencia con todos los coeficientes***

In [15]:
def tabla(df, nombre_columna):
    
    df = df[['Features', nombre_columna]]
    
    return df

juntos_cambio = tabla(results_JxC, 'por_juntos_cambio_gen')
frente_todos = tabla(results_frente_todos, 'por_frente_todos_gen')
consenso_federal = tabla(results_consenso_federal, 'por_consenso_federal_gen')
fit = tabla(results_fit, 'por_fit_gen')
unite = tabla(results_unite, 'por_Unite_gen')
nos = tabla(results_nos, 'por_NOS_gen')
blancos = tabla(results_blancos, 'por_blancos_gen')


from functools import reduce

dataframes = [juntos_cambio, frente_todos, consenso_federal, fit, unite, nos,blancos]

transferencia_porcentual = reduce(lambda  left,right: pd.merge(left,right,on=['Features'],
                                            how='outer'), dataframes)


In [16]:
transferencia_porcentual

Unnamed: 0,Features,por_juntos_cambio_gen,por_frente_todos_gen,por_consenso_federal_gen,por_fit_gen,por_Unite_gen,por_NOS_gen,por_blancos_gen
0,por_consenso_federal_paso,0.3762,0.1238,0.434,0.0246,0.0397,0.006,-0.0043
1,por_fit_paso,0.2507,0.1532,0.1895,0.307,0.047,0.0155,0.0371
2,por_MAS_paso,0.0054,0.4638,0.182,0.2197,0.0686,8.2e-05,0.0604
3,por_Frente_todos_paso,0.0145,0.9231,0.0247,0.0086,0.0054,0.0139,0.0098
4,por_frente_patriota_paso,0.2165,0.61,0.0606,-0.015,0.0321,0.0768,0.019
5,por_juntos_cambio_paso,1.023,-0.0247,-0.0044,0.0029,0.0013,-0.0073,0.0092
6,por_accion_vecinal_paso,0.8199,-0.0118,-0.0129,0.0192,0.0096,0.0647,0.1112
7,por_autonomista_paso,0.6608,0.2913,0.0107,0.005,0.0548,0.0158,-0.0384
8,por_unite_paso,0.4812,0.0823,0.1963,0.0587,0.2234,-0.0011,-0.0408
9,por_NOS_paso,0.5081,0.0489,0.0513,-0.0099,0.0255,0.3814,-0.0053


In [17]:
# transferencia_porcentual.iloc[:,1:] = transferencia_porcentual.iloc[:,1:] * 100
# transferencia_porcentual

***Creamos una matriz de transferencia con los votos totales transeridos hacia cada partido***

In [18]:
transferencia_votos = results_table[['Features', 'Total_votos_paso']].drop_duplicates()
transferencia_votos['Juntos_por_cambio'] = (transferencia_votos['Total_votos_paso'] * results_JxC['por_juntos_cambio_gen']).apply(lambda x: '%.0f' % x)
transferencia_votos['Frente_todos'] = (transferencia_votos['Total_votos_paso'] * transferencia_porcentual['por_frente_todos_gen']).apply(lambda x: '%.0f' % x)
transferencia_votos['Consenso_federal'] = (transferencia_votos['Total_votos_paso'] * transferencia_porcentual['por_consenso_federal_gen']).apply(lambda x: '%.0f' % x)
transferencia_votos['Unite'] = (transferencia_votos['Total_votos_paso'] * transferencia_porcentual['por_Unite_gen']).apply(lambda x: '%.0f' % x)
transferencia_votos['FIT'] = (transferencia_votos['Total_votos_paso'] * transferencia_porcentual['por_fit_gen']).apply(lambda x: '%.0f' % x)
transferencia_votos['NOS'] = (transferencia_votos['Total_votos_paso'] * transferencia_porcentual['por_NOS_gen']).apply(lambda x: '%.0f' % x)
transferencia_votos['Votos_blanco'] = (transferencia_votos['Total_votos_paso'] * transferencia_porcentual['por_blancos_gen']).apply(lambda x: '%.0f' % x)
transferencia_votos

Unnamed: 0,Features,Total_votos_paso,Juntos_por_cambio,Frente_todos,Consenso_federal,Unite,FIT,NOS,Votos_blanco
0,Consenso Federal,1251064.0,470650,154882,542962,49667,30776,7506,-5380
1,FIT,365498.0,91630,55994,69262,17178,112208,5665,13560
2,MAS,97762.0,528,45342,17793,6706,21478,8,5905
3,Frente de Todos,6653143.0,96471,6141516,164333,35927,57217,92479,65201
4,Frente Patriota,34598.0,7490,21105,2097,1111,-519,2657,657
5,Juntos por el Cambio,4918290.0,5031411,-121482,-21640,6394,14263,-35904,45248
6,Acción Vecinal,29203.0,23944,-345,-377,280,561,1889,3247
7,Partido Autonomista,22435.0,14825,6535,240,1229,112,354,-862
8,Unite,345802.0,166400,28460,67881,77252,20299,-380,-14109
9,Frente Nos,450918.0,229111,22050,23132,11498,-4464,171980,-2390


In [19]:
transferencia_votos.iloc[:,1:] = transferencia_votos.iloc[:,1:].astype(float)

In [20]:
transferencia_votos.loc[len(transferencia_votos),'Features'] = 'Total'

In [22]:
transferencia_votos.iloc[11,1:9] = transferencia_votos.sum(axis = 0)
transferencia_votos

Unnamed: 0,Features,Total_votos_paso,Juntos_por_cambio,Frente_todos,Consenso_federal,Unite,FIT,NOS,Votos_blanco
0,Consenso Federal,1251064.0,470650.0,154882.0,542962.0,49667.0,30776.0,7506.0,-5380.0
1,FIT,365498.0,91630.0,55994.0,69262.0,17178.0,112208.0,5665.0,13560.0
2,MAS,97762.0,528.0,45342.0,17793.0,6706.0,21478.0,8.0,5905.0
3,Frente de Todos,6653143.0,96471.0,6141516.0,164333.0,35927.0,57217.0,92479.0,65201.0
4,Frente Patriota,34598.0,7490.0,21105.0,2097.0,1111.0,-519.0,2657.0,657.0
5,Juntos por el Cambio,4918290.0,5031411.0,-121482.0,-21640.0,6394.0,14263.0,-35904.0,45248.0
6,Acción Vecinal,29203.0,23944.0,-345.0,-377.0,280.0,561.0,1889.0,3247.0
7,Partido Autonomista,22435.0,14825.0,6535.0,240.0,1229.0,112.0,354.0,-862.0
8,Unite,345802.0,166400.0,28460.0,67881.0,77252.0,20299.0,-380.0,-14109.0
9,Frente Nos,450918.0,229111.0,22050.0,23132.0,11498.0,-4464.0,171980.0,-2390.0


***Una vez obtenida la matriz, podemos obtener la cantidad total de votos (y_pred) que recibió cada partido según nuestro modelo. Esto nos servirá para compararla con la cantidad real de votos (y_real) que recibió cada agrupación***

In [23]:
y_pred = transferencia_votos.iloc[11,2:].values
y_pred

array([6244183.0, 6528888.0, 879931.0, 216357.0, 266302.0, 261364.0,
       182274.0], dtype=object)

In [27]:
y_real = results_table.Total_votos_gen.unique()
y_real

array([6748101., 7048170.,  952432.,  233549.,  287253.,  283742.,
        197306.])

In [31]:
def model_test():
    
    y_p = pd.DataFrame(y_pred, columns = ['y_pred'])
    y_r = pd.DataFrame(y_real, columns = ['y_real'])
    
    df_test = y_r.join(y_p)
    
    df_test['Difference'] = df_test['y_real'] - df_test['y_pred']
    return df_test

df_test = model_test()
df_test

Unnamed: 0,y_real,y_pred,Difference
0,6748101.0,6244180.0,503918
1,7048170.0,6528890.0,519282
2,952432.0,879931.0,72501
3,233549.0,216357.0,17192
4,287253.0,266302.0,20951
5,283742.0,261364.0,22378
6,197306.0,182274.0,15032
