In [4]:
import pandas as pd
import numpy as np
import os
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgb
import seaborn as sns
from pandas.plotting import scatter_matrix
import pickle

# Statistical data year by year

In [5]:
path = 'dane_years/'
files = list(filter(lambda x: os.path.isfile(path+x), os.listdir(path)))
files.sort()
files

['2001.csv',
 '2002.csv',
 '2003.csv',
 '2004.csv',
 '2005.csv',
 '2006.csv',
 '2007.csv',
 '2008.csv',
 '2009.csv',
 '2010.csv',
 '2011.csv',
 '2012.csv',
 '2013.csv',
 '2014.csv',
 '2015.csv',
 '2016.csv',
 '2017.csv',
 '2018.csv']

In [6]:
yr_list = [(lambda x: pd.read_csv(path+x,index_col=0, header=0))(f) for f in files[:-3]]

In [4]:
for yi in range(len(yr_list)):
    y = files[yi].split('.')[0]
    c = yr_list[yi].columns
    c = [y+'-'+ci for ci in c]
    # c = [y[2:]+'-'+str(ci) for ci in range(len(c))]
    yr_list[yi].columns = c

In [5]:
df_yr = pd.concat(yr_list, axis=1, sort=False)
df_yr = df_yr.dropna(axis='columns')

In [6]:
df_yr_scaled = df_yr / df_yr.max()

In [7]:
df_yr_scaled

Unnamed: 0_level_0,2001-emeryci_i_rencisci,2001-bezrobocie_zarejsestrowane,2001-malzenstwa_zawarte,2001-dochody_gminy,2001-wyksztalcenie_gim_pod_nizsze,2001-wyksztalcenie_srednie,2001-rozwody_powiat,2001-praca_najemna,2001-praca_wlasny_rachunek,2001-socjal,...,2018-rozwody_powiat,2018-udzial_wiek_przedprodukcyjny,2018-udzial_wiek_produkcyjny,2018-udzial_wiek_poprodukcyjny,2018-praca_najemna,2018-praca_wlasny_rachunek,2018-socjal_500plus,2018-socjal,2018-PKB_na_1_mieszkanca,2018-przestepstwa_ogolem
jednostka,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MAŁOPOLSKIE,0.684194,0.615666,0.649653,0.474158,0.738602,0.849112,0.377421,0.72884,0.785987,0.861968,...,0.462325,0.969388,0.983871,0.843882,0.782569,0.781085,0.831224,0.764644,0.572133,0.662219
ŚLĄSKIE,0.97857,0.905336,0.892075,0.836481,0.677812,0.931953,1.0,0.85263,0.620205,1.0,...,0.8591,0.867347,0.972581,0.953586,0.840657,0.507728,0.760983,1.0,0.645658,0.976675
LUBUSKIE,0.200065,0.295237,0.184317,0.158825,0.744681,0.91716,0.232097,0.734198,0.865055,0.856056,...,0.171567,0.923469,0.983871,0.886076,0.828977,0.73719,0.895383,0.794809,0.511034,0.223009
WIELKOPOLSKIE,0.671868,0.685512,0.683248,0.506664,0.74772,0.872781,0.56418,0.706902,0.819909,0.830279,...,0.609864,0.984694,0.979032,0.839662,0.747182,0.585568,0.847853,0.767258,0.669339,0.531534
ZACHODNIOPOMORSKIE,0.304977,0.506593,0.30946,0.277003,0.835866,0.902367,0.334184,0.790305,0.731837,0.821686,...,0.344399,0.882653,0.980645,0.924051,0.844601,0.640649,0.645446,0.869462,0.515353,0.354457
DOLNOŚLĄSKIE,0.588495,0.77267,0.536344,0.492648,0.75076,0.902367,0.704249,0.780474,0.717405,0.90501,...,0.590157,0.867347,0.974194,0.953586,0.874673,0.615542,0.759245,0.859194,0.681654,0.662476
OPOLSKIE,0.183288,0.226966,0.182294,0.154542,0.775076,0.807692,0.202222,0.819269,0.434563,0.786646,...,0.159132,0.811224,1.0,0.92827,0.674806,0.321734,0.875155,0.861166,0.492935,0.153602
KUJAWSKO-POMORSKIE,0.40127,0.592954,0.419867,0.320593,0.841945,0.798817,0.385077,0.704956,0.551622,0.801979,...,0.343661,0.918367,0.982258,0.886076,0.707932,0.41994,0.813105,0.833474,0.504009,0.386541
POMORSKIE,0.376686,0.507731,0.446247,0.360868,0.74772,0.91716,0.380123,0.868956,0.964722,0.792637,...,0.375066,1.0,0.972581,0.843882,0.828416,0.841332,0.922686,0.79866,0.603279,0.402294
WARMIŃSKO-MAZURSKIE,0.258123,0.515295,0.286707,0.208886,1.0,0.816568,0.309563,0.650921,0.530406,0.800284,...,0.262831,0.933673,0.998387,0.835443,0.69359,0.652391,1.0,0.768949,0.426751,0.223435


In [8]:
corr = df_yr_scaled.corr()
#corr.style.background_gradient(cmap='coolwarm')

In [9]:
#corr.style.background_gradient(cmap='coolwarm').set_properties(**{'font-size': '0pt'})

## Clustering

In [7]:
df_jednostki = pd.read_csv('dane/_jednostki.csv', sep=';')
df_jednostki.okręgi = [ o.replace(' ', '_') for o in df_jednostki.okręgi ]
df_jednostki.województwo = [ w.upper() for w in df_jednostki.województwo ]

df_jednostki.loc[df_jednostki['województwo']=='WARSZAWA','województwo'] = 'MAZOWIECKIE'

## 2001

In [14]:
wyniki_2001 = pd.read_csv("wyniki_wyborow/"+"2001_W"+".csv")
col = wyniki_2001.columns.values.tolist()
col[0] = 'jednostka'
wyniki_2001.columns = col
wyniki_2001['jednostka'] = [ w.replace('O_', '') for w in wyniki_2001['jednostka'] ]
wyniki_2001 = wyniki_2001.fillna(0)

In [15]:
wyniki_2001

Unnamed: 0,jednostka,PiS,Komitet Wyborczy - Akcja Wyborcza Solidarność Prawicy,Komitet Wyborczy Alternatywa Ruch Społeczny,LPR,PSL,Komitet Wyborczy Polskiej Partii Socjalistycznej,Komitet Wyborczy Polskiej Unii Gospodarczej,Komitet Wyborczy Polskiej Wspólnoty Narodowej,Komitet Wyborczy Polskiej Wspłlnoty Narodowej,Samoobrona,SLD,Komitet Wyborczy Unii Wolności,"Komitet Wyborczy Wyborców ""Mniejszość Niemiecka""","Komitet Wyborczy Wyborców ""Niemiecka Mniejszość Górnego śląska""","Komitet Wyborczy Wyborcłw ""Mniejszołł Niemiecka""","Komitet Wyborczy Wyborcłw ""Niemiecka Mniejszołł Głrnego łlłska""",PO
0,Polska,1236787.0,729207.0,54266,1025148,1168659,13459.0,7189.0,2644.0,0.0,1327624,5342519,404074.0,47230.0,8024.0,0.0,0.0,1651099.0
1,wielkopolskie,79549.0,51760.0,3731,81008,107862,0.0,1714.0,0.0,271.0,126610,548472,34221.0,0.0,0.0,0.0,0.0,150489.0
2,śląskie,155427.0,142534.0,10751,78242,66567,1968.0,1272.0,0.0,0.0,116227,750589,63339.0,0.0,0.0,4890.0,8024.0,223419.0
3,lubuskie,17672.0,18695.0,1172,16978,23382,799.0,1434.0,0.0,0.0,29957,160790,10492.0,0.0,0.0,0.0,0.0,30622.0
4,opolskie,16576.0,11387.0,1273,20574,15802,0.0,0.0,0.0,1161.0,34236,120778,9596.0,0.0,0.0,42340.0,0.0,37203.0
5,zachodniopomorskie,35776.0,22741.0,1670,28733,21172,568.0,0.0,0.0,0.0,81632,268734,20659.0,0.0,0.0,0.0,0.0,63505.0
6,mazowieckie,276893.0,86335.0,6038,143719,198686,2143.0,237.0,0.0,616.0,157924,644961,71550.0,0.0,0.0,0.0,0.0,255252.0
7,kujawsko-pomorskie,48183.0,30918.0,1697,52926,55925,666.0,1430.0,0.0,0.0,72230,327913,16426.0,0.0,0.0,0.0,0.0,62160.0
8,lubelskie,58632.0,32616.0,3063,81413,147589,1179.0,0.0,0.0,0.0,111651,266191,14530.0,0.0,0.0,0.0,0.0,56891.0
9,warmińsko-mazurskie,30474.0,14423.0,1385,32716,33618,836.0,0.0,0.0,0.0,46935,201840,15519.0,0.0,0.0,0.0,0.0,46911.0


In [16]:
wyn_01 = wyniki_2001
#wyniki_2001.iloc[0,1:] = wyniki_2001.iloc[0,1:].div(wyniki_2001.iloc[0,1:].sum()).fillna(0)*100
wyn_01

Unnamed: 0,jednostka,PiS,Komitet Wyborczy - Akcja Wyborcza Solidarność Prawicy,Komitet Wyborczy Alternatywa Ruch Społeczny,LPR,PSL,Komitet Wyborczy Polskiej Partii Socjalistycznej,Komitet Wyborczy Polskiej Unii Gospodarczej,Komitet Wyborczy Polskiej Wspólnoty Narodowej,Komitet Wyborczy Polskiej Wspłlnoty Narodowej,Samoobrona,SLD,Komitet Wyborczy Unii Wolności,"Komitet Wyborczy Wyborców ""Mniejszość Niemiecka""","Komitet Wyborczy Wyborców ""Niemiecka Mniejszość Górnego śląska""","Komitet Wyborczy Wyborcłw ""Mniejszołł Niemiecka""","Komitet Wyborczy Wyborcłw ""Niemiecka Mniejszołł Głrnego łlłska""",PO
0,Polska,1236787.0,729207.0,54266,1025148,1168659,13459.0,7189.0,2644.0,0.0,1327624,5342519,404074.0,47230.0,8024.0,0.0,0.0,1651099.0
1,wielkopolskie,79549.0,51760.0,3731,81008,107862,0.0,1714.0,0.0,271.0,126610,548472,34221.0,0.0,0.0,0.0,0.0,150489.0
2,śląskie,155427.0,142534.0,10751,78242,66567,1968.0,1272.0,0.0,0.0,116227,750589,63339.0,0.0,0.0,4890.0,8024.0,223419.0
3,lubuskie,17672.0,18695.0,1172,16978,23382,799.0,1434.0,0.0,0.0,29957,160790,10492.0,0.0,0.0,0.0,0.0,30622.0
4,opolskie,16576.0,11387.0,1273,20574,15802,0.0,0.0,0.0,1161.0,34236,120778,9596.0,0.0,0.0,42340.0,0.0,37203.0
5,zachodniopomorskie,35776.0,22741.0,1670,28733,21172,568.0,0.0,0.0,0.0,81632,268734,20659.0,0.0,0.0,0.0,0.0,63505.0
6,mazowieckie,276893.0,86335.0,6038,143719,198686,2143.0,237.0,0.0,616.0,157924,644961,71550.0,0.0,0.0,0.0,0.0,255252.0
7,kujawsko-pomorskie,48183.0,30918.0,1697,52926,55925,666.0,1430.0,0.0,0.0,72230,327913,16426.0,0.0,0.0,0.0,0.0,62160.0
8,lubelskie,58632.0,32616.0,3063,81413,147589,1179.0,0.0,0.0,0.0,111651,266191,14530.0,0.0,0.0,0.0,0.0,56891.0
9,warmińsko-mazurskie,30474.0,14423.0,1385,32716,33618,836.0,0.0,0.0,0.0,46935,201840,15519.0,0.0,0.0,0.0,0.0,46911.0


In [17]:
col_b = ['LPR','PSL','PiS','Samoobrona']
col_r = ['PO']
col_g = [c for c in wyn_01.columns.values.tolist() if c not in col_r+col_b+['jednostka','nr','okręgi','powiaty','SLD']]
wyn_01['Blue'] = wyn_01.loc[:, col_b].sum(axis = 1, skipna = True) 
wyn_01['Red'] = wyn_01.loc[:, col_r].sum(axis = 1, skipna = True) 
wyn_01['Gray'] = wyn_01.loc[:, col_g].sum(axis = 1, skipna = True) 

In [18]:
wyn_01 = wyn_01[['jednostka','Blue','Red','Gray']].iloc[1:,:]
wyn_01.columns = ['województwo','Blue','Red','Gray']
wyn_01['województwo'] = [w.upper() for w in wyn_01['województwo'] ]
wyn_01 = wyn_01.set_index('województwo').sort_index()

In [20]:
wyn_01.sort_index()

Unnamed: 0_level_0,Blue,Red,Gray
województwo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DOLNOŚLĄSKIE,292867.0,136806.0,88833.0
KUJAWSKO-POMORSKIE,229264.0,62160.0,51137.0
LUBELSKIE,399285.0,56891.0,51388.0
LUBUSKIE,87989.0,30622.0,32592.0
MAZOWIECKIE,777222.0,255252.0,166919.0
MAŁOPOLSKIE,465737.0,187768.0,132698.0
OPOLSKIE,87188.0,37203.0,65757.0
PODKARPACKIE,350822.0,58693.0,87850.0
PODLASKIE,184104.0,33478.0,29324.0
POMORSKIE,235172.0,189417.0,56839.0


In [None]:
wyn_01.to_csv("wyniki_wyborow/2001_O_simplified.csv")
wyn_01

## 2005

In [21]:
wyniki_2005 = pd.read_csv("wyniki_wyborow/"+"2005_O"+".csv")
col = wyniki_2005.columns.values.tolist()
col[0] = 'jednostka'
wyniki_2005.columns = col
wyniki_2005['jednostka'] = [ w.replace('O_', '') for w in wyniki_2005['jednostka'] ]
wyniki_2005 = wyniki_2005.fillna(0)

wyn_05 = wyniki_2005.merge(df_jednostki,how='outer',left_on=['jednostka'],right_on=['okręgi'])

In [22]:
col_b = ['LPR','PSL','PiS','Samoobrona']
col_r = ['SLD','PO']
col_g = [c for c in wyn_05.columns.values.tolist() if c not in col_r+col_b+['jednostka','nr','okręgi','powiaty']]
wyn_05['Blue'] = wyn_05.loc[:, col_b].sum(axis = 1, skipna = True) 
wyn_05['Red'] = wyn_05.loc[:, col_r].sum(axis = 1, skipna = True) 
wyn_05['Gray'] = wyn_05.loc[:, col_g].sum(axis = 1, skipna = True) 

In [23]:
wyn_05 = wyn_05.groupby('województwo').agg(np.sum)
wyn_05 = wyn_05[['Blue','Red','Gray']].sort_index()
wyn_05
wyn_05.to_csv("wyniki_wyborow/2005_O_simplified.csv")

Unnamed: 0_level_0,Blue,Red,Gray
województwo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DOLNOŚLĄSKIE,403165.0,354677,109375.0
KUJAWSKO-POMORSKIE,301227.0,199374,70123.0
LUBELSKIE,464224.0,156883,65688.0
LUBUSKIE,134531.0,109282,26711.0
MAZOWIECKIE,978071.0,627435,222722.0
MAŁOPOLSKIE,648485.0,350577,92667.0
OPOLSKIE,114005.0,92401,60418.0
PODKARPACKIE,467906.0,162311,46239.0
PODLASKIE,208603.0,95218,41665.0
POMORSKIE,314959.0,332658,69703.0


## 2007

In [42]:
wyniki = pd.read_csv("wyniki_wyborow/"+"2007_O"+".csv")
col = wyniki.columns.values.tolist()
col[0] = 'jednostka'
wyniki.columns = col
wyniki['jednostka'] = [ w.replace('O_', '') for w in wyniki['jednostka'] ]
wyniki = wyniki.fillna(0)

wyn_07 = wyniki.merge(df_jednostki,how='outer',left_on=['jednostka'],right_on=['okręgi'])

In [22]:
col_b = ['PSL','PiS']
col_r = ['SLD','PO']
col_g = [c for c in wyn_07.columns.values.tolist() if c not in col_r+col_b+['jednostka','nr','okręgi','powiaty']]
wyn_07['Blue'] = wyn_07.loc[:, col_b].sum(axis = 1, skipna = True) 
wyn_07['Red'] = wyn_07.loc[:, col_r].sum(axis = 1, skipna = True) 
wyn_07['Gray'] = wyn_07.loc[:, col_g].sum(axis = 1, skipna = True) 

In [23]:
wyn_07 = wyn_07.groupby('województwo').agg(np.sum)
wyn_07 = wyn_07[['Blue','Red','Gray']]
wyn_07.to_csv("wyniki_wyborow/2007_O_simplified.csv")
wyn_07

Unnamed: 0_level_0,Blue,Red,Gray
województwo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DOLNOŚLĄSKIE,420983,768406,42581.0
KUJAWSKO-POMORSKIE,287570,490576,33485.0
LUBELSKIE,478302,322140,52762.0
LUBUSKIE,121621,255057,17537.0
MAZOWIECKIE,1108208,1361875,96444.0
MAŁOPOLSKIE,692611,662226,53468.0
OPOLSKIE,108687,212933,46920.0
PODKARPACKIE,481519,306938,33134.0
PODLASKIE,221146,217120,22914.0
POMORSKIE,313144,627796,35200.0


## 2011

In [45]:
wyniki = pd.read_csv("wyniki_wyborow/"+"2011_O"+".csv")
col = wyniki.columns.values.tolist()
col[0] = 'jednostka'
wyniki.columns = col

#Kraków II -> Chrzanów 

wyniki['jednostka'] = [ w.replace('O_', '') for w in wyniki['jednostka'] ]
wyniki = wyniki.fillna(0)

wyn_11 = wyniki.merge(df_jednostki,how='outer',left_on=['jednostka'],right_on=['okręgi'])
wyniki_2001 = wyn_11
wyniki_2001.iloc[0,1:] = wyniki_2001.iloc[0,1:].div(wyniki_2001.iloc[0,1:].sum()).fillna(0)*100
wyniki_2001.head(1)

Unnamed: 0,jednostka,Komitet Wyborczy Nasz Dom Polska-Samoobrona Andrzeja Leppera,Komitet Wyborczy Nowa Prawica - Janusza Korwin-Mikke,PO,Komitet Wyborczy Polska Jest Najważniejsza,Komitet Wyborczy Polska Partia Pracy - Sierpień 80,PSL,Komitet Wyborczy Prawica,PiS,Ruch Palikota/Twój Ruch,SLD,Komitet Wyborczy Wyborców Mniejszość Niemiecka,nr,okręgi,powiaty,województwo
0,Polska,0.067734,1.056661,39.17862,2.194878,0.550798,8.362349,0.244748,29.889802,10.017674,8.241781,0.194955,0.0,0,0,0


In [46]:
col_b = ['PiS']
col_r = ['SLD','PO','Ruch Palikota/Twój Ruch','PSL']
col_g = [c for c in wyn_11.columns.values.tolist() if c not in col_r+col_b+['jednostka','nr','okręgi','powiaty']]
wyn_11['Blue'] = wyn_11.loc[:, col_b].sum(axis = 1, skipna = True) 
wyn_11['Red'] = wyn_11.loc[:, col_r].sum(axis = 1, skipna = True) 
wyn_11['Gray'] = wyn_11.loc[:, col_g].sum(axis = 1, skipna = True) 

In [47]:
wyn_11 = wyn_11.groupby('województwo').agg(np.sum)
wyn_11 = wyn_11[['Blue','Red','Gray']].iloc[1:,:]
wyn_11.to_csv("wyniki_wyborow/2011_O_simplified.csv")
wyn_11

Unnamed: 0_level_0,Blue,Red,Gray
województwo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DOLNOŚLĄSKIE,276859.0,752817.0,36523.0
KUJAWSKO-POMORSKIE,171681.0,519013.0,28595.0
LUBELSKIE,301627.0,441492.0,34965.0
LUBUSKIE,72302.0,247803.0,12551.0
MAZOWIECKIE,760501.0,1487679.0,96617.0
MAŁOPOLSKIE,504545.0,712760.0,65440.0
OPOLSKIE,65739.0,215099.0,38828.0
PODKARPACKIE,352949.0,368626.0,33776.0
PODLASKIE,158572.0,249562.0,20589.0
POMORSKIE,215778.0,626760.0,26616.0


## 2015

In [48]:
wyniki = pd.read_csv("wyniki_wyborow/2015_O.csv")
col = wyniki.columns.values.tolist()

col[-1] = 'jednostka_nr'
wyniki.columns = col

wyniki = wyniki.fillna(0)

wyn_15 = wyniki.merge(df_jednostki,how='outer',left_on=['jednostka_nr'],right_on=['nr'])

wyn_15 = wyn_15.iloc[:,1:]
wyn_15.columns

Index(['SLD', 'Razem KW KORWiN', 'Nowoczesna.pl', 'PO', 'PiS',
       'Razem KW Razem', 'Razem KW Samoobrona',
       'Razem KWW Grzegorza Brauna „Szczęść Boże!”',
       'Razem KWW JOW Bezpartyjni', 'Razem KWW Mniejszość Niemiecka', 'PO.1',
       'Razem KWW Ruch Społeczny RP', 'Razem KWW Zbigniewa Stonogi',
       'Razem KWW Zjednoczeni dla Śląska', 'Kukiz',
       'Razem Komitet Wyborczy Kongres Nowej Prawicy', 'PSL', 'jednostka_nr',
       'nr', 'okręgi', 'powiaty', 'województwo'],
      dtype='object')

In [49]:
col_b = ['PiS','PSL','Kukiz']#,'KORWIN/ Wolnośc/ KONFEDERACJA']
col_r = ['SLD','PO','Nowoczesna.pl']#,'Wiosna','Razem']
col_g = [c for c in wyn_15.columns.values.tolist() if c not in col_r+col_b+['jednostka','nr','okręgi','powiaty']]
wyn_15['Blue'] = wyn_15.loc[:, col_b].sum(axis = 1, skipna = True) 
wyn_15['Red'] = wyn_15.loc[:, col_r].sum(axis = 1, skipna = True) 
wyn_15['Gray'] = wyn_15.loc[:, col_g].sum(axis = 1, skipna = True) 

In [50]:
wyn_15 = wyn_15.groupby('województwo').agg(np.sum)
wyn_15 = wyn_15[['Blue','Red','Gray']]
wyn_15.to_csv("wyniki_wyborow/2015_O_simplified.csv")
wyn_15

Unnamed: 0_level_0,Blue,Red,Gray
województwo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DOLNOŚLĄSKIE,499149.0,512619.0,102011.0
KUJAWSKO-POMORSKIE,341143.0,331863.0,63790.0
LUBELSKIE,552573.0,210951.0,63895.0
LUBUSKIE,145904.0,166957.0,33365.0
MAZOWIECKIE,1294629.0,990704.0,251372.0
MAŁOPOLSKIE,857057.0,435178.0,124009.0
OPOLSKIE,148923.0,135724.0,53622.0
PODKARPACKIE,584004.0,182876.0,67287.0
PODLASKIE,272200.0,128270.0,34922.0
POMORSKIE,368078.0,441121.0,84691.0


## 2019

In [51]:
wyniki = pd.read_csv("wyniki_wyborow/2019_O.csv")
col = wyniki.columns.values.tolist()

col[0] = 'województwo'
wyniki.columns = col

wyniki = wyniki.fillna(0)

wyn_19 = wyniki.iloc[2:,:]

wyn_19['województwo'] = [x.upper() for x in wyn_19['województwo']]

for c in wyn_19.columns[1:]:
    wyn_19[c] = wyn_19[c].astype(float)

wyn_19

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wyn_19['województwo'] = [x.upper() for x in wyn_19['województwo']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wyn_19[c] = wyn_19[c].astype(float)


Unnamed: 0,województwo,PO,KOMITET WYBORCZY AKCJA ZAWIEDZIONYCH EMERYTÓW RENCISTÓW - ZPOW-601-21/19,KORWIN/ Wolnośc/ KONFEDERACJA,PSL,KOMITET WYBORCZY PRAWICA - ZPOW-601-20/19,PiS,KOMITET WYBORCZY SKUTECZNI PIOTRA LIROYA-MARCA - ZPOW-601-17/19,SLD,KOMITET WYBORCZY WYBORCÓW KOALICJA BEZPARTYJNI I SAMORZĄDOWCY - ZPOW-601-10/19,KOMITET WYBORCZY WYBORCÓW MNIEJSZOŚĆ NIEMIECKA - ZPOW-601-15/19
2,DOLNOŚLĄSKIE,413632.0,0.0,89440.0,93803.0,0.0,525007.0,0.0,206861.0,41150.0,0.0
3,KUJAWSKO-POMORSKIE,262370.0,0.0,61031.0,90722.0,0.0,350198.0,0.0,136839.0,11152.0,0.0
4,LUBELSKIE,168586.0,0.0,63451.0,99078.0,0.0,552086.0,0.0,71556.0,12158.0,0.0
5,LUBUSKIE,136955.0,0.0,31490.0,50943.0,0.0,150188.0,0.0,68341.0,0.0,0.0
6,MAZOWIECKIE,935365.0,1412.0,212037.0,261664.0,0.0,1283424.0,2503.0,416349.0,26366.0,0.0
7,MAŁOPOLSKIE,370579.0,0.0,124631.0,145751.0,1765.0,876381.0,0.0,154467.0,9214.0,0.0
8,OPOLSKIE,108570.0,0.0,23176.0,41901.0,0.0,152999.0,0.0,47699.0,0.0,32094.0
9,PODKARPACKIE,146949.0,0.0,75215.0,76523.0,0.0,614756.0,3530.0,62394.0,0.0,0.0
10,PODLASKIE,109527.0,1775.0,36207.0,48566.0,0.0,270888.0,2272.0,47342.0,4001.0,0.0
11,POMORSKIE,426692.0,0.0,80517.0,77335.0,0.0,381335.0,0.0,143672.0,0.0,0.0


In [52]:
col_b = ['PiS','KORWIN/ Wolnośc/ KONFEDERACJA','PSL']
col_r = ['SLD','PO']#,'Wiosna','Razem']
col_g = [c for c in wyn_19.columns.values.tolist() if c not in col_r+col_b+['województwo','nr','okręgi','powiaty']]
wyn_19['Blue'] = wyn_19.loc[:, col_b].sum(axis = 1, skipna = True) 
wyn_19['Red'] = wyn_19.loc[:, col_r].sum(axis = 1, skipna = True) 
wyn_19['Gray'] = wyn_19.loc[:, col_g].sum(axis = 1, skipna = True) 
wyn_19

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wyn_19['Blue'] = wyn_19.loc[:, col_b].sum(axis = 1, skipna = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wyn_19['Red'] = wyn_19.loc[:, col_r].sum(axis = 1, skipna = True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wyn_19['Gray'] = wyn_19.loc[:, col_g].sum(axis = 1, skipna = True)


Unnamed: 0,województwo,PO,KOMITET WYBORCZY AKCJA ZAWIEDZIONYCH EMERYTÓW RENCISTÓW - ZPOW-601-21/19,KORWIN/ Wolnośc/ KONFEDERACJA,PSL,KOMITET WYBORCZY PRAWICA - ZPOW-601-20/19,PiS,KOMITET WYBORCZY SKUTECZNI PIOTRA LIROYA-MARCA - ZPOW-601-17/19,SLD,KOMITET WYBORCZY WYBORCÓW KOALICJA BEZPARTYJNI I SAMORZĄDOWCY - ZPOW-601-10/19,KOMITET WYBORCZY WYBORCÓW MNIEJSZOŚĆ NIEMIECKA - ZPOW-601-15/19,Blue,Red,Gray
2,DOLNOŚLĄSKIE,413632.0,0.0,89440.0,93803.0,0.0,525007.0,0.0,206861.0,41150.0,0.0,708250.0,620493.0,41150.0
3,KUJAWSKO-POMORSKIE,262370.0,0.0,61031.0,90722.0,0.0,350198.0,0.0,136839.0,11152.0,0.0,501951.0,399209.0,11152.0
4,LUBELSKIE,168586.0,0.0,63451.0,99078.0,0.0,552086.0,0.0,71556.0,12158.0,0.0,714615.0,240142.0,12158.0
5,LUBUSKIE,136955.0,0.0,31490.0,50943.0,0.0,150188.0,0.0,68341.0,0.0,0.0,232621.0,205296.0,0.0
6,MAZOWIECKIE,935365.0,1412.0,212037.0,261664.0,0.0,1283424.0,2503.0,416349.0,26366.0,0.0,1757125.0,1351714.0,30281.0
7,MAŁOPOLSKIE,370579.0,0.0,124631.0,145751.0,1765.0,876381.0,0.0,154467.0,9214.0,0.0,1146763.0,525046.0,10979.0
8,OPOLSKIE,108570.0,0.0,23176.0,41901.0,0.0,152999.0,0.0,47699.0,0.0,32094.0,218076.0,156269.0,32094.0
9,PODKARPACKIE,146949.0,0.0,75215.0,76523.0,0.0,614756.0,3530.0,62394.0,0.0,0.0,766494.0,209343.0,3530.0
10,PODLASKIE,109527.0,1775.0,36207.0,48566.0,0.0,270888.0,2272.0,47342.0,4001.0,0.0,355661.0,156869.0,8048.0
11,POMORSKIE,426692.0,0.0,80517.0,77335.0,0.0,381335.0,0.0,143672.0,0.0,0.0,539187.0,570364.0,0.0


In [53]:
wyn_19 = wyn_19.groupby('województwo').agg(np.sum)
wyn_19 = wyn_19[['Blue','Red','Gray']]
wyn_19.to_csv("wyniki_wyborow/2019_O_simplified.csv")
wyn_19

Unnamed: 0_level_0,Blue,Red,Gray
województwo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DOLNOŚLĄSKIE,708250.0,620493.0,41150.0
KUJAWSKO-POMORSKIE,501951.0,399209.0,11152.0
LUBELSKIE,714615.0,240142.0,12158.0
LUBUSKIE,232621.0,205296.0,0.0
MAZOWIECKIE,1757125.0,1351714.0,30281.0
MAŁOPOLSKIE,1146763.0,525046.0,10979.0
OPOLSKIE,218076.0,156269.0,32094.0
PODKARPACKIE,766494.0,209343.0,3530.0
PODLASKIE,355661.0,156869.0,8048.0
POMORSKIE,539187.0,570364.0,0.0


## Read dict with neighbours

In [8]:
with open("wojew_neighbours.pkl", "rb") as f:
    output = pickle.load(f)

print(output)

{'śląskie': ['opolskie', 'świętokrzyskie', 'łódzkie', 'małopolskie'], 'opolskie': ['śląskie', 'wielkopolskie', 'łódzkie', 'dolnośląskie'], 'wielkopolskie': ['opolskie', 'zachodniopomorskie', 'kujawsko-pomorskie', 'dolnośląskie', 'pomorskie', 'łódzkie', 'lubuskie'], 'zachodniopomorskie': ['wielkopolskie', 'pomorskie', 'lubuskie'], 'świętokrzyskie': ['śląskie', 'podkarpackie', 'małopolskie', 'łódzkie', 'mazowieckie', 'lubelskie'], 'kujawsko-pomorskie': ['wielkopolskie', 'pomorskie', 'warmińsko-mazurskie', 'łódzkie', 'mazowieckie'], 'podlaskie': ['warmińsko-mazurskie', 'mazowieckie', 'lubelskie'], 'dolnośląskie': ['opolskie', 'wielkopolskie', 'lubuskie'], 'podkarpackie': ['małopolskie', 'świętokrzyskie', 'lubelskie'], 'małopolskie': ['śląskie', 'podkarpackie', 'świętokrzyskie'], 'pomorskie': ['warmińsko-mazurskie', 'wielkopolskie', 'zachodniopomorskie', 'kujawsko-pomorskie'], 'warmińsko-mazurskie': ['pomorskie', 'mazowieckie', 'kujawsko-pomorskie', 'podlaskie'], 'łódzkie': ['śląskie', 'op