## 1) Stažení dat

 * funkce `request_page(id,first)` stáhne data o hlasování 
    ze stránky `https://www.psp.cz/sqw/hlasy.sqw?g={voting_id}`  s 'voting_id=id'
 * budeme iterovat přes všechny hlasování tj. `range(first, 73901)` 


In [3]:
### imports
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import time
import urllib
import os


#import fitz
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser
from io import StringIO

from datetime import date

In [2]:
### flow ###
#pdfs = get_downloads()
#new_pdfs = download_new_files(pdfs)
#get_resolutions(new_pdfs)

In [None]:
def request_pdf_single(folder,url,page,file_prefix):
    """Download every attachment linked from one listing page.

    Requests ``f'{url}{page}'``, looks for ``<a class="file">`` anchors whose
    ``href`` contains ``"file_storage"`` and stores each target as
    ``data/{folder}/{file_prefix:03d}_{download attribute}``.  The counter is
    advanced for every matching link, whether or not the file had to be
    downloaded, so the numbering stays the same across re-runs.

    Args:
        folder: Sub-directory of ``data/`` to write into; it is not created here.
        url: Listing URL up to (not including) the page number.
        page: Page number appended to ``url``.
        file_prefix: Integer counter used to number the next file.

    Returns:
        The counter value to use for the next file.
    """
    full_url = f'{url}{page}'
    print(f"requesting data from: {full_url}")
    resp = requests.get(full_url)
    resp.encoding='utf-8'

    soup = BeautifulSoup(resp.content,from_encoding="utf-8")

    prefix = "https://www.novarole.cz/"

    for l in soup.find_all("a", class_="file"):
        link = l.get('href')
        # An anchor without href yields None; skip it instead of crashing.
        if not link or "file_storage" not in link:
            continue
        file_name = l.get('download')
        path = f'data/{folder}/{file_prefix:03d}_{file_name}'
        file_prefix += 1
        # Check before downloading so files already on disk cost no request.
        if os.path.exists(path):
            continue
        r = requests.get(f'{prefix}{link}', allow_redirects=True)
        with open(path, 'wb') as out_file:
            out_file.write(r.content)
    return file_prefix

def request_pdf(folder,url, first, last ,file_prefix = 0):
    """Download attachments from listing pages ``first`` to ``last`` inclusive.

    Creates ``data/{folder}`` when it does not exist, then processes the pages
    one by one with ``request_pdf_single``, carrying the file counter from
    page to page and pausing one second after each page.
    """
    target_dir = f'data/{folder}'
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    page = first
    while page <= last:
        file_prefix = request_pdf_single(folder, url, page, file_prefix)
        time.sleep(1)
        page += 1

In [None]:
# (target folder, listing URL, last page) for each document listing to fetch.
for folder_name, listing_url, last_page in (
    ("rme", "https://www.novarole.cz/samosprava/rada-mesta/usneseni-rady-mesta/?page=", 8),
    ("zme", "https://www.novarole.cz/samosprava/zastupitelstvo-mesta/zapisy-z-jednani-zastupitelstva/?page=", 5),
):
    request_pdf(folder_name, listing_url, 1, last_page)

In [4]:
def extract_information(pdf_path):
    """Print every text block of a PDF and return the collected block text.

    Each block returned by ``page.getTextBlocks()`` is printed followed by an
    empty line; the block's text (element 4 of the block tuple, the same field
    ``text_from_page`` reads) is appended to the returned string.

    NOTE(review): relies on the name ``fitz`` being importable; its import at
    the top of the file is currently commented out.

    Args:
        pdf_path: Path of the PDF to open.

    Returns:
        Concatenated text of all blocks on all pages.
    """
    with fitz.open(pdf_path) as doc:
        text = ""
        for page in doc:
            for blk in page.getTextBlocks():
                print(blk)
                print()
                # Previously nothing was accumulated and "" was always returned.
                text += blk[4]
    return text

def text_from_page(page):
    """Return the page's text blocks, one per CRLF-separated line.

    Newlines inside a block (element 4 of each block tuple) are replaced by
    spaces before the blocks are joined with ``"\\r\\n"``.
    """
    flattened = []
    for block in page.getTextBlocks():
        flattened.append(block[4].replace("\n", " "))
    return "\r\n".join(flattened)

In [4]:
class Resolution:
    """One resolution: its name, publication date, body text and result line."""

    def __init__(self, name:str, published:date, text:str, result:str):
        self.name, self.published = name, published
        self.text, self.result = text, result

    def __str__(self):
        # Human-readable dump: name banner, body text, then the result line.
        rendered = f'>>> {self.name} <<<\n {self.text} \n\n Result{self.result}'
        return rendered

    def parse_result(self):
        """Placeholder; does nothing and returns None."""
        return None
        

In [5]:
def extract_text(pdf_path):
    """Return the text of all pages of the PDF at ``pdf_path`` as one string.

    Every page is run through a pdfminer ``TextConverter`` (default
    ``LAParams``) that writes into an in-memory buffer.
    """
    buffer = StringIO()
    with open(pdf_path, 'rb') as pdf_file:
        document = PDFDocument(PDFParser(pdf_file))
        resources = PDFResourceManager()
        converter = TextConverter(resources, buffer, laparams=LAParams())
        page_interpreter = PDFPageInterpreter(resources, converter)
        for pdf_page in PDFPage.create_pages(document):
            page_interpreter.process_page(pdf_page)

    return buffer.getvalue()

In [6]:
class ResolutionZme(Resolution):
    """Resolution extended with a category line and an attendance list."""

    def __init__(self, name:str, published:date, text:str, result:str, category:str, attendance):
        super().__init__(name=name, published=published, text=text, result=result)
        self.category, self.attendance = category, attendance

    def __str__(self):
        # Base rendering followed by the category marker.
        base = super().__str__()
        return base + f' :::{self.category}'



def parse_zme_format(lines, debug = False):
    """Split the text lines of one document into ``ResolutionZme`` objects.

    The date is parsed from ``lines[2]``; the remaining lines are consumed by
    a small state machine kept in ``mode``:

    * ``init``      - wait for a line starting with ``Omluven…:`` and parse the
                      names on it with ``parse_attendance``.
    * ``firstscan`` - wait for the first line matching ``^[0-9]*/03``; it
                      becomes the category.
    * ``scan``      - the line after a result: either a ``N/N`` category line,
                      or the category is set to ``"Unknown"`` and the same line
                      is processed further as a name/text line.
    * ``name``      - a line starting with ``ZMě/`` is the resolution name;
                      anything else is treated as text.
    * ``name2``     - the line after the name is appended to it.
    * ``text``      - lines are accumulated until one starts with
                      ``Usnesení ``, which is stored as the result and closes
                      the resolution.

    Args:
        lines: Sequence of text lines (at least three, since ``lines[2]`` is read).
        debug: When true, every processed line is printed with a ``>>>`` prefix.

    Returns:
        List of ``ResolutionZme`` in document order.
    """
    date_pub = parse_new_date(lines[2])
    resolutions = []
    name = ""
    header = ""  # assigned but never read below
    restext = ""
    result = ""
    
    attendance = []
    category = ''
    
    mode ="init"
    scanning = True  # assigned but never read below
    for line in lines:
        if debug:
            print(f">>>{line}")
        # The "Omluven…:" line ends the preamble.
        if re.search(r'^Omluven[aiy]*:',line) and mode == "init":
            attendance = parse_attendance(line)
            mode = "firstscan"
            continue
        # Everything up to the first "<digits>/03…" line is skipped.
        if mode == "firstscan" and re.search(r'^[0-9]*/03',line):
            category = line
            mode = "name"
            continue
        if mode == "scan":
            if re.search(r'^[0-9]+/[0-9]+',line):
                category = line
                mode = "name"
                continue
            else:
                # No category line: fall through and treat this line as name/text.
                category = "Unknown"
                mode = "name"
        if mode == "name":
            if re.search('^ZMě/',line):
                name = line
                mode = "name2"
                continue
            else:
                # NOTE(review): `name` is not reset here, so a resolution without
                # a "ZMě/" line keeps the previous resolution's name.
                mode = "text"
        if mode == "name2":
            name += f' -- {line}'
            mode = "text"
            continue
        # A line starting with "Usnesení " terminates the text of the resolution.
        if mode == "text" and re.search(r'^Usnesení ',line):
            mode ="result"
        if mode == "result":
            result = line
            mode = "scan"
            resolutions.append(ResolutionZme(name,date_pub,restext,result,category,attendance))
            restext = ""
            continue
        if mode == "text":
            # Stripped lines are concatenated without any separator.
            restext += line.strip()
    return resolutions

In [7]:
def parse_new_date(date_line):
    """Extract a ``D. M. YYYY`` date (years 2010-2039) from a line of text.

    Args:
        date_line: Text containing a date such as ``"20. 4. 2022"``; the
            whitespace after the dots is optional.

    Returns:
        The date as ``datetime.date``.

    Raises:
        ValueError: If the line contains no such date or the numbers do not
            form a valid calendar date.
    """
    # The dots are escaped so they match literally, not any character.
    match = re.search(r'(?<!\d)(\d{1,2})\.\s*(\d{1,2})\.\s*(20[1-3][0-9])', date_line)
    if match is None:
        raise ValueError(f'no date found in: {date_line!r}')
    day, month, year = (int(part) for part in match.groups())
    return date(year, month, day)

In [8]:
# Surnames used as keys when matching names in attendance and result lines
# (see parse_attendance, parse_result and set_result).
representatives = ["Pokorná", "Krbcová", "Nesybová", "Dušková", "Tichá", 
                   "Škarda", "Sýkora", "Bartoň", "Švec", "Pastor", "Slíž",
                   "Pavlíček", "Niedermertl", "Cinegr", "Kvapil"]

In [9]:
def parse_attendance(text):
    """Return the surnames from ``representatives`` that occur in ``text``.

    The text is split into word tokens; every token that is an exact member
    of the module-level ``representatives`` list is kept, in order of
    appearance (repeats included).
    """
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    words = re.findall(r'\w+', text, re.UNICODE)
    return [word for word in words if word in representatives]

In [62]:
def parse_result(text, attendance):
    """Decode a result line into a per-representative vote table.

    Args:
        text: Result line of a resolution.  When it starts with
            ``"Usnesení schváleno"`` the fourth space-separated token is read
            as the number of yes votes.  When it contains a comma, every
            ``"<number> <word>"`` token after the first is interpreted by its
            word (``se`` = abstained, ``nepřítomen`` = absent, ``proti`` =
            against) and paired, in order, with the parenthesised name lists.
        attendance: Names excluded from the default ``"Pro"`` assignment
            (``parse_zme_format`` fills it from the ``Omluven…:`` line).

    Returns:
        Dict mapping every name in ``representatives`` to ``"Pro"``,
        ``"Proti"``, ``"Zdrzel"``, ``"Nepritomen"`` or ``"?"``.  The tallies
        and the dict are also printed, as before.
    """
    active = [x for x in representatives if x not in attendance]
    result = dict.fromkeys(representatives, "?")

    yes = 0
    no = 0
    neutral = 0
    missing = 0

    if re.search(r'^Usnesení schváleno',text):
        yes = int(text.split(' ')[3])
    # A comma means somebody abstained, was absent or voted against.
    if ',' in text:
        # \d+ (not \d) so multi-digit counts are not cut to their last digit.
        tokens = re.findall(r'\d+ \w+',text,re.UNICODE)
        votings = re.findall(r'\([^)]+\)',text,re.UNICODE)

        # keyword following the count -> label stored in the vote table
        labels = {'se': 'Zdrzel', 'nepřítomen': 'Nepritomen', 'proti': 'Proti'}
        counts = dict.fromkeys(labels, 0)

        i = 0
        for s in tokens[1:]:
            split = s.split(' ')
            print(split)
            keyword = split[1]
            if keyword not in labels:
                continue
            # The original `int[split[0]]` in the 'proti' branch raised TypeError.
            counts[keyword] = int(split[0])
            # Tolerate a count that has no parenthesised name list.
            if i < len(votings):
                set_result(result, labels[keyword], votings[i])
            i += 1
        neutral = counts['se']
        missing = counts['nepřítomen']
        no = counts['proti']

        print(active)
        # NOTE(review): the default "Pro" is only applied when the line contains
        # a comma; lines without one leave every entry as "?" - confirm intended.
        for a in active:
            if result[a] == "?":
                result[a] = "Pro"

    print(f'Y:{yes}, N:{no}, Z:{neutral}, M:{missing}')
    print(result)
    return result
        
def set_result(results, to, text):
    """Set ``results[word] = to`` for every word of ``text`` that is already a key.

    ``results`` is modified in place; words that are not keys are ignored.
    """
    for candidate in re.findall(r'\w+',text,re.UNICODE):
        if candidate in results:
            results[candidate] = to

In [63]:
def result_tostr(results):
    """Placeholder formatter: ignores ``results`` and returns an empty string."""
    placeholder = ''
    return placeholder

In [64]:
# Sample document used to exercise the parser.
path = 'Zápis 26. ZMě 20. 4. 2022.pdf'

text = extract_text(path)

# Drop empty and single-space lines in one pass; the previous
# `while x in lines: lines.remove(x)` loops rescanned the list for every hit.
lines = [line for line in text.splitlines() if line not in ('', ' ')]

res = parse_zme_format(lines)

# Decode the result line of one parsed resolution (index 10 of this document).
r = res[10]
parse_result(r.result,r.attendance)
#for r in res:
    
    #print(r.published)
    #print('-===-==-===-')


['3', 'se']
['Pokorná', 'Nesybová', 'Dušková', 'Tichá', 'Škarda', 'Sýkora', 'Bartoň', 'Švec', 'Pastor', 'Slíž', 'Pavlíček', 'Niedermertl', 'Cinegr', 'Kvapil']
Y:11, N:0, Z:3, M:0
{'Pokorná': 'Pro', 'Krbcová': '?', 'Nesybová': 'Pro', 'Dušková': 'Pro', 'Tichá': 'Pro', 'Škarda': 'Zdrzel', 'Sýkora': 'Pro', 'Bartoň': 'Pro', 'Švec': 'Pro', 'Pastor': 'Zdrzel', 'Slíž': 'Zdrzel', 'Pavlíček': 'Pro', 'Niedermertl': 'Pro', 'Cinegr': 'Pro', 'Kvapil': 'Pro'}


In [54]:
def parse_attendance(text):
    """Return the surnames from ``representatives`` that occur in ``text``.

    The text is split into word tokens; every token that is an exact member
    of the module-level ``representatives`` list is kept, in order of
    appearance (repeats included).
    """
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    words = re.findall(r'\w+', text, re.UNICODE)
    return [word for word in words if word in representatives]

In [55]:
def parse_result(text, attendance):
    """Draft parser: print the ``"<digit> <word>"`` tokens after the first one.

    The tokens are only looked for when ``text`` contains a comma.  The
    ``active``/``result``/``yes`` values are computed but not used further.
    """
    active = [member for member in representatives if member not in attendance]
    result = dict.fromkeys(representatives, "?")
    approved = re.search(r'^Usnesení schváleno',text)
    if approved:
        yes = int(text.split(' ')[3])
    # Without a comma nobody abstained or voted against, so there is nothing to list.
    if ',' not in text:
        return
    tokens = re.findall(r'\d \w+',text,re.UNICODE)
    for token in tokens[1:]:
        print(token)
        
            
        
# \([^)]*\) -- zavorky   

In [18]:
def result_tostr(results):
    """Placeholder formatter: ignores ``results`` and returns an empty string."""
    placeholder = ''
    return placeholder

In [20]:
# Sample document used to exercise the parser.
path = 'Zápis 26. ZMě 20. 4. 2022.pdf'

text = extract_text(path)

# Drop empty and single-space lines in one pass; the previous
# `while x in lines: lines.remove(x)` loops rescanned the list for every hit.
lines = [line for line in text.splitlines() if line not in ('', ' ')]

res = parse_zme_format(lines)

# Decode the result line of one parsed resolution (index 10 of this document).
r = res[10]
parse_result(r.result,r.attendance)
#for r in res:
    
    #print(r.published)
    #print('-===-==-===-')


3 se


In [None]:
def request_page(id,first):
    """Download one roll-call vote from psp.cz and write it to ``test.csv``.

    The page ``https://www.psp.cz/sqw/hlasy.sqw?g={id}&`` is parsed: the
    ``<h2>`` headings (all but the first and the last two) give the party
    names and, from the counts in their parentheses, the party sizes; the
    first 200 ``<li>`` items give one vote each.  Items are assigned to the
    parties in page order using those sizes.

    Vote codes as scraped: ``A``/``N`` yes/no, ``Z`` abstained, ``M`` excused,
    ``0`` not logged in.

    Args:
        id: Voting id (name kept for compatibility although it shadows the
            builtin).
        first: Id of the first voting of the run; when ``id == first`` the
            CSV is rewritten with a header, otherwise rows are appended.

    Side effects:
        Writes/appends ``test.csv``, prints progress for ids divisible by 100
        and sleeps one second after each page.
    """
    voting_id = id
    url = f'https://www.psp.cz/sqw/hlasy.sqw?g={voting_id}&'
    resp = requests.get(url)
    resp.encoding='cp1250'
    soup = BeautifulSoup(resp.content,from_encoding="cp1250")

    d = {'voting_id': [],'voting_name': [], 'party': [],'name': [], 'voting':[]}

    # The last piece of the <h1> text is stored as the voting name; the split
    # pattern is "two characters, colon, two characters" (presumably a time).
    name = re.split("..:..",soup.html.body.h1.text)

    # A vote code followed by a space separates the tallies in a party heading
    # and precedes the name in each list item.
    vote_prefix = "A |0 |N |M |Z "

    parties = dict()
    parties_names = list()
    for heading in soup.find_all('h2')[1:-2]:
        heading_parts = re.split(r" \(", heading.text)
        party_header = str(heading_parts[0])
        party_sum_text = str(heading_parts[1])
        # Party size = sum of the first number found after each vote code.
        tallies = re.split(vote_prefix, party_sum_text)[1:]
        parties[party_header] = sum(int(re.findall("[0-9]+", tally)[0]) for tally in tallies)
        parties_names.append(party_header)

    # Query the list items once instead of twice per iteration.
    items = soup.find_all('li')

    curr_party = 0
    curr_person = 0
    # 200 items are read per voting (presumably one per deputy).
    for i in range(200):
        item_text = items[i].text
        person_voting = re.split("[ \xa0]", item_text)[0]
        person_name = re.split(vote_prefix, item_text)[1]
        # Move on to the next party once the current one is filled.
        if(curr_person >= parties[parties_names[curr_party]]):
            curr_person = 0
            curr_party+=1
        curr_person+=1
        d['voting_id'].append(voting_id)
        d['voting_name'].append(name[-1])
        d['party'].append(parties_names[curr_party])
        d['name'].append(person_name)
        d['voting'].append(person_voting)

    df = pd.DataFrame(data=d)
    if(voting_id == first):
        df.to_csv('test.csv',mode='w',header = True)
    else:
        df.to_csv('test.csv',mode='a',header = False)
    if voting_id%100 == 0:
        print(f'id:{voting_id} downloaded')
    time.sleep(1)

In [None]:
# Voting id range; request_page rewrites test.csv (with header) only for id == first.
first = 67018
last = 73901


# NOTE(review): the loop starts at 68548 rather than `first` - presumably a resumed
# run; range() also stops before `last` itself.
for i in range(68548, last):
    request_page(i,first)

## 2)Vizualizace

 *  

In [None]:
import numpy as np
import pandas as pd
import sklearn as skit
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import re
from matplotlib.sankey import Sankey


%matplotlib inline 
matplotlib.style.use('ggplot')

In [None]:
data = pd.read_csv('data.csv')


# party -> set of deputy names, with non-breaking spaces replaced by spaces
# and everything from the first "(" cut off.
parties = dict()
# Unused placeholders, kept so the cell still defines the same names.
n = ""
m = ""
for index,row in data.iterrows():
    party = row.get("party")
    # Plain str methods replace the regex calls; the old '\(' pattern was an
    # invalid escape sequence in a non-raw literal.
    name = row.get("name").replace("\xa0", " ").split('(')[0]
    parties.setdefault(party, set()).add(name)

#  Docházka poslanců

 * a) Docházka jednotlivých poslanců -> 5 min a 5 max


In [None]:
#display(data.head(2))
#data['party']
#data[data['party'] == 'ODS'].head(100)

# Rows with vote code 'M' (excused) count as absences.
missing = data[data['voting'] == 'M']
# Group by the column name itself, not a one-element list: with a list,
# pandas >= 2.0 yields 1-tuples as group keys, which breaks re.sub below.
grouped = missing.groupby(by='name')

missingbyNames = list()

for name,group in grouped:
    name = re.sub("\xa0", " ", name)
    missingbyNames.append((name, len(group),"MAX"))
missingbyNames = sorted(missingbyNames, key=lambda tup: tup[1])

# Five lowest absence counts, relabelled for the left panel.
missingbyNamesList = [(x[0],x[1],"MIN") for x in missingbyNames[:5]]

f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,3))
ax1.set(ylim=(0, 100))

# Left panel: five lowest counts; right panel: five highest counts.
for panel_axis, panel_rows, panel_title in ((ax1, missingbyNamesList, 'Min'),
                                            (ax2, missingbyNames[-5:], 'Max')):
    df = pd.DataFrame(panel_rows, columns = ['Name', 'Missing', 'Group'])
    g = sns.barplot(x=df['Name'],y =df['Missing'], ax = panel_axis)
    g.set_title(panel_title)
    ax=g
    # Write each bar's value above it; counts are integers, so both panels
    # use the same integer format.
    for p in ax.patches:
        ax.annotate("%i" % p.get_height(), (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='center', fontsize=11, color='gray', xytext=(0, 20),
                    textcoords='offset points')

#plt.show()



#  Docházka poslanců

 * b) Docházka jednotlivých stran tj. průměrná absence na poslance ve straně

In [None]:
#display(data.head(2))
#data['party']
#data[data['party'] == 'ODS'].head(100)

# Rows with vote code 'M' (excused) count as absences.
missing = data[data['voting'] == 'M']
# Scalar key instead of a one-element list: with a list, pandas >= 2.0
# yields 1-tuples as group keys and `parties[party]` would raise KeyError.
grouped = missing.groupby(by='party')

missingbyParty = list()

# NOTE(review): `parties` must still be the party -> names dict here; several
# later cells rebind the same name to a plain set.
for party,group in grouped:
    missingbyParty.append((party, len(group)/len(parties[party])))
# Sort once after the loop (it used to be re-sorted on every iteration).
missingbyParty = sorted(missingbyParty, key=lambda tup: tup[1])


df = pd.DataFrame(missingbyParty, columns = ['Name', 'Missing'])
plt.figure(figsize=(20,5))
g = sns.barplot(x=df['Name'],y =df['Missing'])
g.set_title('average missing per person in party')
ax=g

# Write each bar's value above it ("%i" truncates the average to an integer).
for p in ax.patches:
    ax.annotate("%i" % p.get_height(), (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center', va='center', fontsize=11, color='gray', xytext=(0, 20),
                textcoords='offset points')
plt.show()

#  Přeběhnutí poslanci

 *  Grafické znázornění 'přebíhání' poslanců mezi stranami. Šipky shora znázorňují příchozí poslance a šipky zdola znázorňují odchozí.

In [None]:
data = pd.read_csv('data.csv')

# Face colour of the Sankey diagram drawn for each party.
colors = {'ODS' : 'blue',
          'ČSSD' : 'orange',
          'SPD' : 'brown',
          'Nezařaz' : 'white'
         }


# deputy name -> parties in order of first appearance in the data
partyPerName = dict()
# NOTE(review): not read in this cell; it rebinds the `first` voting id used
# by the download loop.
first = True
for index,row in data.iterrows():
    # Plain str methods replace the regex calls; the old '\(' pattern was an
    # invalid escape sequence in a non-raw literal.
    name = row.get("name").replace("\xa0", " ").split('(')[0]
    party = row.get("party")
    memberships = partyPerName.setdefault(name, list())
    if party not in memberships:
        memberships.append(party)

In [None]:
# Deputies recorded under more than one party, with their party sequence.
traitors = dict()
for key,value in partyPerName.items():
    if len(value) > 1:
        traitors[key] = value

# party -> deputies who arrived at it / left it
incoming = dict()
departing = dict()

for key,value in traitors.items():
    # Every party except the first was joined; every one except the last was left.
    for x in value[1:]:
        incoming.setdefault(x, set()).add(key)
    for x in value[:-1]:
        departing.setdefault(x, set()).add(key)

# One diagram per party that lost at least one deputy.
for key,value in departing.items():
    _labels = [key]
    _labels.extend(value)

    _flows = [1]
    _orientations = [0]
    _pathlengths = [0.25]
    # Named `net_flow` rather than `sum`: a global called `sum` shadows the
    # builtin that request_page calls.
    net_flow = 0
    for i in range(len(value)):
        # Departing deputies: arrows leaving at the bottom.
        _flows.append(-0.1)
        _orientations.append(-1)
        _pathlengths.append((i+1)/10)
        net_flow += -0.1
    if key in incoming:
        for x in range(len(incoming[key])):
            # Arriving deputies: arrows entering from the top.
            _flows.append(0.1)
            _orientations.append(1)
            _pathlengths.append((x+1)/4)
            net_flow += 0.1
        _labels.extend(incoming[key])
    # Closing trunk flow so that all flows add up to zero.
    _flows.append(-1 - net_flow)
    _orientations.append(0)
    _pathlengths.append(0.25)
    _labels.append(key)

    Sankey(flows= _flows,
           labels= _labels,
           orientations=_orientations,
           pathlengths = _pathlengths,
           # Parties without an entry in `colors` fall back to gray instead of
           # raising KeyError.
           facecolor= colors.get(key, 'gray'),
           scale=1).finish()
    plt.title(key)
    plt.title(key)



#  Ne/shoda jednotlivých stran

 *  Grafické znázornění shody a neshody jednotlivých stran. Čím tmavší políčko, tím větší neshoda. V potaz se berou pouze hlasy: Ano a Ne. Zdržení/remíza/absence celé strany na hlasování nemá vliv na data. Nezařazení poslanci nejsou zahrnuti

In [None]:
data4 = pd.read_csv('data.csv')
parties = {'ANO' , 'Piráti', 'ODS', 'ČSSD', 'SPD', 'KSČM', 'STAN', 'TOP09', 'KDU-ČSL'}
# The data is processed in consecutive chunks of 200 rows, one chunk per voting.
num_rows = len(data4.index)
votings = num_rows//200

# party -> party -> accumulated share, all starting at zero
cor_matrix = {party: dict.fromkeys(parties, 0) for party in parties}

In [None]:
for i in range(votings):
    # Chunk of 200 consecutive rows belonging to voting number i.
    start = i * 200
    voting = data4[start:start + 200]
    # Only yes ('A') and no ('N') votes are considered.
    results = voting[voting['voting'].isin(['N', 'A'])]
    groupedResults = (results.groupby(['party','voting'])
                             .count()
                             .sort_values('name',ascending= False))

    yes = set()
    no = set()

    # Index entries are (party, vote) pairs ordered by descending count, so
    # the first entry seen for a party decides which side it lands on.
    for key in groupedResults.index:
        faction, ballot = key[0], key[1]
        if faction in yes or faction in no or faction not in parties:
            continue
        if ballot == 'A':
            yes.add(faction)
        else:
            no.add(faction)

    # Every pair of parties on the same side gains 1/votings.
    for side in (yes, no):
        for party in side:
            for party2 in side:
                cor_matrix[party][party2] += 1/votings



In [None]:
# Turn the nested party -> party dict into a table and draw it as a heat map.
df = pd.DataFrame(cor_matrix)
plt.figure(figsize=(14,12))
sns.heatmap(df)

#  Jednotnost poslanců ve stranách

 *  Graf znázorňuje jednotnost hlasování poslanců ve stranách
 *  Čím větší hodnota(value) tím větší "rebelismus"
 *  Čím koncentrovanější jsou body dané strany, tím je strana jednotnější.

In [None]:
data5 = pd.read_csv('data.csv')
parties = {'ANO' , 'Piráti', 'ODS', 'ČSSD', 'SPD', 'KSČM', 'STAN', 'TOP09', 'KDU-ČSL'}
# The data is processed in consecutive chunks of 200 rows, one chunk per voting.
num_rows = len(data5.index)
votings = num_rows//200
# (name, party) -> accumulated share of votes cast against the party's most common vote
rebels = {}

In [None]:
for i in range(votings):
    # Chunk of 200 consecutive rows belonging to voting number i.
    voting = data5[0 + i* 200:200 + i* 200]
    # Excused ('M') and not-logged-in ('0') rows do not count towards the majority.
    results = voting[(voting['voting'] != 'M') & (voting['voting'] != '0')]
    groupedResults = results.groupby(['party','voting']).count().sort_values('name',ascending= False)

    # party -> its most frequent vote; the index holds (party, vote) pairs in
    # descending order of count, so the first entry per party wins.
    most = dict()
    for key in groupedResults.index:
        most.setdefault(key[0], key[1])

    for row in voting.iterrows():
        party = row[1]['party']
        vote = row[1]['voting']
        # Plain str methods replace the regex calls; the old '\(' pattern was
        # an invalid escape sequence in a non-raw literal.
        name = row[1]['name'].replace("\xa0", " ").split('(')[0]
        if vote in ('Z', 'M', '0') or party not in parties:
            continue
        rebels.setdefault((name,party), 0)
        # A vote differing from the party's most common vote adds 1/votings.
        if most[party] != vote:
            rebels[(name,party)] += 1/votings
                
#print(rebels)

            

In [None]:
# Flatten {(name, party): value} into rows.  A separate name is used so the
# `data` DataFrame read by other cells is not overwritten with a list.
rebel_rows = [(key[0], key[1], value) for key, value in rebels.items()]

df2 = pd.DataFrame(data = rebel_rows, columns = ['name','party','value'])
plt.figure(figsize=(12,4))
# One point per (deputy, party); jitter spreads overlapping points sideways.
sns.stripplot(x="party", y="value", data=df2, jitter=True)

In [None]:
# List the deputies from the highest to the lowest 'value'.
df3 = df2.sort_values('value',ascending= False)
print(df3)

#  Bonus: vizualizace pro zajímavá hlasování:

 *  Ne/shoda jednotlivých stran
 *  Jednotnost poslanců ve stranách

In [None]:
int_data = pd.read_csv('data.csv')
int_data = int_data.dropna()
num_rows = int_data.shape[0]

# Rows whose third column matches this pattern are dropped.
# NOTE(review): the dots in 'Inf.' and 'Náv.' are regex wildcards, so 'Náv.'
# already covers 'Návrh' - confirm whether literal dots were intended.
pattern= 'výbor|schůze|Pořad |Inf.|Náv.|Návrh'
# Explicit positional access to the third column (presumably voting_name -
# confirm against the CSV); the previous `row[1][2]` relied on the deprecated
# integer-key fallback of a label-indexed Series.
matches = int_data.iloc[:, 2].str.contains(pattern)
indexes_to_drop = list(int_data.index[matches])


int_data = int_data.drop(indexes_to_drop)
int_data.head(403)


In [None]:
parties = {'ANO' , 'Piráti', 'ODS', 'ČSSD', 'SPD', 'KSČM', 'STAN', 'TOP09', 'KDU-ČSL'}
# The filtered data is processed in consecutive chunks of 200 rows.
num_rows = len(int_data.index)
votings = num_rows//200

# party -> party -> accumulated share, all starting at zero
cor_matrix = {party: dict.fromkeys(parties, 0) for party in parties}

In [None]:
for i in range(votings):
    # Chunk of 200 consecutive rows of the filtered data.
    start = i * 200
    voting = int_data[start:start + 200]
    # Only yes ('A') and no ('N') votes are considered.
    results = voting[voting['voting'].isin(['N', 'A'])]
    groupedResults = (results.groupby(['party','voting'])
                             .count()
                             .sort_values('name',ascending= False))

    yes = set()
    no = set()

    # Index entries are (party, vote) pairs ordered by descending count, so
    # the first entry seen for a party decides which side it lands on.
    for key in groupedResults.index:
        faction, ballot = key[0], key[1]
        if faction in yes or faction in no or faction not in parties:
            continue
        if ballot == 'A':
            yes.add(faction)
        else:
            no.add(faction)

    # Every pair of parties on the same side gains 1/votings.
    for side in (yes, no):
        for party in side:
            for party2 in side:
                cor_matrix[party][party2] += 1/votings


In [None]:
# Turn the nested party -> party dict into a table and draw it as a heat map.
df = pd.DataFrame(cor_matrix)
plt.figure(figsize=(14,12))
sns.heatmap(df)

 *  Jednotnost poslanců ve stranách

In [None]:
# NOTE(review): this cell sits in the section on the filtered votings but sizes
# the loop from data5 (the full data set), not from int_data - confirm intended.
parties = {'ANO' , 'Piráti', 'ODS', 'ČSSD', 'SPD', 'KSČM', 'STAN', 'TOP09', 'KDU-ČSL'}
num_rows = data5.shape[0]
votings = num_rows//200
# (name, party) -> accumulated share of votes cast against the party's most common vote
rebels = dict()

In [None]:
for i in range(votings):
    # Chunk of 200 consecutive rows belonging to voting number i.
    # NOTE(review): slices data5 (the full data set) although this cell sits in
    # the section on the filtered votings (int_data) - confirm intended.
    voting = data5[0 + i* 200:200 + i* 200]
    # Excused ('M') and not-logged-in ('0') rows do not count towards the majority.
    results = voting[(voting['voting'] != 'M') & (voting['voting'] != '0')]
    groupedResults = results.groupby(['party','voting']).count().sort_values('name',ascending= False)

    # party -> its most frequent vote; the index holds (party, vote) pairs in
    # descending order of count, so the first entry per party wins.
    most = dict()
    for key in groupedResults.index:
        most.setdefault(key[0], key[1])

    for row in voting.iterrows():
        party = row[1]['party']
        vote = row[1]['voting']
        # Plain str methods replace the regex calls; the old '\(' pattern was
        # an invalid escape sequence in a non-raw literal.
        name = row[1]['name'].replace("\xa0", " ").split('(')[0]
        if vote in ('Z', 'M', '0') or party not in parties:
            continue
        rebels.setdefault((name,party), 0)
        # A vote differing from the party's most common vote adds 1/votings.
        if most[party] != vote:
            rebels[(name,party)] += 1/votings
                
#print(rebels)


In [None]:
# Flatten {(name, party): value} into rows.  A separate name is used so the
# `data` DataFrame read by other cells is not overwritten with a list.
rebel_rows = [(key[0], key[1], value) for key, value in rebels.items()]

df2 = pd.DataFrame(data = rebel_rows, columns = ['name','party','value'])
plt.figure(figsize=(12,4))
# One point per (deputy, party); jitter spreads overlapping points sideways.
sns.stripplot(x="party", y="value", data=df2, jitter=True)