<a href="https://colab.research.google.com/github/Oliz888/voting_and_consensus/blob/main/voting_and_consensus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
### Setting up Google Colab (optional) ###
# Installing necessary packages
!pip install python-igraph
!pip install leidenalg
!pip install umap

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python-igraph
  Downloading python-igraph-0.10.2.tar.gz (9.5 kB)
Collecting igraph==0.10.2
  Downloading igraph-0.10.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[K     |████████████████████████████████| 3.2 MB 6.4 MB/s 
[?25hCollecting texttable>=1.6.2
  Downloading texttable-1.6.4-py2.py3-none-any.whl (10 kB)
Building wheels for collected packages: python-igraph
  Building wheel for python-igraph (setup.py) ... [?25l[?25hdone
  Created wheel for python-igraph: filename=python_igraph-0.10.2-py3-none-any.whl size=9074 sha256=27b043cac730ffa870916f3d69e84ea15078fe5c54d31be3db03e18105d71464
  Stored in directory: /root/.cache/pip/wheels/74/1d/33/83ec153aa37c92957b2edb888f4f42fdea9b7ac89f4b919391
Successfully built python-igraph
Installing collected packages: texttable, igraph, python-igraph
Successfully installed igraph-0.10.2 python-igraph-0.10.2 text

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import altair as alt
from textwrap import wrap
from sklearn.neighbors import kneighbors_graph

import umap

import igraph as ig
import leidenalg

from sklearn.decomposition import PCA



In [None]:
# Getting access to Google Drive files (participants-votes.csv etc)
from google.colab import drive

# Single place to edit when the data lives somewhere else.
DRIVE_MOUNT_POINT = "/content/drive"
DATA_DIR = f"{DRIVE_MOUNT_POINT}/MyDrive/Colab Notebooks"

drive.mount(DRIVE_MOUNT_POINT)

# comments.csv holds the statements; participants-votes.csv holds one row per
# participant with that participant's vote on each statement.
comments = pd.read_csv(f"{DATA_DIR}/comments.csv")
votes = pd.read_csv(f"{DATA_DIR}/participants-votes.csv")

Mounted at /content/drive


In [None]:
# Examine the shape, first rows and column dtypes of the comments table.
# A cell only renders its last expression automatically, so the earlier
# ones are sent through display() explicitly.
display(comments.shape)
display(comments.head())
comments.dtypes

timestamp        int64
datetime        object
comment-id       int64
author-id        int64
agrees           int64
disagrees        int64
moderated        int64
comment-body    object
dtype: object

In [None]:
# Show the dimensions explicitly (a bare expression that is not last in the
# cell is discarded), then preview the first rows as the cell's output.
display(votes.shape)
votes.head()


Unnamed: 0,participant,group-id,n-comments,n-votes,n-agree,n-disagree,0,1,2,3,...,187,188,189,190,191,192,193,194,195,196
0,0,0.0,46,101,44,12,1.0,1.0,1.0,1.0,...,,,,,,,,,,
1,1,1.0,0,29,14,10,1.0,-1.0,-1.0,1.0,...,,,,,,,,,,
2,2,1.0,5,88,58,15,1.0,-1.0,-1.0,1.0,...,,,,,,,,,,
3,4,1.0,0,31,13,16,1.0,-1.0,-1.0,1.0,...,,,,,,,,,,
4,5,1.0,0,11,0,0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [None]:
# Convert the row labels of `comments` to strings. `comments` was read without
# an index column, so these labels are the default row positions ("0", "1", ...),
# not the values of the "comment-id" column.
comments.index = comments.index.astype(str)

In [None]:
# Ids of the statements that were moderated *in* (moderated > 0), as strings in
# numeric order so they line up with the per-statement column names of `votes`.
# The ids are taken from the "comment-id" column: the row index of `comments`
# is only the row position and does not coincide with the comment id.
statements_all_in = sorted(
    comments.loc[comments["moderated"] > 0, "comment-id"].astype(str),
    key=int,
)

In [None]:
## Split the columns of `votes` into per-participant summary fields and the rest.
metadata_fields = [
    'group-id',
    'n-comments',
    'n-votes',
    'n-agree',
    'n-disagree',
]
# Every remaining column: the "participant" id plus one column per statement.
val_fields = [col for col in votes.columns.tolist() if col not in metadata_fields]

## Original author's note: there are 196 issues (comments left by other
## participants) available for participants to vote on.

In [None]:
## Count the votes recorded in a single participant row.
def count_finite(row, fields=None):
    """Return the number of finite (non-NaN, non-inf) vote entries in `row`.

    Parameters
    ----------
    row : pandas.Series
        One row of the votes table.
    fields : list of column labels, optional
        Columns to treat as votes. When omitted, the module-level `val_fields`
        is used with "participant" removed: that id column is always finite,
        so counting it would inflate every total by one.

    Returns
    -------
    int
        How many of the selected entries are finite.
    """
    if fields is None:
        fields = [c for c in val_fields if c != "participant"]
    finite = np.isfinite(row[fields].astype(float))  # boolean mask per entry
    return int(finite.sum())


## REMOVE PARTICIPANTS WITH LESS THAN N VOTES
def select_rows(df, threshold, fields=None):
    """Return the rows of `df` that contain at least `threshold` finite votes.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per participant. The function filters this frame
        (it previously ignored the argument and read the global `votes`).
    threshold : int
        Minimum number of finite vote entries a row needs to be kept.
    fields : list of column labels, optional
        Columns to treat as votes; same default as in `count_finite`.

    Returns
    -------
    pandas.DataFrame
        The subset of `df` meeting the threshold, in the original row order.
    """
    if fields is None:
        # Resolve the default once instead of once per row.
        fields = [c for c in val_fields if c != "participant"]
    if df.empty:
        # Nothing to count; a row-wise apply on an empty frame does not yield
        # a usable boolean mask.
        return df.copy()

    number_of_votes = df.apply(count_finite, axis=1, fields=fields)
    valid = number_of_votes >= threshold

    return df[valid]

# Keep only the participants that reach the minimum vote count of 7.
df_votes = select_rows(df=votes, threshold=7)

In [None]:
# Participant-level summary columns.
metadata = df_votes.loc[:, metadata_fields]

# Vote matrix. A null means the participant never saw the statement; those
# nulls are replaced by zeros, and rows are ordered by participant id.
vals = (
    df_votes.loc[:, val_fields]
    .fillna(0)
    .sort_values("participant")
)
vals



Unnamed: 0,participant,0,1,2,3,4,5,6,7,8,...,187,188,189,190,191,192,193,194,195,196
0,0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1.0,-1.0,-1.0,1.0,1.0,-1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,1.0,-1.0,-1.0,1.0,1.0,0.0,-1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
988,4654,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
897,4655,0.0,0.0,0.0,-1.0,1.0,-1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1125,4659,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1261,4661,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
## Variance of the votes on each statement, used to explore how much opinion
## diverges on it.

# "participant" is an id column, not a vote, so it is excluded.
val_variances = vals.drop(columns="participant").var()

# The statement ids are the index of the variance Series. Move them into a real
# column named "index" so they can be cast and later used as a merge key;
# without this step df_variances["index"] raises a KeyError.
df_variances = (
    val_variances
    .rename("val_var")
    .rename_axis("index")
    .reset_index()
)

# Ids originate from CSV column names (strings); make them numeric so they can
# be matched against the numeric "comment-id" column.
df_variances["index"] = df_variances["index"].astype(float)
df_variances.head()


In [None]:
## Merge the variances dataframe with the comments dataframe to attach each statement's text and metadata


In [None]:
# Pair every comment with the variance computed for its statement: the
# "comment-id" column of `comments` is matched against the "index" column of
# `df_variances`.
comments_var = pd.merge(
    comments,
    df_variances,
    left_on="comment-id",
    right_on="index",
)
comments_var

Unnamed: 0,timestamp,datetime,comment-id,author-id,agrees,disagrees,moderated,comment-body,index,val_var
0,1472552518501,Tue Aug 30 17:21:58 WIB 2016,194,4413,1,0,0,Uber的出現 因服務內容明確、平台簡潔、交易程序精簡、路線糾紛減少，並有評價機制穩定服務雙...,194.0,0.000788
1,1472403087787,Sun Aug 28 23:51:27 WIB 2016,187,4378,1,0,0,uber就是叫派計程車，談不上運用閒置資源或是共乘,187.0,0.000000
2,1451878620176,Mon Jan 04 10:37:00 WIB 2016,183,4120,1,0,0,探討法律、稅務、安全等問題時，應先理解供需，為什麼民眾會選擇Uber？簡單說就是機會成本的選...,183.0,0.000000
3,1438228490557,Thu Jul 30 10:54:50 WIB 2015,138,1476,1,0,-1,Uner只不過是用多餘的時間來賺取微薄的收入，黃牌不用繳交所得稅外還有補助，那麼我們上班族都...,138.0,0.000788
4,1438155033741,Wed Jul 29 14:30:33 WIB 2015,130,1960,1,0,-1,我覺得搭乘時完全不用擔心身上太多的零錢或是找不開的金額鈔票，一來可以讓司機放心身上不用帶太多...,130.0,0.000788
...,...,...,...,...,...,...,...,...,...,...
192,1472550225914,Tue Aug 30 16:43:45 WIB 2016,191,4412,1,0,0,我覺得不能用計程車司機的素質不佳來當作推廣Uber的理由，但是計程車的管理方式應該要重新的思考,191.0,0.000788
193,1472550484869,Tue Aug 30 16:48:04 WIB 2016,192,4412,1,0,0,我覺得針對此案的相關法律調整，必須要快點進行，不能因為現在Uber的司機不多而認為影響人民日...,192.0,0.000788
194,1472551357201,Tue Aug 30 17:02:37 WIB 2016,193,4412,1,0,0,我覺得要表達意見的人，應該要有多次搭乘Uber的經驗，所表達的意見才會有參考的價值。,193.0,0.000788
195,1473020958833,Mon Sep 05 03:29:18 WIB 2016,195,4441,1,0,0,我覺得作為公共運輸分uber應該招募一定比例的車子在偏鄉作業。,195.0,0.000788


In [None]:
# Rank the statements by vote variance, highest first, and pick both extremes.
# drop=True discards the old row labels instead of inserting them as a
# "level_0" column; with that extra column a second run of this cell fails
# with "ValueError: cannot insert level_0, already exists".
comments_var = (
    comments_var
    .sort_values(by="val_var", ascending=False)
    .reset_index(drop=True)
)
high_varainces = comments_var.head(10)       # ten most divisive statements
# Ten lowest-variance statements.
# NOTE(review): unseen statements were filled with 0, so a statement almost
# nobody voted on also has near-zero variance — confirm this is the intended
# reading of "consensus".
statement_consensus = comments_var.tail(10)
high_varainces


ValueError: ignored

In [None]:
# Display the first ten rows of comments_var after sorting by val_var, descending.
high_varainces 

Unnamed: 0,level_0,timestamp,datetime,comment-id,author-id,agrees,disagrees,moderated,comment-body,index,val_var
0,179,1435730079153,Wed Jul 01 12:54:39 WIB 2015,13,0,291,374,1,我覺得計程車身一定要塗裝成黃色的，和其他車輛顏色不同。,13.0,0.507584
1,34,1436965020693,Wed Jul 15 19:57:00 WIB 2015,18,0,319,333,1,我覺得UberX目前未依法營業，讓我覺得搭乘時有風險。,18.0,0.502854
2,35,1436965031690,Wed Jul 15 19:57:11 WIB 2015,19,0,333,318,1,我覺得UberX管理制度不夠透明，讓我難以安心。,19.0,0.500773
3,37,1436965034304,Wed Jul 15 19:57:14 WIB 2015,20,0,290,371,1,我覺得UberX已與現有國內運輸業產生不公平競爭的情形。,20.0,0.499374
4,170,1435654477147,Tue Jun 30 15:54:37 WIB 2015,6,0,429,236,1,我覺得載客的車子上應該要有明確標示。,6.0,0.490098
5,176,1435667819880,Tue Jun 30 19:36:59 WIB 2015,12,20,440,230,1,Uber 是媒合平台，就像拍賣網站，屬於資訊業。,12.0,0.484064
6,49,1437105740751,Fri Jul 17 11:02:20 WIB 2015,32,0,426,231,1,我覺得 UberX 目前無法幫乘客保意外險，讓我感到沒有保障。,32.0,0.482849
7,46,1437105174039,Fri Jul 17 10:52:54 WIB 2015,29,0,300,328,1,我覺得交通部取締效果不彰，是公權力無能的表現。,29.0,0.475244
8,171,1435654449781,Tue Jun 30 15:54:09 WIB 2015,5,0,385,245,1,我覺得主動取締白牌車是交通部的責任。,5.0,0.474674
9,167,1435654192077,Tue Jun 30 15:49:52 WIB 2015,0,0,502,187,1,我有用過 Uber 叫車。,0.0,0.46847
