In [1]:
import pandas as pd
from anytree import Node, RenderTree, search

In [2]:
def make_map(list_child_parent):
    """Turn (child, parent) pairs into a nested dict of root -> descendants.

    Every name seen in the pairs owns one dict holding its direct children;
    that same dict object is nested under the name's parent, so a subtree
    stays connected no matter in which order the pairs arrive.

    Parameters
    ----------
    list_child_parent : iterable of (child, parent) pairs

    Returns
    -------
    dict
        One entry per name that never appears as a child, mapped to its
        nested children dict (``{}`` for a name without children).
    """
    children_of = {}
    non_roots = set()

    for child, parent in list_child_parent:
        # Register the parent first, then the child, so first-seen order is kept.
        parent_bucket = children_of.setdefault(parent, {})
        child_bucket = children_of.setdefault(child, {})
        parent_bucket[child] = child_bucket
        non_roots.add(child)

    return {
        name: subtree
        for name, subtree in children_of.items()
        if name not in non_roots
    }

def createTree(tree_dict, root):
    """Attach the nested mapping ``tree_dict`` beneath ``root`` as ``Node`` objects.

    Each key becomes a ``Node`` whose parent is ``root``. A non-empty value is
    treated as the mapping of that node's own children and is attached
    recursively; an empty value (``{}``, ``''`` or ``None``) marks a leaf.

    Parameters
    ----------
    tree_dict : dict
        Mapping of node name -> mapping of its children.
    root : Node
        Node under which the keys of ``tree_dict`` are attached.

    Returns
    -------
    None
        The tree is built through the ``parent`` links of the created nodes.
    """
    for key, subtree in tree_dict.items():
        child = Node(key, parent=root)

        # A leaf must not end the loop: the remaining siblings still have to
        # be attached, so simply skip the recursion for empty values.
        if subtree:
            createTree(subtree, child)
        
def traceConversation(dataframe, tree, node):
    """Return the rows of ``dataframe`` that belong to the sub-thread under ``node``.

    The descendants of ``node`` are collected with ``getAllChildNodes`` (which
    prints every name it visits, after the header printed here). A row is kept
    when its ``id`` is ``node`` or one of those descendants, or when its
    ``reply_to`` is one of the descendants.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``'id'`` and ``'reply_to'``.
    tree : Node
        Root of the tree that is searched for ``node``.
    node : hashable
        Name of the node whose sub-thread is wanted.

    Returns
    -------
    pandas.DataFrame
        The matching rows, in their original order.
    """
    print("All child nodes:")
    children_nodes_list = getAllChildNodes(tree, node, [])

    # The starting comment itself is part of the conversation as well.
    thread_ids = children_nodes_list + [node]

    replies_to_descendant = dataframe['reply_to'].isin(children_nodes_list)
    in_thread = dataframe['id'].isin(thread_ids)

    return dataframe[replies_to_descendant | in_thread]

def getAllChildNodes(tree, node, children_nodes_list):
    """Append the names of every descendant of ``node`` to ``children_nodes_list``.

    The node named ``node`` is looked up once with ``search.find_by_attr``;
    its descendants are then reached through their ``children`` attributes,
    so the tree is not searched again for every descendant. Names are
    printed and appended in pre-order (a node before its own children).

    Parameters
    ----------
    tree : Node
        Root of the tree that is searched for ``node``.
    node : hashable
        Name of the node whose descendants are wanted.
    children_nodes_list : list
        List that receives the names; it is modified in place.

    Returns
    -------
    list
        ``children_nodes_list`` itself, for convenience.

    Raises
    ------
    ValueError
        If no node named ``node`` is found in ``tree``.
    """
    start = search.find_by_attr(tree, node)
    if start is None:
        raise ValueError("node %r was not found in the tree" % (node,))

    # Explicit stack instead of recursion; children are pushed reversed so
    # they are popped (and therefore printed) in their original order.
    pending = list(reversed(start.children))
    while pending:
        current = pending.pop()
        print(current.name)
        children_nodes_list.append(current.name)
        pending.extend(reversed(current.children))

    return children_nodes_list

In [3]:
# Load the comment thread together with its per-comment sentiment score.
df = pd.read_csv('Datasets/conversation_sentiment.csv')

# Alternative input, kept for reference:
# df = pd.read_csv('Datasets/sample_tweet_conversation.csv', dtype=object)

# df['id'] = df['id'].astype(str)
# df['reply_to'] = df['reply_to'].astype(str)

# 'Unnamed: 0' is presumably an index column written by an earlier to_csv
# call (TODO confirm); it is not used by the analysis below.
df = df.drop('Unnamed: 0', axis='columns')
df

Unnamed: 0,Unnamed: 0.1,user_name,id,timestamp,reply_to,comment,sentiment
0,0,MapleViolet,hpr2kav,2021-12-24 08:55:24,rmqevj,All I know is - anyone trying to pull a fast o...,0.002475
1,1,applescript16,hpntm2t,2021-12-23 16:24:16,rmqevj,Here’s some perspective: \n\n1) The public nat...,0.001399
2,2,iluj13,hpnwekg,2021-12-23 17:01:54,hpntm2t,Well said. It’s only a problem if your party i...,0.121548
3,3,forzenrose,hpnzb4r,2021-12-23 17:41:51,hpnwekg,&gt;Transparency and finding out the truth is ...,0.915093
4,4,sec5,hpo5qkg,2021-12-23 19:10:22,hpnzb4r,'Im so done with RK Saga because it sucks to b...,0.002387
...,...,...,...,...,...,...,...
177,177,hexquisite,hpobx5f,2021-12-23 20:25:53,rmqevj,This dum bish needs to learn from the pap fuck...,0.001783
178,178,A-Chicken,hpr73zc,2021-12-24 09:32:25,rmqevj,"I'm sorry, but this is the opposition we're ta...",0.033873
179,179,PublicWar5,hpp9czp,2021-12-24 00:57:35,rmqevj,"Honestly I hate the COP, I hate how much of a ...",0.000711
180,180,etyn100,hpnw8qy,2021-12-23 16:59:45,rmqevj,Gotta milk some political brownies points,0.013773


## Tree of Comments

In [4]:
parent = 'rmqevj'
# parent = '18831926'
root = Node(parent)

# Pair every comment id with the id it replies to, skipping rows that reply
# to themselves. Zipping the two columns walks the rows positionally, so it
# works whatever index labels `df` carries.
input_list = [
    (comment_id, replied_id)
    for comment_id, replied_id in zip(df['id'], df['reply_to'])
    if comment_id != replied_id
]

# make_map nests the pairs by parent; the entry for `parent` holds the whole
# thread below it, which createTree then attaches under `root`.
output_dict = make_map(input_list)
createTree(output_dict[parent], root)

In [5]:
# Draw the comment tree: each line is the branch prefix followed by the node name.
for pre, fill, node in RenderTree(root):
    print(f"{pre!s}{node.name!s}")

rmqevj
├── hpr2kav
├── hpntm2t
│   ├── hpnwekg
│   │   ├── hpnzb4r
│   │   │   └── hpo5qkg
│   │   │       └── hpokj06
│   │   │           ├── hpp0qgm
│   │   │           ├── hpoqxz5
│   │   │           └── hpraj0s
│   │   │               └── hpshgjh
│   │   ├── hpo2pl5
│   │   │   └── hpo7oxl
│   │   │       └── hpo7u0x
│   │   ├── hpo76sq
│   │   ├── hpo7l9d
│   │   ├── hpradku
│   │   │   ├── hps0rop
│   │   │   └── hprqpon
│   │   ├── hpoya8b
│   │   └── hpnwve6
│   │       └── hpnz3vc
│   │           ├── hpp0vzg
│   │           └── hpo6fwl
│   │               └── hpp7l4j
│   ├── hpo1src
│   │   ├── hpolpfk
│   │   ├── hpo33t8
│   │   │   ├── hpo4que
│   │   │   │   └── hpom3uq
│   │   │   │       └── hprayhb
│   │   │   │           └── hpruuh0
│   │   │   └── hpoq8hi
│   │   │       └── hppkszp
│   │   │           └── hpppv0m
│   │   └── hpo6898
│   │       └── hpohz2e
│   ├── hpnx97j
│   │   ├── hpnxtjg
│   │   │   ├── hpo0tg0
│   │   │   │   ├── hpo1tc3
│   │   │   │   └── hprbg

In [6]:
# Extract the sub-thread that starts at comment 'hpo5qkg'.
conversation = traceConversation(dataframe=df, tree=root, node='hpo5qkg')
conversation

All child nodes:
hpokj06
hpp0qgm
hpoqxz5
hpraj0s
hpshgjh


Unnamed: 0,Unnamed: 0.1,user_name,id,timestamp,reply_to,comment,sentiment
4,4,sec5,hpo5qkg,2021-12-23 19:10:22,hpnzb4r,'Im so done with RK Saga because it sucks to b...,0.002387
5,5,2late2realise,hpokj06,2021-12-23 21:50:48,hpo5qkg,how about feeling woke criticizing both PAP an...,0.008179
6,6,sid111111,hpp0qgm,2021-12-23 23:56:49,hpokj06,That's not being woke. That's just being ratio...,0.90672
7,7,raidorz,hpoqxz5,2021-12-23 22:43:53,hpokj06,"Nah, ""wokeness"" is only reserved for Oppositio...",0.009884
8,8,trashmaker,hpraj0s,2021-12-24 09:59:55,hpokj06,Well the thing is a lot of these 'Woke' people...,0.011356
9,9,TheElevatedWalrus,hpshgjh,2021-12-24 17:14:15,hpraj0s,lmao I remember being downvoted for comparing ...,0.009317


In [7]:
def confirmationBiasScore(scores, current_score, threshold = 0.5):
    """Score the positive evidence in ``scores`` under two hypothesis weights.

    Every value in ``scores`` counts as negative evidence when it is below
    ``threshold`` and as positive evidence otherwise. ``D1`` and ``D2`` are
    the shares of positive and negative evidence. ``H1`` is the weight of the
    side ``current_score`` falls on (``1 - current_score`` below the
    threshold, ``current_score`` otherwise) and ``H2`` is the complement.

    NOTE(review): in ``(D1 * H) / (D1 * H + D2 * H)`` the weight ``H`` cancels,
    so for any non-zero weight both returned values equal ``D1`` up to
    floating-point rounding and do not depend on ``current_score``. Confirm
    whether a different likelihood was intended.

    Parameters
    ----------
    scores : iterable of float
        Earlier scores, used as evidence.
    current_score : float
        Score of the item being evaluated.
    threshold : float, default 0.5
        Boundary between negative and positive scores.

    Returns
    -------
    tuple of (float, float)
        ``(prob_D1_H1, prob_D1_H2)``. ``(0.0, 0.0)`` is returned when
        ``scores`` is empty, and a value is ``0.0`` when its hypothesis
        weight is zero, instead of dividing by zero.
    """
    def _ratio(numerator, denominator):
        # A zero denominator only occurs for a zero hypothesis weight (0 / 0).
        return numerator / denominator if denominator else 0.0

    below_threshold = [score < threshold for score in scores]
    negative_evidence = sum(1 for is_below in below_threshold if is_below)
    positive_evidence = len(below_threshold) - negative_evidence

    total_evidence = positive_evidence + negative_evidence
    if total_evidence == 0:
        # No earlier scores: there is no evidence to take a share of.
        return 0.0, 0.0

    if current_score < threshold:
        H1 = 1 - current_score
        H2 = current_score
    else:
        H1 = current_score
        H2 = 1 - current_score

    # Assume D1 is positive evidence
    D1 = positive_evidence / total_evidence
    D2 = negative_evidence / total_evidence

    prob_D1_H1 = _ratio(D1 * H1, (D1 * H1) + (D2 * H1))
    prob_D1_H2 = _ratio(D1 * H2, (D1 * H2) + (D2 * H2))

    return prob_D1_H1, prob_D1_H2

In [8]:
previous_scores = []

# Walk the conversation in order. The first comment has no earlier scores to
# serve as evidence, so it only seeds the history and prints nothing.
for row_label, row in conversation.iterrows():
    if previous_scores:
        print(row['comment'], row['sentiment'])
        print('Bias Score:', confirmationBiasScore(previous_scores, row['sentiment']))
        print()

    previous_scores.append(row['sentiment'])

how about feeling woke criticizing both PAP and the WP on their undeniable mistakes? 0.0081791784614324
Bias Score: (0.0, 0.0)

That's not being woke. That's just being rational. 0.9067203998565674
Bias Score: (0.0, 0.0)

Nah, "wokeness" is only reserved for Opposition supporters here. 0.0098841013386845
Bias Score: (0.33333333333333337, 0.3333333333333333)

Well the thing is a lot of these 'Woke' people somehow also tend to be against women's rights and against LGBTQ+ rights, and let me not even get into the foreigners topic.

The wokeness is very limited in scope. 0.0113557334989309
Bias Score: (0.25, 0.25)

lmao I remember being downvoted for comparing Lim Tean's nativist shit to certain American politicians 0.009316568262875
Bias Score: (0.2, 0.2)

