### Representing entities as nodes

In [18]:
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import matplotlib.pyplot as plt
from collections import Counter

# Sentiment labels. Later cells index into this list, so the order matters.
possible_sents = ["positive", "negative", "neutral"]
# Per-label divisors used to normalise each entity's raw sentiment counts
# (presumably corpus-wide totals per label -- TODO confirm their origin).
sent_proportions = {"neutral": 3789399, "positive": 1910713, "negative": 1663099}

# NOTE(review): unpickling executes code embedded in the file; only load
# caches that were produced locally by a trusted run.
# NOTE(review): "entties" looks like a typo for "entities" -- check the file
# that actually exists in cache/ before renaming anything.
entities_cache = Path("cache") / "entties.dict.pkl"
with entities_cache.open("rb") as cache_file:
    entities = pickle.load(cache_file)

In [19]:
# First-degree sentiment: the entity's own dominant sentiment, after dividing
# each raw count by the matching entry of `sent_proportions`.
# Only these labels compete; "neutral" is never assigned here.
candidate_sents = ["positive", "negative"]

for ent, data in tqdm(list(entities.items())):
    normalised_counts = [data[sent] / sent_proportions[sent] for sent in candidate_sents]

    # Pick the label from the same list that was scored, so the result no
    # longer relies on `possible_sents` happening to start with the same
    # two labels in the same order.
    # np.argmax returns the first maximal index, so ties resolve to
    # "positive".
    # NOTE(review): this includes entities whose positive and negative counts
    # are both 0 -- confirm that labelling them "positive" is intended.
    data["first_degree"] = candidate_sents[np.argmax(normalised_counts)]

# Show which fields an entity record now carries.
list(entities["armenia"].keys())

  0%|          | 0/56945 [00:00<?, ?it/s]

100%|██████████| 56945/56945 [00:00<00:00, 140990.31it/s]


['positive', 'negative', 'neutral', 'connections', 'first_degree']

In [30]:
# Number of strongest connections that get a vote at each hop.
N_TOP_CONNECTIONS = 10

# Pass 1: for every entity, find its strongest connections and tally the
# first-degree labels of those connections, weighted by connection strength.
# Doing this once per entity avoids recomputing the same tally every time the
# entity shows up as somebody else's connection.
top_connections = {}
neighbour_votes = {}
for ent, data in tqdm(list(entities.items())):
    n_connections = Counter()
    n_connections += data["connections"][0]  # positive
    n_connections += data["connections"][1]  # negative

    strongest = n_connections.most_common(N_TOP_CONNECTIONS)
    votes = Counter()
    for conn, weight in strongest:
        votes += {entities[conn]["first_degree"]: weight}

    top_connections[ent] = strongest
    neighbour_votes[ent] = votes

# Pass 2: derive the labels.
#   second_degree -- majority label among the entity's strongest connections
#   third_degree  -- majority label among the strongest connections of those
#                    connections (their pass-1 tallies summed, in order)
# Entities without any connections fall back to "neutral".
for ent, data in tqdm(list(entities.items())):
    second_votes = neighbour_votes[ent]

    third_votes = Counter()
    for conn, _weight in top_connections[ent]:
        # In-place Counter addition leaves the cached tally untouched.
        third_votes += neighbour_votes[conn]

    data["second_degree"] = second_votes.most_common(1)[0][0] if second_votes else "neutral"
    data["third_degree"] = third_votes.most_common(1)[0][0] if third_votes else "neutral"

100%|██████████| 56945/56945 [02:17<00:00, 414.61it/s] 


In [32]:
# Inspect one entity's record, leaving out its "connections" entry.
{
    field: value
    for field, value in entities["armenia"].items()
    if field != "connections"
}

{'positive': 3364,
 'negative': 694,
 'neutral': 5121,
 'first_degree': 'positive',
 'second_degree': 'negative',
 'third_degree': 'negative'}

In [36]:
# Percentage of entities whose first-degree label equals their second-degree label.
100 * np.sum([
    info["first_degree"] == info["second_degree"]
    for info in entities.values()
]) / len(entities)

62.19158837474756

In [37]:
# Percentage of entities whose first-degree label equals their third-degree label.
100 * np.sum([
    info["first_degree"] == info["third_degree"]
    for info in entities.values()
]) / len(entities)

46.4237422073931

In [38]:
# Percentage of entities whose third-degree label equals their second-degree label.
100 * np.sum([
    info["third_degree"] == info["second_degree"]
    for info in entities.values()
]) / len(entities)

70.29063131091404