In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from math import ceil
from math import sqrt

In [2]:
def topsis(scores, weights):
    """Score alternatives with TOPSIS and return each row's closeness value.

    Every criterion column named in ``weights`` is vector-normalised (divided
    by its Euclidean norm) and multiplied by its weight. The per-column
    maximum is taken as the "ideal best" and the per-column minimum as the
    "ideal worst", so every criterion is treated as larger-is-better.

    Parameters
    ----------
    scores : pandas.DataFrame
        One row per alternative. Must contain every column named in
        ``weights``; any other columns are ignored.
    weights : dict
        Maps a criterion column name to its numeric weight.

    Returns
    -------
    pandas.Series
        ``d_worst / (d_best + d_worst)`` for each row, on a fresh 0..n-1
        index (the index of ``scores`` is not carried over). A row whose
        distances to both ideals are zero has no defined closeness and gets
        NaN. If no criterion has a non-zero norm, the Series is empty.

    Raises
    ------
    KeyError
        If a column named in ``weights`` is missing from ``scores``.
    """
    # Weighted normalised decision matrix, one array per usable criterion.
    wndm = {}

    for column, weight in weights.items():
        # Plain NumPy (not pandas) reductions so NaN propagates instead of
        # being silently skipped.
        values = scores[column].to_numpy(dtype=float)
        denominator = np.sqrt(np.sum(values ** 2))

        # An all-zero criterion cannot be normalised; leave it out entirely.
        if denominator == 0:
            continue

        wndm[column] = values / denominator * weight

    if not wndm:
        return pd.Series(np.array([], dtype=float))

    wndm_df = pd.DataFrame(wndm)
    matrix = wndm_df.to_numpy()
    ideal_best = wndm_df.max().to_numpy()
    ideal_worst = wndm_df.min().to_numpy()

    # Euclidean distance of every row from the ideal best / ideal worst.
    dist_from_best = np.sqrt(((matrix - ideal_best) ** 2).sum(axis=1))
    dist_from_worst = np.sqrt(((matrix - ideal_worst) ** 2).sum(axis=1))

    # Guard the 0 / 0 case (all alternatives identical on every criterion),
    # which previously raised ZeroDivisionError; those rows stay NaN.
    total = dist_from_best + dist_from_worst
    performance_rank = np.full(total.shape, np.nan)
    np.divide(dist_from_worst, total, out=performance_rank, where=total != 0)

    return pd.Series(performance_rank)

In [3]:
# Dataset sub-folder under ../datasets/; the input and output CSV paths are built from it.
main_folder = "israel-palestine_conflict_history"

In [4]:
# Input CSVs for the selected dataset folder.
sb_path = f"../datasets/{main_folder}/source_backlinks.csv"
sc_path = f"../datasets/{main_folder}/source_check.csv"

In [5]:
# Load both tables and discard the "Unnamed: 0" column
# (presumably a row index saved by an earlier to_csv -- confirm).
sb_df = pd.read_csv(sb_path).drop(columns="Unnamed: 0")
sc_df = pd.read_csv(sc_path).drop(columns="Unnamed: 0")

In [6]:
# Preview the first five rows of the backlinks table.
sb_df.head(5)

Unnamed: 0,channel_id,channel_name,LinkedIn,Wiki,Website,Twitter,Facebook
0,UC7fWeaHhqgM4Ry-RMpM2YYw,TRT World,0,0,20309,196151,0
1,UC9MAhZQQd9egwWCxrwSIsJQ,HISTORY,0,0,49829,196151,0
2,UC_gUM8rL-Lrg6O3adPW9K1g,WION,0,0,9070,73,0
3,UCLXo7UDZvByw2ixzpQCufnA,Vox,0,0,188553,8643,539
4,UCVgO39Bk5sMo66-6o6Spn6Q,ABC News (Australia),0,0,2087,8643,539


In [7]:
# Preview the first five rows of the source-check table.
sc_df.head(5)

Unnamed: 0,channel_id,channel_name,LinkedIn,Wiki,Website,Twitter,Facebook
0,UC7fWeaHhqgM4Ry-RMpM2YYw,TRT World,False,False,True,True,True
1,UC9MAhZQQd9egwWCxrwSIsJQ,HISTORY,False,False,True,True,True
2,UC_gUM8rL-Lrg6O3adPW9K1g,WION,False,False,True,True,True
3,UCLXo7UDZvByw2ixzpQCufnA,Vox,False,False,True,True,True
4,UCVgO39Bk5sMo66-6o6Spn6Q,ABC News (Australia),False,False,True,True,True


---

In [8]:
# Convert the per-channel presence flags in sc_df into two coarse criterion
# scores ("profiles" and "external_sites"). Entries are keyed by row position,
# so the frame built from comp_dict gets a 0..n-1 index.
comp_dict = {"channel_id": {}, "profiles": {}, "external_sites": {}}

for i in range(sc_df.shape[0]):
    row = sc_df.iloc[i]  # fetch the row once instead of once per column
    comp_dict["channel_id"][i] = row["channel_id"]

    linkedIn = row["LinkedIn"]
    wiki = row["Wiki"]
    website = row["Website"]
    twitter = row["Twitter"]
    facebook = row["Facebook"]

    # Profiles
    if linkedIn and (facebook or twitter):
        profile_score = 20
    elif linkedIn:
        # LinkedIn only. No branch used to match this combination, so the row
        # got no "profiles" entry and became NaN in the resulting DataFrame.
        # NOTE(review): 10 mirrors the "wiki only" score below -- confirm the
        # intended value.
        profile_score = 10
    elif facebook and twitter:
        profile_score = 10
    elif facebook or twitter:
        profile_score = 5
    else:
        profile_score = 0
    comp_dict["profiles"][i] = profile_score

    # External Sites
    if wiki and website:
        site_score = 20
    elif wiki:
        site_score = 10
    elif website:
        site_score = 5
    else:
        site_score = 0
    comp_dict["external_sites"][i] = site_score

In [9]:
# Put the derived scores side by side with the raw backlink counts.
# The concat aligns on the row index, so sb_df is assumed to list channels in
# the same order as sc_df -- TODO confirm.
backlink_columns = ["LinkedIn", "Wiki", "Website", "Twitter", "Facebook"]
comp_df = pd.concat(
    [pd.DataFrame(comp_dict), sb_df[backlink_columns]],
    axis="columns",
)

In [10]:
# Preview the first five rows of the combined criteria table.
comp_df.head(5)

Unnamed: 0,channel_id,profiles,external_sites,LinkedIn,Wiki,Website,Twitter,Facebook
0,UC7fWeaHhqgM4Ry-RMpM2YYw,10,5,0,0,20309,196151,0
1,UC9MAhZQQd9egwWCxrwSIsJQ,10,5,0,0,49829,196151,0
2,UC_gUM8rL-Lrg6O3adPW9K1g,10,5,0,0,9070,73,0
3,UCLXo7UDZvByw2ixzpQCufnA,10,5,0,0,188553,8643,539
4,UCVgO39Bk5sMo66-6o6Spn6Q,10,5,0,0,2087,8643,539


In [11]:
# Criterion weights handed to topsis(); each key is a comp_df column name.
# The seven weights sum to 1.0.
weights = {
    "profiles": 0.10,
    "external_sites": 0.10,
    "LinkedIn": 0.25,
    "Wiki": 0.25,
    "Website": 0.10,
    "Twitter": 0.10,
    "Facebook": 0.10
}

In [12]:
# Compute the TOPSIS closeness score of every channel and store it as "vs".
closeness_scores = topsis(comp_df, weights)
comp_df["vs"] = closeness_scores

In [13]:
# Rank channels by score, best first, and renumber the rows 0..n-1.
vs_df = (
    comp_df[["channel_id", "vs"]]
    .sort_values("vs", ascending=False)
    .reset_index(drop=True)
)
vs_df

Unnamed: 0,channel_id,vs
0,UCPxMZIFE856tbTfdkdjzTSQ,0.648603
1,UCupvZG-5ko_eiXAupbDfxWw,0.317982
2,UC9MAhZQQd9egwWCxrwSIsJQ,0.201764
3,UC7fWeaHhqgM4Ry-RMpM2YYw,0.201514
4,UCIRYBXDze5krPDzAEOxFGVA,0.184968
...,...,...
113,UChDmf55WYF2keOb-khGk1iw,0.000000
114,UC2Cl2g2xFTZoAEldxYVzQFg,0.000000
115,UCj5RwDivLksanrNvkW0FB4w,0.000000
116,UCWQ9ZFFhEqUZ0r1IspnBm6Q,0.000000


In [14]:
# Save the ranking in the dataset folder (the row index is written as well).
path = f"../datasets/{main_folder}/verifiability_scores.csv"
vs_df.to_csv(path)

---

In [15]:
# The five highest-scoring channels (vs_df is sorted in descending order).
top_5 = vs_df.head(5)
top_5

Unnamed: 0,channel_id,vs
0,UCPxMZIFE856tbTfdkdjzTSQ,0.648603
1,UCupvZG-5ko_eiXAupbDfxWw,0.317982
2,UC9MAhZQQd9egwWCxrwSIsJQ,0.201764
3,UC7fWeaHhqgM4Ry-RMpM2YYw,0.201514
4,UCIRYBXDze5krPDzAEOxFGVA,0.184968


In [16]:
# The five lowest-scoring channels (vs_df is sorted in descending order).
bottom_5 = vs_df.tail(5)
bottom_5

Unnamed: 0,channel_id,vs
113,UChDmf55WYF2keOb-khGk1iw,0.0
114,UC2Cl2g2xFTZoAEldxYVzQFg,0.0
115,UCj5RwDivLksanrNvkW0FB4w,0.0
116,UCWQ9ZFFhEqUZ0r1IspnBm6Q,0.0
117,UC0yLX6hpcYhAU3iVjhwD5-A,0.0


In [19]:
# Summary statistics, one row per numeric column.
comp_df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
profiles,118.0,7.627119,4.746202,0.0,5.0,10.0,10.0,20.0
external_sites,118.0,2.923729,2.474336,0.0,0.0,5.0,5.0,5.0
LinkedIn,118.0,0.042373,0.460287,0.0,0.0,0.0,0.0,5.0
Wiki,118.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Website,118.0,42268.542373,192529.203981,0.0,0.0,2.0,1787.0,1301748.0
Twitter,118.0,5314.610169,26190.385484,0.0,0.0,1.0,166.75,196151.0
Facebook,118.0,1639.211864,17330.223496,0.0,0.0,0.0,0.0,188271.0
vs,118.0,0.051032,0.0696,0.0,0.034779,0.043942,0.052713,0.6486026
