In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from math import ceil
from math import sqrt

In [2]:
def topsis(scores, weights):
    """Score alternatives with TOPSIS (closeness to the ideal solution).

    Each column named in ``weights`` is vector-normalised (divided by the
    square root of its sum of squares) and multiplied by its weight. The
    per-column maximum is taken as the "ideal best" and the per-column
    minimum as the "ideal worst", i.e. every criterion is treated as
    larger-is-better. A row's score is::

        d_worst / (d_best + d_worst)

    where ``d_best`` / ``d_worst`` are the Euclidean distances of the row
    from the two ideals, so scores fall in ``[0, 1]`` and higher is better.

    Parameters
    ----------
    scores : pandas.DataFrame
        One row per alternative. Only the columns named in ``weights`` are
        read; any other columns (ids, names, ...) are ignored.
    weights : dict
        Maps column name -> numeric weight.

    Returns
    -------
    pandas.Series
        One float per row of ``scores``, in row order, with a default
        ``0..n-1`` index (the index of ``scores`` is not carried over).

    Notes
    -----
    * Columns whose values are all zero carry no information and are left
      out of the distance computation.
    * A row that coincides with both ideals (e.g. all rows are tied, or no
      informative column remains) gets a score of ``0.0`` instead of
      triggering a division by zero.
    """
    criteria = list(weights.keys())
    n_rows = scores.shape[0]
    if n_rows == 0:
        return pd.Series(np.array([], dtype=float))

    matrix = scores[criteria].to_numpy(dtype=float)
    weight_vector = np.array([weights[name] for name in criteria], dtype=float)

    # Vector normalisation: divide each column by its Euclidean norm.
    norms = np.sqrt((matrix ** 2).sum(axis=0))
    informative = norms != 0  # all-zero columns cannot be normalised; skip them
    weighted = matrix[:, informative] / norms[informative] * weight_vector[informative]

    ideal_best = weighted.max(axis=0)
    ideal_worst = weighted.min(axis=0)

    dist_from_best = np.sqrt(((weighted - ideal_best) ** 2).sum(axis=1))
    dist_from_worst = np.sqrt(((weighted - ideal_worst) ** 2).sum(axis=1))

    # Guarded division: rows where both distances are zero keep the 0.0 default.
    total = dist_from_best + dist_from_worst
    closeness = np.divide(
        dist_from_worst,
        total,
        out=np.zeros_like(total),
        where=total != 0,
    )

    return pd.Series(closeness)

In [3]:
# Name of the dataset sub-folder; the file paths in this notebook are built from it.
main_folder = "israel-palestine_conflict_history"

In [4]:
# Locations of the two input CSVs inside ../datasets/<main_folder>/.
sb_path = f"../datasets/{main_folder}/source_backlinks.csv"
sc_path = f"../datasets/{main_folder}/source_check.csv"

In [5]:
# Load both inputs and drop the "Unnamed: 0" column
# (presumably the index written by an earlier to_csv call -- confirm).
sb_df = pd.read_csv(sb_path).drop(columns="Unnamed: 0")
sc_df = pd.read_csv(sc_path).drop(columns="Unnamed: 0")

In [6]:
# Preview the first rows of the table loaded from source_backlinks.csv.
sb_df.head()

Unnamed: 0,channel_id,channel_name,LinkedIn,Wiki,Website,Twitter,Facebook
0,UC7fWeaHhqgM4Ry-RMpM2YYw,TRT World,0,0,20309,196151,0
1,UC9MAhZQQd9egwWCxrwSIsJQ,HISTORY,0,0,49829,196151,0
2,UC_gUM8rL-Lrg6O3adPW9K1g,WION,0,0,9070,73,0
3,UCLXo7UDZvByw2ixzpQCufnA,Vox,0,0,188553,8643,539
4,UCVgO39Bk5sMo66-6o6Spn6Q,ABC News (Australia),0,0,2087,8643,539


In [7]:
# Preview the first rows of the table loaded from source_check.csv.
sc_df.head()

Unnamed: 0,channel_id,channel_name,LinkedIn,Wiki,Website,Twitter,Facebook
0,UC7fWeaHhqgM4Ry-RMpM2YYw,TRT World,False,False,True,True,True
1,UC9MAhZQQd9egwWCxrwSIsJQ,HISTORY,False,False,True,True,True
2,UC_gUM8rL-Lrg6O3adPW9K1g,WION,False,False,True,True,True
3,UCLXo7UDZvByw2ixzpQCufnA,Vox,False,False,True,True,True
4,UCVgO39Bk5sMo66-6o6Spn6Q,ABC News (Australia),False,False,True,True,True


---

In [8]:
# Per-row component scores derived from the boolean flags in sc_df.
# Each inner dict maps the row position to a value so the result can be
# turned into a DataFrame with one column per outer key.
comp_dict = {"channel_id": {}, "profiles": {}, "external_sites": {}}

for i in range(sc_df.shape[0]):
    row = sc_df.iloc[i]
    comp_dict["channel_id"][i] = row["channel_id"]

    has_linkedin = row["LinkedIn"]
    has_wiki = row["Wiki"]
    has_website = row["Website"]
    has_twitter = row["Twitter"]
    has_facebook = row["Facebook"]

    # Profiles: 20 = LinkedIn plus Facebook or Twitter, 10 = Facebook and
    # Twitter, 5 = exactly one of Facebook/Twitter, 0 = none of the three.
    if has_linkedin:
        if has_facebook or has_twitter:
            comp_dict["profiles"][i] = 20
        # NOTE(review): a channel with LinkedIn but neither Facebook nor
        # Twitter gets no entry here, which becomes NaN in the DataFrame --
        # confirm the intended score for that case.
    elif has_facebook and has_twitter:
        comp_dict["profiles"][i] = 10
    elif has_facebook or has_twitter:
        comp_dict["profiles"][i] = 5
    else:
        comp_dict["profiles"][i] = 0

    # External sites: 20 = wiki and website, 10 = wiki only,
    # 5 = website only, 0 = neither.
    if has_wiki:
        external_score = 20 if has_website else 10
    else:
        external_score = 5 if has_website else 0
    comp_dict["external_sites"][i] = external_score

In [9]:
# Put the component scores next to the numeric columns taken from sb_df.
# The two pieces are aligned on the index, so sb_df is expected to have the
# same row order as the sc_df rows that produced comp_dict.
backlink_columns = ["LinkedIn", "Wiki", "Website", "Twitter", "Facebook"]
comp_df = pd.concat(
    [pd.DataFrame(comp_dict), sb_df[backlink_columns]],
    axis="columns",
)

In [10]:
# Preview the combined table: component scores plus the columns taken from sb_df.
comp_df.head()

Unnamed: 0,channel_id,profiles,external_sites,LinkedIn,Wiki,Website,Twitter,Facebook
0,UC7fWeaHhqgM4Ry-RMpM2YYw,10,5,0,0,20309,196151,0
1,UC9MAhZQQd9egwWCxrwSIsJQ,10,5,0,0,49829,196151,0
2,UC_gUM8rL-Lrg6O3adPW9K1g,10,5,0,0,9070,73,0
3,UCLXo7UDZvByw2ixzpQCufnA,10,5,0,0,188553,8643,539
4,UCVgO39Bk5sMo66-6o6Spn6Q,10,5,0,0,2087,8643,539


In [11]:
# Criterion weights handed to topsis(); each key names a comp_df column.
# The seven weights add up to 1.0.
weights = dict(
    profiles=0.10,
    external_sites=0.10,
    LinkedIn=0.25,
    Wiki=0.25,
    Website=0.10,
    Twitter=0.10,
    Facebook=0.10,
)

In [12]:
# Score every row with TOPSIS and store the result as a new "vs" column
# (this modifies comp_df in place).
comp_df["vs"] = topsis(comp_df, weights)

In [13]:
# Rank channels from highest to lowest score and renumber the rows 0..n-1.
vs_df = (
    comp_df[["channel_id", "vs"]]
    .sort_values("vs", ascending=False)
    .reset_index(drop=True)
)
vs_df

Unnamed: 0,channel_id,vs
0,UCPxMZIFE856tbTfdkdjzTSQ,0.648603
1,UCupvZG-5ko_eiXAupbDfxWw,0.317982
2,UC9MAhZQQd9egwWCxrwSIsJQ,0.201764
3,UC7fWeaHhqgM4Ry-RMpM2YYw,0.201514
4,UCIRYBXDze5krPDzAEOxFGVA,0.184968
...,...,...
113,UChDmf55WYF2keOb-khGk1iw,0.000000
114,UC2Cl2g2xFTZoAEldxYVzQFg,0.000000
115,UCj5RwDivLksanrNvkW0FB4w,0.000000
116,UCWQ9ZFFhEqUZ0r1IspnBm6Q,0.000000


In [14]:
# Persist the ranking. The DataFrame index is written as well (to_csv default).
path = f"../datasets/{main_folder}/verifiability_scores.csv"
vs_df.to_csv(path)

---

In [4]:
# Locations of the four CSVs inside ../datasets/<main_folder>/.
sl_path = f"../datasets/{main_folder}/source_links.csv"
sb_path = f"../datasets/{main_folder}/source_backlinks.csv"
sc_path = f"../datasets/{main_folder}/source_check.csv"
vs_path = f"../datasets/{main_folder}/verifiability_scores.csv"

In [7]:
# Load all four tables and drop the "Unnamed: 0" column
# (presumably the index written when each CSV was saved -- confirm).
sl_df = pd.read_csv(sl_path).drop(columns="Unnamed: 0")
sb_df = pd.read_csv(sb_path).drop(columns="Unnamed: 0")
sc_df = pd.read_csv(sc_path).drop(columns="Unnamed: 0")
vs_df = pd.read_csv(vs_path).drop(columns="Unnamed: 0")

In [10]:
# Show the first ten rows of the reloaded scores table.
vs_df.head(10)

Unnamed: 0,channel_id,vs
0,UCPxMZIFE856tbTfdkdjzTSQ,0.633028
1,UCupvZG-5ko_eiXAupbDfxWw,0.315677
2,UC7fWeaHhqgM4Ry-RMpM2YYw,0.241028
3,UCIRYBXDze5krPDzAEOxFGVA,0.179367
4,UCaXkIU1QidjPwiAYu6GcHjg,0.147938
5,UCw3fku0sH3qA3c3pZeJwdAw,0.085677
6,UCXIJgqnII2ZOINSWNOGFThA,0.08252
7,UCAuUUnT6oDeKwE6v1NGQxug,0.081348
8,UCvQECJukTDE2i6aCoMnS-Vg,0.075032
9,UCVgO39Bk5sMo66-6o6Spn6Q,0.074467


In [16]:
# Summary statistics of vs_df, transposed so each summarised column is a row.
stats = vs_df.describe().transpose()
stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
vs,123.0,0.04923,0.066871,0.0,0.026652,0.042309,0.050606,0.633028


In [18]:
# Look up the "max" entry in the first row of the summary table.
stats.iloc[0]["max"]

0.6330276486586413

In [37]:
# Label every channel by where its score falls among the quartiles of the
# score distribution (taken from the first row of `stats`):
#   score >= 75%            - Very Verifiable
#   50% <= score < 75%      - Verifiable
#   25% <= score < 50%      - Somewhat Verifiable
#   0 < score < 25%         - Not so Verifiable
#   anything else (0, NaN)  - Cannot be verified
vs_stats = stats.iloc[0]
q25, q50, q75 = vs_stats["25%"], vs_stats["50%"], vs_stats["75%"]

category = []
for score in vs_df["vs"]:
    if score >= q75:
        category.append("Very Verifiable")
    elif score >= q50:
        category.append("Verifiable")
    elif score >= q25:
        category.append("Somewhat Verifiable")
    elif score > 0:
        category.append("Not so Verifiable")
    else:
        # Catch-all: exactly one label must be appended per row, otherwise
        # the labels below would be shifted against vs_df.
        category.append("Cannot be verified")

# Reuse vs_df's index so each label is attached to the row it was computed from.
category = pd.Series(category, index=vs_df.index, name="Category")
categorized_vf = pd.concat([vs_df, category], axis=1)
categorized_vf.head(10)

Unnamed: 0,channel_id,vs,Category
0,UCPxMZIFE856tbTfdkdjzTSQ,0.633028,Very Verifiable
1,UCupvZG-5ko_eiXAupbDfxWw,0.315677,Very Verifiable
2,UC7fWeaHhqgM4Ry-RMpM2YYw,0.241028,Very Verifiable
3,UCIRYBXDze5krPDzAEOxFGVA,0.179367,Very Verifiable
4,UCaXkIU1QidjPwiAYu6GcHjg,0.147938,Very Verifiable
5,UCw3fku0sH3qA3c3pZeJwdAw,0.085677,Very Verifiable
6,UCXIJgqnII2ZOINSWNOGFThA,0.08252,Very Verifiable
7,UCAuUUnT6oDeKwE6v1NGQxug,0.081348,Very Verifiable
8,UCvQECJukTDE2i6aCoMnS-Vg,0.075032,Very Verifiable
9,UCVgO39Bk5sMo66-6o6Spn6Q,0.074467,Very Verifiable


In [36]:
# NOTE(review): this is the same file that vs_path points at, so the scores CSV
# is overwritten with the extra "Category" column (plus the index column that
# to_csv writes by default). Re-running the load cell afterwards reads this version.
categorized_vf.to_csv("../datasets/" + main_folder + "/verifiability_scores.csv")

In [38]:
# Display the table loaded from source_links.csv.
sl_df

Unnamed: 0,channel_id,channel_name,LinkedIn,Wiki,Website,Twitter,Facebook
0,UC7fWeaHhqgM4Ry-RMpM2YYw,TRT World,,,trtworld.com,twitter.com/trtworld,facebook.com/TRTWorld
1,UC9MAhZQQd9egwWCxrwSIsJQ,HISTORY,,,history.com,twitter.com/HISTORY,facebook.com/History
2,UC_gUM8rL-Lrg6O3adPW9K1g,WION,,,wionews.com,twitter.com/WIONews,facebook.com/WIONews
3,UCLXo7UDZvByw2ixzpQCufnA,Vox,,,vox.com,twitter.com/voxdotcom,facebook.com/vox
4,UCVgO39Bk5sMo66-6o6Spn6Q,ABC News (Australia),,,abc.net.au/news,twitter.com/abcnews,facebook.com/abcnews.au
...,...,...,...,...,...,...,...
118,UCCWv2HujFrmLKDofsanY_GQ,Andrew,,,,,
119,UCSW0CYhKC1J0cpknZvN8MvQ,Universiteit van Nederland,,,universiteitvannederland.nl,twitter.com/universiteitnl,facebook.com/universiteitvannederland
120,UCPWXiRWZ29zrxPFIQT7eHSA,The Hill,,,twitter.com/thehill,twitter.com/RisingTheHill,facebook.com/HillTVLive
121,UCatL-c6pmnjzEOHSyjn-sHA,Khan GS Research Centre,,,khanglobalstudies.com,twitter.com/khansirpatna?s=08,facebook.com/khangsresearchcenter


In [39]:
# Display the table loaded from source_backlinks.csv.
sb_df

Unnamed: 0,channel_id,channel_name,LinkedIn,Wiki,Website,Twitter,Facebook
0,UC7fWeaHhqgM4Ry-RMpM2YYw,TRT World,0,0,20309,196151,0
1,UC9MAhZQQd9egwWCxrwSIsJQ,HISTORY,0,0,49829,308,1
2,UC_gUM8rL-Lrg6O3adPW9K1g,WION,0,0,9070,73,0
3,UCLXo7UDZvByw2ixzpQCufnA,Vox,0,0,188553,8643,539
4,UCVgO39Bk5sMo66-6o6Spn6Q,ABC News (Australia),0,0,2087,38191,3375
...,...,...,...,...,...,...,...
118,UCCWv2HujFrmLKDofsanY_GQ,Andrew,0,0,0,0,0
119,UCSW0CYhKC1J0cpknZvN8MvQ,Universiteit van Nederland,0,0,5378,1,0
120,UCPWXiRWZ29zrxPFIQT7eHSA,The Hill,0,0,2595,0,0
121,UCatL-c6pmnjzEOHSyjn-sHA,Khan GS Research Centre,0,0,276,1,0
