In [1]:
import pandas as pd
import numpy as np
from topsis import topsis
from tqdm import tqdm
from math import ceil
from math import sqrt

In [2]:
# Dataset sub-folder under ../datasets/ from which every input and output path
# in this notebook is built.
main_folder = "covid_vaccine"

In [3]:
# Locations of the two input tables for the selected dataset folder.
sb_path = f"../datasets/{main_folder}/source_backlinks.csv"
sc_path = f"../datasets/{main_folder}/source_check.csv"

In [4]:
# Load both tables and discard the "Unnamed: 0" column each file carries.
sb_df = pd.read_csv(sb_path).drop(columns=["Unnamed: 0"])
sc_df = pd.read_csv(sc_path).drop(columns=["Unnamed: 0"])

In [5]:
# Preview the first five rows of the source_backlinks table.
sb_df.head(5)

Unnamed: 0,channel_id,channel_name,LinkedIn,Wiki,Website,Twitter,Facebook
0,UCIALMKvObZNtJ6AmdCLP7Lg,Bloomberg Television,751,1237,0,0,30052
1,UC0vn8ISa4LKMunLbzaXLnOQ,GBNews,0,1076,995,12753,9645
2,UCLbidg2ZT49dWrxDkwNKr0g,Doc Schmidt,0,27185,1,0,0
3,UCaZ2jG4qd1vY-Tkwiq9FBlw,Rich Black Guy,1,0,1,10,0
4,UC_gUM8rL-Lrg6O3adPW9K1g,WION,0,820,9070,73,0


In [6]:
# Preview the first five rows of the source_check table.
sc_df.head(5)

Unnamed: 0,channel_id,channel_name,LinkedIn,Wiki,Website,Twitter,Facebook
0,UCIALMKvObZNtJ6AmdCLP7Lg,Bloomberg Television,True,True,False,False,True
1,UC0vn8ISa4LKMunLbzaXLnOQ,GBNews,False,True,True,True,True
2,UCLbidg2ZT49dWrxDkwNKr0g,Doc Schmidt,True,True,True,True,True
3,UCaZ2jG4qd1vY-Tkwiq9FBlw,Rich Black Guy,True,False,True,True,True
4,UC_gUM8rL-Lrg6O3adPW9K1g,WION,False,True,True,True,True


---

In [7]:
# Turn the boolean flags in sc_df into two presence scores per channel.
# comp_dict keeps the {column: {row_position: value}} layout that the next cell
# feeds to pd.DataFrame.from_dict.
has_linkedin = sc_df["LinkedIn"].astype(bool).to_numpy()
has_wiki = sc_df["Wiki"].astype(bool).to_numpy()
has_website = sc_df["Website"].astype(bool).to_numpy()
has_twitter = sc_df["Twitter"].astype(bool).to_numpy()
has_facebook = sc_df["Facebook"].astype(bool).to_numpy()

has_any_social = has_facebook | has_twitter
has_both_social = has_facebook & has_twitter

# Profiles: np.select applies the first matching rule, so each rule below only
# sees rows that every earlier rule rejected.
profile_scores = np.select(
    [
        has_linkedin & has_any_social,  # LinkedIn plus Facebook and/or Twitter
        # LinkedIn only. The original if/elif chain had no branch for this
        # combination, so the row was left without a "profiles" entry and
        # surfaced as NaN in comp_df.
        # NOTE(review): 5 scores it like any other single profile; confirm the
        # intended value for a LinkedIn-only channel.
        has_linkedin,
        has_both_social,                # Facebook and Twitter, no LinkedIn
        has_any_social,                 # exactly one of Facebook / Twitter
    ],
    [20, 5, 10, 5],
    default=0,                          # no profile at all
)

# External sites: Wiki and Website -> 20, Wiki only -> 10, Website only -> 5,
# neither -> 0.
external_site_scores = np.select(
    [has_wiki & has_website, has_wiki, has_website],
    [20, 10, 5],
    default=0,
)

comp_dict = {
    "channel_id": dict(enumerate(sc_df["channel_id"])),
    "profiles": dict(enumerate(profile_scores.tolist())),
    "external_sites": dict(enumerate(external_site_scores.tolist())),
}

In [8]:
# Combine the presence scores with the numeric columns of sb_df.
# Rows are paired by index label, not by channel_id, so this relies on both
# tables listing the channels in the same order.
backlink_columns = ["LinkedIn", "Wiki", "Website", "Twitter", "Facebook"]
presence_scores = pd.DataFrame(comp_dict)
comp_df = pd.concat([presence_scores, sb_df[backlink_columns]], axis=1)

In [9]:
# Preview the first five rows of the combined criteria table.
comp_df.head(5)

Unnamed: 0,channel_id,profiles,external_sites,LinkedIn,Wiki,Website,Twitter,Facebook
0,UCIALMKvObZNtJ6AmdCLP7Lg,20,10,751,1237,0,0,30052
1,UC0vn8ISa4LKMunLbzaXLnOQ,10,20,0,1076,995,12753,9645
2,UCLbidg2ZT49dWrxDkwNKr0g,20,20,0,27185,1,0,0
3,UCaZ2jG4qd1vY-Tkwiq9FBlw,20,5,1,0,1,10,0
4,UC_gUM8rL-Lrg6O3adPW9K1g,10,20,0,820,9070,73,0


In [10]:
# Weight given to each column of comp_df when it is passed to topsis;
# the seven weights sum to 1.
weights = dict(
    profiles=0.10,
    external_sites=0.10,
    LinkedIn=0.25,
    Wiki=0.25,
    Website=0.10,
    Twitter=0.10,
    Facebook=0.10,
)

In [11]:
# Store the result of the project's topsis helper as a new "vs" column.
# NOTE(review): comp_df is modified in place, so re-running this cell hands
# topsis a frame that already contains "vs"; whether topsis ignores columns
# that are not keys of `weights` is not visible here — confirm.
comp_df["vs"] = topsis(comp_df, weights)

In [12]:
# Rank channels by "vs", highest first, and renumber the rows from 0.
vs_df = (
    comp_df[["channel_id", "vs"]]
    .sort_values(by="vs", ascending=False)
    .reset_index(drop=True)
)
vs_df

Unnamed: 0,channel_id,vs
0,UCupvZG-5ko_eiXAupbDfxWw,0.781417
1,UCPWXiRWZ29zrxPFIQT7eHSA,0.398260
2,UC16niRr50-MSBwiO3YDb3RA,0.262098
3,UC52X5wxOL_s5yw0dQk7NtgA,0.242914
4,UCi8URBP1KDF0KuG_ntPFS2w,0.242914
...,...,...
101,UC3prwMn9aU2z5Y158ZdGyyA,0.000000
102,UCG5TBPANNSiKf1Dp-R5Dibg,0.000000
103,UCf3BkFTwI4G1vMc6W-NpQNw,0.000000
104,UCrmxzSIf7J66qvlOAT9AlZQ,0.000000


In [13]:
# Write the ranked scores into the same dataset folder the inputs came from.
path = f"../datasets/{main_folder}/verifiability_scores.csv"
vs_df.to_csv(path)

---

In [14]:
# First five rows of the ranking (vs_df is sorted by "vs" descending).
top_5 = vs_df.head(5)
top_5

Unnamed: 0,channel_id,vs
0,UCupvZG-5ko_eiXAupbDfxWw,0.781417
1,UCPWXiRWZ29zrxPFIQT7eHSA,0.39826
2,UC16niRr50-MSBwiO3YDb3RA,0.262098
3,UC52X5wxOL_s5yw0dQk7NtgA,0.242914
4,UCi8URBP1KDF0KuG_ntPFS2w,0.242914


In [15]:
# Last five rows of the ranking (vs_df is sorted by "vs" descending).
bottom_5 = vs_df.tail(5)
bottom_5

Unnamed: 0,channel_id,vs
101,UC3prwMn9aU2z5Y158ZdGyyA,0.0
102,UCG5TBPANNSiKf1Dp-R5Dibg,0.0
103,UCf3BkFTwI4G1vMc6W-NpQNw,0.0
104,UCrmxzSIf7J66qvlOAT9AlZQ,0.0
105,UCmtPDgiOcMStJt-f12yGTpg,0.0


In [16]:
# Criteria values for the channel at the top of the ranking.
comp_df[comp_df["channel_id"].eq("UCupvZG-5ko_eiXAupbDfxWw")]

Unnamed: 0,channel_id,profiles,external_sites,LinkedIn,Wiki,Website,Twitter,Facebook,vs
37,UCupvZG-5ko_eiXAupbDfxWw,20,20,192974,353421,1278644,51138,188271,0.781417


In [17]:
# Criteria values for the channel in the last row of the ranking.
comp_df[comp_df["channel_id"].eq("UCmtPDgiOcMStJt-f12yGTpg")]

Unnamed: 0,channel_id,profiles,external_sites,LinkedIn,Wiki,Website,Twitter,Facebook,vs
59,UCmtPDgiOcMStJt-f12yGTpg,0,0,0,0,0,0,0,0.0
