In [1]:
import pandas as pd
import json
import ast
import warnings
from ast import literal_eval as le
import plotly.express as px

warnings.filterwarnings("ignore")
import requests
import pandas as pd
import urlexpander


def unshorten(url):
    if "http" not in url:
        url = "http://" + url

    try:
        short = urlexpander.expand(url)
    except:
        short = url

    if "_ERROR__" in short:
        return url
    else:
        return short


def one_url_df(df, column="n_urls", urls="urls"):
    df = df[df[column] == 1]
    df[urls] = df[urls].apply(lambda x: x[0])
    return df


def more_url_df(df, interaction, n="n_urls", urls="urls"):
    df = df[df[n] > 1]
    temp = []
    for i in df.iterrows():
        row = i[1].to_dict()
        for url in row[urls]:
            temp.append((url, row[interaction], row[n]))
    return (
        pd.DataFrame(temp)
        .drop_duplicates()
        .rename(columns={0: urls, 1: interaction, 2: n})
    )

def plot(df, x):
    fig = px.histogram(df, x)
    fig.show();
    
def fix(df):
    df["urls"] = df["urls"].apply(lambda x: le(x))
    df["n_urls"] = df["urls"].apply(lambda x: len(x))

def un(df):
    df["urls"] = df["urls"].apply(lambda x: unshorten(x))
    
def sort(df, column):
    df.sort_values(column, inplace=True, ascending=False)

# Top Shared

## Top Interacted

In [2]:
import os

In [3]:
# top interacted all files paths
all_top_interacted = [
    "../Top_Interacted/Data/" + i
    for i in os.listdir("../Top_Interacted/Data/")
    if i.split(".")[-1] == "csv"
]
all_top_interacted

['../Top_Interacted/Data/change_4_months.csv',
 '../Top_Interacted/Data/change_dec.csv',
 '../Top_Interacted/Data/instagram_4_months.csv',
 '../Top_Interacted/Data/instagram_dec.csv',
 '../Top_Interacted/Data/reddits_4_months.csv',
 '../Top_Interacted/Data/reddits_dec.csv',
 '../Top_Interacted/Data/twitter_4_months.csv',
 '../Top_Interacted/Data/twitter_dec.csv',
 '../Top_Interacted/Data/youtube_4_months.csv',
 '../Top_Interacted/Data/youtube_dec.csv',
 '../Top_Interacted/Data/_all_4_months.csv',
 '../Top_Interacted/Data/_all_dec.csv']

In [4]:
all_top_interacted_4_months = [i for i in all_top_interacted if "4_months" in i]
all_top_interacted_4_months

['../Top_Interacted/Data/change_4_months.csv',
 '../Top_Interacted/Data/instagram_4_months.csv',
 '../Top_Interacted/Data/reddits_4_months.csv',
 '../Top_Interacted/Data/twitter_4_months.csv',
 '../Top_Interacted/Data/youtube_4_months.csv',
 '../Top_Interacted/Data/_all_4_months.csv']

In [5]:
all_top_interacted_dec = [i for i in all_top_interacted if "dec" in i]
all_top_interacted_dec

['../Top_Interacted/Data/change_dec.csv',
 '../Top_Interacted/Data/instagram_dec.csv',
 '../Top_Interacted/Data/reddits_dec.csv',
 '../Top_Interacted/Data/twitter_dec.csv',
 '../Top_Interacted/Data/youtube_dec.csv',
 '../Top_Interacted/Data/_all_dec.csv']

In [6]:
li_4 = []
for filename in all_top_interacted_4_months:
    df = pd.read_csv(filename, index_col=None)
    li_4.append(df)

df_top_interacted_4_months = pd.concat(li_4, ignore_index=True).drop(columns={"Unnamed: 0"})
df_top_interacted_4_months

Unnamed: 0,shared_url,id/url,text,n_likes,total,url_id,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
0,https://www.change.org/p/all-world-leaders-des...,25752458,"sign this, the fate of the world depends on it...",2.0,2.0,change_petition_comments_4_months_0,,,,,,,,,,
1,https://www.change.org/p/%D0%BC%D0%B8%D0%B4-%D...,13641580,Johnny hold on! Now the same script was writte...,1.0,1.0,change_petition_comments_4_months_1,,,,,,,,,,
2,https://www.change.org/p/%D0%BC%D0%B8%D0%B4-%D...,13641580,Johnny hold on! Now the same script was writte...,1.0,1.0,change_petition_comments_4_months_1,,,,,,,,,,
3,https://www.change.org/p/loma-vista-recordings...,13641580,Johnny hold on! Now the same script was writte...,1.0,1.0,change_petition_comments_4_months_1,,,,,,,,,,
4,https://www.change.org/p/loma-vista-recordings...,13641580,Johnny hold on! Now the same script was writte...,1.0,1.0,change_petition_comments_4_months_1,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1509,https://www.change.org/p/warner-brothers-keep-...,25699776,"<a href=""https://www.change.org/p/warner-broth...",0.0,0.0,change_petition_comments_4_months_10,,,,,,,,,,
1510,https://www.change.org/p/warner-brothers-keep-...,25699776,"<a href=""https://www.change.org/p/warner-broth...",0.0,0.0,change_petition_comments_4_months_10,,,,,,,,,,
1511,http://www.eximp.com.co/,26128212,Granolas Delihealth\nEn Delihealth. Solicita t...,0.0,0.0,change_petition_comments_4_months_11,,,,,,,,,,
1512,https://www.change.org/p/walt-disney-back-john...,20186707,http://chng.it/VzrzLNcM,0.0,0.0,change_petition_comments_4_months_12,,,,,,,,,,


In [7]:
li_dec = []
for filename in all_top_interacted_dec:
    df = pd.read_csv(filename, index_col=None)
    li_dec.append(df)

df_top_interacted_dec = pd.concat(li_dec, ignore_index=True).drop(
    columns={"Unnamed: 0"}
)
df_top_interacted_dec

Unnamed: 0,shared_url,id/url,text,n_likes,total,url_id,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments,platform
0,https://www.documentcloud.org/documents/583586...,25809054,Amber Heard won and is innocent... UK Judge Ni...,9.0,9.0,change_petition_comments_dec_0,,,,,,,,,,,
1,https://www.documentcloud.org/documents/583586...,25809054,Amber Heard won and is innocent... UK Judge Ni...,9.0,9.0,change_petition_comments_dec_0,,,,,,,,,,,
2,https://www.youtube.com/channel/UCjKNc-woAlnxt...,25809054,Amber Heard won and is innocent... UK Judge Ni...,9.0,9.0,change_petition_comments_dec_0,,,,,,,,,,,
3,https://www.youtube.com/channel/UCjKNc-woAlnxt...,25809054,Amber Heard won and is innocent... UK Judge Ni...,9.0,9.0,change_petition_comments_dec_0,,,,,,,,,,,
4,https://www.latimes.com/entertainment/gossip/l...,25809054,Amber Heard won and is innocent... UK Judge Ni...,9.0,9.0,change_petition_comments_dec_0,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1323,https://www.change.org/p/united-nations-remove...,14094292,"Another similar petition, please sign:\n\n<a h...",0.0,0.0,change_petition_comments_dec_13,,,,,,,,,,,change
1324,https://www.change.org/p/united-nations-remove...,14094292,"Another similar petition, please sign:\n\n<a h...",0.0,0.0,change_petition_comments_dec_13,,,,,,,,,,,change
1325,https://www.change.org/p/united-nations-remove...,22874214,"New petition against Amber Heard, please sign:...",0.0,0.0,change_petition_comments_dec_14,,,,,,,,,,,change
1326,https://www.change.org/p/united-nations-remove...,22874214,"New petition against Amber Heard, please sign:...",0.0,0.0,change_petition_comments_dec_14,,,,,,,,,,,change


## Shared URLs

In [8]:
shared_4_months = pd.read_csv("../URLs_Data/df_shared_4_months.csv")
shared_4_months.fillna("{1}", inplace=True)
shared_4_months["n_urls"] = shared_4_months["n_urls"].apply(lambda x: le(x))
shared_4_months.rename(columns={"shared_link": "shared_url"}, inplace=True)
shared_4_months.head(5)

Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from
0,https://www.change.org/p/walt-disney-back-john...,6,1,"(1181480925,)",{1},change,petition_comment
1,https://www.change.org/p/me-johnny-depp-as-jac...,3,1,"(1181480925,)",{1},change,petition_comment
2,https://www.change.org/p/%D0%BC%D0%B8%D0%B4-%D...,2,1,{1179297829},{4},change,petition_comment
3,https://www.change.org/p/loma-vista-recordings...,2,1,{1179297829},{4},change,petition_comment
4,https://www.change.org/p/warner-brothers-keep-...,2,1,{1198070350},{2},change,petition_comment


In [9]:
shared_dec = pd.read_csv("../URLs_Data/df_shared_dec.csv")
shared_dec.rename(columns={"shared_link": "shared_url"}, inplace=True)
shared_dec.head(5)

Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from
0,https://www.documentcloud.org/documents/583586...,16,4,"{948054032, 1129808400, 1136503772, 1081693988}","{8, 10, 4}",change,petition_comment
1,https://www.latimes.com/entertainment/gossip/l...,16,4,"{948054032, 1129808400, 1136503772, 1081693988}","{8, 10, 4}",change,petition_comment
2,https://www.truehollywoodtalk.com/amber-heards...,10,2,"{1136503772, 1081693988}","{8, 10, 2}",change,petition_comment
3,https://www.youtube.com/channel/UCjKNc-woAlnxt...,8,2,"{1136503772, 1081693988}","{8, 10}",change,petition_comment
4,https://www.change.org/p/we-support-amber-hear...,6,1,{1081693988},{10},change,petition_comment


# 4

### Top shared youtube across all platforms

In [10]:
top_shared_4_months_all_platforms = pd.merge(
    shared_4_months, df_top_interacted_4_months, how="inner"
)
top_shared_4_months_all_platforms = top_shared_4_months_all_platforms.sort_values(
    "total", ascending=False
).reset_index(drop=True)

In [11]:
top_shared_4_months_all_platforms

Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from,id/url,text,n_likes,...,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
0,https://twitter.com/AmberryYt,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
1,https://www.youtube.com/watch?v=HIShWZkr0hs&li...,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
2,https://www.instagram.com/amberryphoeberry,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
3,https://www.instagram.com/amberryphoeberry,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
4,https://www.roblox.com/groups/3857126/Amberry-...,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1873,https://www.facebook.com/Delihealth-1014197053...,1,1,{61693917},{2},change,petition_comment,26128212,Granolas Delihealth\nEn Delihealth. Solicita t...,0.0,...,,,,,,,,,,
1874,https://www.facebook.com/Delihealth-1014197053...,1,1,{61693917},{2},change,petition_comment,26128212,Granolas Delihealth\nEn Delihealth. Solicita t...,0.0,...,,,,,,,,,,
1875,https://www.change.org/p/warner-brothers-keep-...,2,1,{1198070350},{2},change,petition_comment,25699776,"<a href=""https://www.change.org/p/warner-broth...",0.0,...,,,,,,,,,,
1876,https://www.change.org/p/warner-brothers-keep-...,2,1,{1198070350},{2},change,petition_comment,25699776,"<a href=""https://www.change.org/p/warner-broth...",0.0,...,,,,,,,,,,


In [12]:
saving = "Data/_"

In [13]:
top_shared_4_months_all_platforms.to_csv(saving + "top_shared_4_months_all_platforms.csv")

# DEC

### Top shared change across all platforms

In [14]:
top_shared_dec_all_platforms = pd.merge(
    shared_dec, df_top_interacted_dec, how="inner"
)
top_shared_dec_all_platforms = top_shared_dec_all_platforms.sort_values(
    "total", ascending=False
).reset_index(drop=True)

In [15]:
top_shared_dec_all_platforms

Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from,id/url,text,n_likes,...,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
0,https://www.youtube.com/thebellatwins,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
1,https://twitter.com/wwe,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
2,http://wwe.yt/wwenetwork,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
3,http://wwe.yt/,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
4,http://www.wwe.com/sonysportsnetwork,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,14094292,"Another similar petition, please sign:\n\n<a h...",0.0,...,,,,,,,,,,
472,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,14094292,"Another similar petition, please sign:\n\n<a h...",0.0,...,,,,,,,,,,
473,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",0.0,...,,,,,,,,,,
474,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",0.0,...,,,,,,,,,,


In [16]:
top_shared_dec_all_platforms.to_csv(saving + "top_shared_dec_all_platforms.csv")

In [17]:
top_shared_dec_all_platforms

Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from,id/url,text,n_likes,...,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
0,https://www.youtube.com/thebellatwins,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
1,https://twitter.com/wwe,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
2,http://wwe.yt/wwenetwork,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
3,http://wwe.yt/,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
4,http://www.wwe.com/sonysportsnetwork,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,14094292,"Another similar petition, please sign:\n\n<a h...",0.0,...,,,,,,,,,,
472,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,14094292,"Another similar petition, please sign:\n\n<a h...",0.0,...,,,,,,,,,,
473,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",0.0,...,,,,,,,,,,
474,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",0.0,...,,,,,,,,,,


## Shared across all platforms

In [18]:
top_shared_4_months_all_platforms[["n_shared", "total"]].corr()

Unnamed: 0,n_shared,total
n_shared,1.0,-0.053192
total,-0.053192,1.0


In [19]:
top_shared_dec_all_platforms[["n_shared", "total"]].corr()

Unnamed: 0,n_shared,total
n_shared,1.0,-0.165175
total,-0.165175,1.0


In [20]:
top_shared_dec_all_platforms = pd.read_csv(saving + "top_shared_dec_all_platforms.csv")

In [21]:
top_shared_dec_all_platforms

Unnamed: 0.1,Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from,id/url,text,...,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
0,0,https://www.youtube.com/thebellatwins,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
1,1,https://twitter.com/wwe,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
2,2,http://wwe.yt/wwenetwork,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
3,3,http://wwe.yt/,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
4,4,http://www.wwe.com/sonysportsnetwork,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,471,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,14094292,"Another similar petition, please sign:\n\n<a h...",...,,,,,,,,,,
472,472,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,14094292,"Another similar petition, please sign:\n\n<a h...",...,,,,,,,,,,
473,473,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",...,,,,,,,,,,
474,474,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",...,,,,,,,,,,


In [22]:
with open("list_top_shared_urls.txt", "w") as f:
    f.write(str(list(top_shared_dec_all_platforms.shared_url.values)))

In [23]:
top_shared_dec_all_platforms

Unnamed: 0.1,Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from,id/url,text,...,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
0,0,https://www.youtube.com/thebellatwins,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
1,1,https://twitter.com/wwe,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
2,2,http://wwe.yt/wwenetwork,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
3,3,http://wwe.yt/,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
4,4,http://www.wwe.com/sonysportsnetwork,1,1,{'UCJ5v_MCY6GNUBTO8-D3XoAg'},{16},youtube,comment,https://www.youtube.com/watch?v=lnlE_HEKuc0,full match - men's elimination chamber match: ...,...,,,,,,,90110984.0,70870.0,0.0,20724.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,471,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,14094292,"Another similar petition, please sign:\n\n<a h...",...,,,,,,,,,,
472,472,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,14094292,"Another similar petition, please sign:\n\n<a h...",...,,,,,,,,,,
473,473,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",...,,,,,,,,,,
474,474,https://www.change.org/p/united-nations-remove...,4,1,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",...,,,,,,,,,,


In [24]:
top_shared_4_months_all_platforms

Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from,id/url,text,n_likes,...,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
0,https://twitter.com/AmberryYt,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
1,https://www.youtube.com/watch?v=HIShWZkr0hs&li...,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
2,https://www.instagram.com/amberryphoeberry,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
3,https://www.instagram.com/amberryphoeberry,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
4,https://www.roblox.com/groups/3857126/Amberry-...,1,1,{'UCxnYiv7s4698GRIlGE1_T7g'},{7},youtube,comment,https://www.youtube.com/watch?v=A1kOXoxgt18,the day olive was born... 1 million subscriber...,154907.0,...,,,,,,,7271086.0,4605.0,0.0,12352.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1873,https://www.facebook.com/Delihealth-1014197053...,1,1,{61693917},{2},change,petition_comment,26128212,Granolas Delihealth\nEn Delihealth. Solicita t...,0.0,...,,,,,,,,,,
1874,https://www.facebook.com/Delihealth-1014197053...,1,1,{61693917},{2},change,petition_comment,26128212,Granolas Delihealth\nEn Delihealth. Solicita t...,0.0,...,,,,,,,,,,
1875,https://www.change.org/p/warner-brothers-keep-...,2,1,{1198070350},{2},change,petition_comment,25699776,"<a href=""https://www.change.org/p/warner-broth...",0.0,...,,,,,,,,,,
1876,https://www.change.org/p/warner-brothers-keep-...,2,1,{1198070350},{2},change,petition_comment,25699776,"<a href=""https://www.change.org/p/warner-broth...",0.0,...,,,,,,,,,,


In [26]:
top_shared = pd.concat([top_shared_dec_all_platforms, top_shared_4_months_all_platforms, ])

In [46]:
top_shared = top_shared[(top_shared.text.str.lower().str.contains(" amber h")) | (top_shared.text.str.lower().str.contains("johnny"))]

In [47]:
top_shared.to_csv("Top_Shared.csv", index=False)

In [38]:
import os
os.listdir("../URLs_Data/Youtube")

['df_comments_4_months_urls.csv',
 'df_comments_4_months_urls_with_users_count.csv',
 'df_comments_dec_urls.csv',
 'df_comments_dec_urls_with_users_count.csv',
 'df_videos_4_months_urls_with_users_count.csv',
 'df_videos_dec_urls.csv',
 'df_videos_dec_urls_with_users_count.csv',
 'df_videos_urls_4_months.csv']

In [48]:
os.listdir("../Top_Interacted/Data/")

['change_4_months.csv',
 'change_dec.csv',
 'extras',
 'instagram_4_months.csv',
 'instagram_dec.csv',
 'README.md',
 'reddits_4_months.csv',
 'reddits_dec.csv',
 'Twitter',
 'twitter_4_months.csv',
 'twitter_dec.csv',
 'Youtube',
 'youtube_4_months.csv',
 'youtube_dec.csv',
 '_all_4_months.csv',
 '_all_dec.csv']

In [49]:
all_youtube_top = pd.concat(
[pd.read_csv("../Top_Interacted/Data/youtube_4_months.csv"),
pd.read_csv("../Top_Interacted/Data/youtube_dec.csv")]
)

In [57]:
all_youtube_top.head(1)

Unnamed: 0.1,Unnamed: 0,shared_url,id/url,text,n_likes,n_replies,total,url_id,n_views,n_dislikes,n_favorites,n_comments
0,0,https://youtu.be/Axcjv9YqYns,https://www.youtube.com/watch?v=UvsHWTGHmts,Disney Officially Rehires Johnny Depp After Ca...,2174.0,82.0,2256.0,youtube_comments_4_months_0,,,,


In [59]:
top_shared.head(1)

Unnamed: 0.1,Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from,id/url,text,...,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
28,28.0,https://youtu.be/hwkb80xzdus,1,1,"('UCAIycRkUrsSP73CZIYFLCWw',)",{1},youtube,comment,https://www.youtube.com/watch?v=h-wpBkiMKd0,caso de johnny depp y amber heard: amber es at...,...,,,,,,,9402370.0,6188.0,0.0,18215.0


In [61]:
top_shared.columns

Index(['Unnamed: 0', 'shared_url', 'n_shared', 'from_n_users', 'users',
       'n_urls', 'platform', 'from', 'id/url', 'text', 'n_likes', 'total',
       'url_id', 'media_id', 'n_replies', 'score', 'user.id_str',
       'retweet_count', 'favorite_count', 'n_views', 'n_dislikes',
       'n_favorites', 'n_comments'],
      dtype='object')

In [63]:
all_youtube_top = pd.merge(all_youtube_top,top_shared, on="id/url", how="left")

In [70]:
old = [i for i in all_youtube_top.columns if "_x" in i][1:]

In [71]:
new = [i[:-2] for i in all_youtube_top.columns if "_x" in i][1:]

In [73]:
dic = {}
for i in range(len(old)):
    dic[old[i]] = new[i]

In [75]:
all_youtube_top.rename(columns=dic, inplace=True)

In [82]:
all_youtube_top = all_youtube_top[new].sort_values("total", ascending=False)

In [83]:
top_shared.columns[1:]

Index(['shared_url', 'n_shared', 'from_n_users', 'users', 'n_urls', 'platform',
       'from', 'id/url', 'text', 'n_likes', 'total', 'url_id', 'media_id',
       'n_replies', 'score', 'user.id_str', 'retweet_count', 'favorite_count',
       'n_views', 'n_dislikes', 'n_favorites', 'n_comments'],
      dtype='object')

In [84]:
all_youtube_top.columns[1:]

Index(['text', 'n_likes', 'n_replies', 'total', 'url_id', 'n_views',
       'n_dislikes', 'n_favorites', 'n_comments'],
      dtype='object')

In [85]:
set(top_shared.columns[1:]) - set(all_youtube_top.columns[1:])

{'favorite_count',
 'from',
 'from_n_users',
 'id/url',
 'media_id',
 'n_shared',
 'n_urls',
 'platform',
 'retweet_count',
 'score',
 'shared_url',
 'user.id_str',
 'users'}

In [86]:
all_youtube_top["n_shared"] = 0

In [93]:
top_with_yt = pd.concat([top_shared, all_youtube_top]).drop(columns="Unnamed: 0").reset_index(drop=True).sort_values("total", ascending=False)

In [94]:
top_with_yt

Unnamed: 0,shared_url,n_shared,from_n_users,users,n_urls,platform,from,id/url,text,n_likes,...,media_id,n_replies,score,user.id_str,retweet_count,favorite_count,n_views,n_dislikes,n_favorites,n_comments
1099,http://www.wwe.com/sonysportsnetwork,0,,,,,,,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
1098,http://shop.wwe.com,0,,,,,,,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
1088,https://www.youtube.com/wwetotaldivas,0,,,,,,,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
1089,https://link.chtbl.com/7fp6uoqk,0,,,,,,,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
1090,http://bit.ly/afterthebellpodcast,0,,,,,,,full match - men's elimination chamber match: ...,1100425.0,...,,,,,,,90110984.0,70870.0,0.0,20724.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,https://www.change.org/p/united-nations-remove...,4,1.0,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",0.0,...,,,,,,,,,,
233,https://www.change.org/p/united-nations-remove...,4,1.0,{1048865725},{2},change,petition_comment,22874214,"New petition against Amber Heard, please sign:...",0.0,...,,,,,,,,,,
232,https://www.change.org/p/the-walt-disney-compa...,1,1.0,"(1167331579,)",{1},change,petition_comment,25829282,Johnny Depp deserves to get his career back. H...,0.0,...,,,,,,,,,,
231,https://www.change.org/p/dc-entertainment-remo...,1,1.0,"(95107240,)",{1},change,petition_comment,26152026,KAITLYN HAESTIER-KAITLYN HAESTIER\nPlease note...,0.0,...,,,,,,,,,,


In [95]:
top_with_yt.to_csv("top_shared_all_top_yt.csv", index=False)