In [4]:
import numpy as np
import pandas as pd

In [5]:
# Load the source workbook. Every follower / subscriber count column is read
# with pandas' "Int64" dtype so that blank cells stay missing instead of
# forcing the whole column to float.
df = pd.read_excel(
    "data/original.xlsx",
    dtype=dict.fromkeys(
        [
            "X (Twitter) Follower #",
            "Facebook Follower #",
            "Instagram Follower #",
            "Threads Follower #",
            "YouTube Subscriber #",
            "TikTok Subscriber #",
        ],
        "Int64",
    ),
)

In [6]:
# Rename the verbose spreadsheet headers to short identifiers.
# Each platform ends up with three columns:
#   <Platform>      - handle / page / account name
#   <Platform>_url  - profile URL
#   <Platform>_fol  - follower / subscriber count
# The result is reassigned instead of using inplace=True, so the frame created
# by the loading cell is not mutated behind the reader's back.
df = df.rename(columns={
    # identity
    "Name (English)": "Name",
    "Name (Chinese)": "Name_Chinese",
    "Region of Focus": "Region",

    # ownership
    "Entity owner (English)": "Owner",
    "Entity owner (Chinese)": "Owner_Chinese",
    "Parent entity (English)": "Parent",
    "Parent entity (Chinese)": "Parent_Chinese",

    # X / Twitter
    "X (Twitter) handle": "Twitter",
    "X (Twitter) URL": "Twitter_url",
    "X (Twitter) Follower #": "Twitter_fol",

    # Facebook
    "Facebook page": "Facebook",
    "Facebook URL": "Facebook_url",
    "Facebook Follower #": "Facebook_fol",

    # Instagram
    # NOTE(review): "Instragram page" is kept verbatim; it presumably mirrors
    # a typo in the spreadsheet header - confirm before "correcting" it.
    "Instragram page": "Instagram",
    "Instagram URL": "Instagram_url",
    "Instagram Follower #": "Instagram_fol",

    # Threads
    "Threads account": "Threads",
    "Threads URL": "Threads_url",
    "Threads Follower #": "Threads_fol",

    # YouTube
    "YouTube account": "Youtube",
    "YouTube URL": "Youtube_url",
    "YouTube Subscriber #": "Youtube_fol",

    # TikTok
    "TikTok account": "Tiktok",
    "TikTok URL": "Tiktok_url",
    "TikTok Subscriber #": "Tiktok_fol",
})

Drop rows that have no follower count on any social media platform, then fill the remaining missing follower counts with 0.

In [7]:
# Platform names; "<platform>_fol" is the matching follower-count column.
media = ["Twitter", "Facebook", "Instagram", "Threads", "Youtube", "Tiktok"]
media_fol = [f"{platform}_fol" for platform in media]

# Keep only rows that have a follower count on at least one platform.
# Reassigning (instead of inplace=True) plus an explicit copy means the column
# assignment below targets an independent frame.
df = df.dropna(subset=media_fol, how="all").copy()

# A platform without a recorded count is treated as 0 followers.
df[media_fol] = df[media_fol].fillna(0)

Language -> region of focus: gauge the impact of each media outlet by checking its follower numbers.
         ->

We created a classification of the original regions from the Excel file; it is stored in `data/regions.xlsx`.

In [10]:
# Load the region classification workbook.
classification = pd.read_excel(io="data/regions.xlsx")

  warn(msg)


In [11]:
# Normalise the lookup-table headers to the names used in the main frame.
classification = classification.rename(columns={
    "Region of Focus": "Region",
    "STATES": "States",
    "SPECIFIC": "Specific",
})

# Index the table by "Region" so its columns can be used as region -> label
# lookups. The guard keeps the cell re-runnable: once "Region" is the index it
# is no longer a column, and a second set_index("Region") would raise KeyError.
if classification.index.name != "Region":
    classification = classification.set_index("Region")

classification

Unnamed: 0_level_0,States,Specific
Region,Unnamed: 1_level_1,Unnamed: 2_level_1
Anglosphere,Cross States,Language and Cultural Communities
China,Asia,Greater China
Afghanistan,Asia,South Asia
Yunnan,Asia,Greater China
Switzerland,Europe,Non-EU
...,...,...
Cambodia,Asia,ASEAN
Guyana,Americas,Latin America
North Macedonia,Europe,Non-EU
Sierra Leone,Africa,African Union


In [12]:
# Attach both classification levels by looking up each row's "Region" in the
# Region-indexed classification table.
df["States"] = df.loc[:, "Region"].map(classification.loc[:, "States"])
df["Specific"] = df.loc[:, "Region"].map(classification.loc[:, "Specific"])

In [13]:
# Inspect the mapped "States" column.
df.loc[:, "States"]

0      Cross States
1      Cross States
2              Asia
3              Asia
4      Cross States
           ...     
753            Asia
754            Asia
755            Asia
756    Cross States
757            Asia
Name: States, Length: 757, dtype: object

To make the data easier to read, we reorder the columns.

In [14]:
# Columns that should come first, in this exact order.
new_order_front = [
    "Name",
    "Name_Chinese",
    "Region",
    "Language",
    "States",
    "Specific",
]
# Every other column keeps its current relative position behind them.
new_order_tail = [label for label in df.columns if label not in new_order_front]
new_order = [*new_order_front, *new_order_tail]
new_order

['Name',
 'Name_Chinese',
 'Region',
 'Language',
 'States',
 'Specific',
 'Owner',
 'Owner_Chinese',
 'Parent',
 'Parent_Chinese',
 'Twitter',
 'Twitter_url',
 'Twitter_fol',
 'Facebook',
 'Facebook_url',
 'Facebook_fol',
 'Instagram',
 'Instagram_url',
 'Instagram_fol',
 'Threads',
 'Threads_url',
 'Threads_fol',
 'Youtube',
 'Youtube_url',
 'Youtube_fol',
 'Tiktok',
 'Tiktok_url',
 'Tiktok_fol']

In [15]:
# Apply the column order computed in new_order, then show the result.
df = df.loc[:, new_order]
df

Unnamed: 0,Name,Name_Chinese,Region,Language,States,Specific,Owner,Owner_Chinese,Parent,Parent_Chinese,...,Instagram_fol,Threads,Threads_url,Threads_fol,Youtube,Youtube_url,Youtube_fol,Tiktok,Tiktok_url,Tiktok_fol
0,Yang Xinmeng (Abby Yang),杨欣萌,Anglosphere,English,Cross States,Language and Cultural Communities,China Media Group (CMG),中央广播电视总台,Central Publicity Department,中共中央宣传部,...,9507,_bubblyabby_,https://www.threads.net/@_bubblyabby_,197,itsAbby,https://www.youtube.com/itsAbby,4680,_bubblyabby_,https://www.tiktok.com/@_bubblyabby_,660
1,CGTN Culture Express,,Anglosphere,English,Cross States,Language and Cultural Communities,China Media Group (CMG),中央广播电视总台,Central Publicity Department,中共中央宣传部,...,635,,,0,,,0,,,0
2,All-China Students' Federation,中华全国学联,China,Chinese,Asia,Greater China,All-China Students' Federation,中华全国学生联合会,Central Committee of the Communist Youth League,共青团中央,...,0,,,0,,,0,,,0
3,Chen Zhong (Dechinghutay / Ghoti),陈重,Afghanistan,Pashto,Asia,South Asia,China Media Group (CMG),中央广播电视总台,Central Publicity Department,中共中央宣传部,...,0,,,0,dechinghutay,https://www.youtube.com/channel/UCXl9X2fi65wKf...,695,dechinghutay,https://www.tiktok.com/@dechinghutay,326100
4,Yang Sheng,,Anglosphere,English,Cross States,Language and Cultural Communities,People's Daily Press,人民日报社,Central Committee of the Chinese Communist Party,中国共产党中央委员会,...,0,,,0,,,0,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753,Yang Gang,杨刚,Singapore,Chinese,Asia,ASEAN,Ministry of Foreign Affairs,中华人民共和国外交部,Ministry of Foreign Affairs,中华人民共和国外交部,...,0,,,0,,,0,,,0
754,Chinese Office of the Commissioner in Hong Kong,外交部駐港公署,Hong Kong,Chinese,Asia,Greater China,Office of the Commissioner of the Ministry of ...,中華人民共和國外交部駐香港特別行政區特派員公署,Ministry of Foreign Affairs,中华人民共和国外交部,...,0,,,0,,,0,,,0
755,Shiting Wenshan,视听文山,Yunnan,Chinese,Asia,Greater China,Wenshan Radio & Television Station,文山广播电视台,Wenshan Municipal People's Government,文山市人民政府,...,0,,,0,,,0,,,0
756,New Marco Polo,,Anglosphere,English,Cross States,Language and Cultural Communities,Xinhua News Agency,新华社,State Council,中华人民共和国国务院,...,0,,,0,,,0,,,0


In [16]:
# Display the current state of the frame.
df

Unnamed: 0,Name,Name_Chinese,Region,Language,States,Specific,Owner,Owner_Chinese,Parent,Parent_Chinese,...,Instagram_fol,Threads,Threads_url,Threads_fol,Youtube,Youtube_url,Youtube_fol,Tiktok,Tiktok_url,Tiktok_fol
0,Yang Xinmeng (Abby Yang),杨欣萌,Anglosphere,English,Cross States,Language and Cultural Communities,China Media Group (CMG),中央广播电视总台,Central Publicity Department,中共中央宣传部,...,9507,_bubblyabby_,https://www.threads.net/@_bubblyabby_,197,itsAbby,https://www.youtube.com/itsAbby,4680,_bubblyabby_,https://www.tiktok.com/@_bubblyabby_,660
1,CGTN Culture Express,,Anglosphere,English,Cross States,Language and Cultural Communities,China Media Group (CMG),中央广播电视总台,Central Publicity Department,中共中央宣传部,...,635,,,0,,,0,,,0
2,All-China Students' Federation,中华全国学联,China,Chinese,Asia,Greater China,All-China Students' Federation,中华全国学生联合会,Central Committee of the Communist Youth League,共青团中央,...,0,,,0,,,0,,,0
3,Chen Zhong (Dechinghutay / Ghoti),陈重,Afghanistan,Pashto,Asia,South Asia,China Media Group (CMG),中央广播电视总台,Central Publicity Department,中共中央宣传部,...,0,,,0,dechinghutay,https://www.youtube.com/channel/UCXl9X2fi65wKf...,695,dechinghutay,https://www.tiktok.com/@dechinghutay,326100
4,Yang Sheng,,Anglosphere,English,Cross States,Language and Cultural Communities,People's Daily Press,人民日报社,Central Committee of the Chinese Communist Party,中国共产党中央委员会,...,0,,,0,,,0,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753,Yang Gang,杨刚,Singapore,Chinese,Asia,ASEAN,Ministry of Foreign Affairs,中华人民共和国外交部,Ministry of Foreign Affairs,中华人民共和国外交部,...,0,,,0,,,0,,,0
754,Chinese Office of the Commissioner in Hong Kong,外交部駐港公署,Hong Kong,Chinese,Asia,Greater China,Office of the Commissioner of the Ministry of ...,中華人民共和國外交部駐香港特別行政區特派員公署,Ministry of Foreign Affairs,中华人民共和国外交部,...,0,,,0,,,0,,,0
755,Shiting Wenshan,视听文山,Yunnan,Chinese,Asia,Greater China,Wenshan Radio & Television Station,文山广播电视台,Wenshan Municipal People's Government,文山市人民政府,...,0,,,0,,,0,,,0
756,New Marco Polo,,Anglosphere,English,Cross States,Language and Cultural Communities,Xinhua News Agency,新华社,State Council,中华人民共和国国务院,...,0,,,0,,,0,,,0


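We add "most influential platform" columns: the highest follower count across all platforms (`Max_media_fol`) and the platform that count belongs to (`Max_media`).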

In [17]:
# Largest follower count across the platform columns, per row.
df["Max_media_fol"] = df[media_fol].max(axis=1).astype("Int64")

# Platform holding that maximum. idxmax returns the "<platform>_fol" column
# label (the first such column on ties), so the suffix is stripped with the
# vectorised .str accessor; its length is derived from the suffix itself
# rather than hard-coded.
df["Max_media"] = df[media_fol].idxmax(axis=1).str[: -len("_fol")]

In [18]:
# Inspect the per-row maximum follower count.
df.loc[:, "Max_media_fol"]

0      1387432
1         2488
2         5136
3      1600000
4         2821
        ...   
753       1700
754     853000
755         15
756    2300000
757    1400000
Name: Max_media_fol, Length: 757, dtype: Int64

In [19]:
# Inspect which platform holds the maximum for each row.
df.loc[:, "Max_media"]

0      Facebook
1       Twitter
2       Twitter
3      Facebook
4       Twitter
         ...   
753    Facebook
754    Facebook
755    Facebook
756    Facebook
757    Facebook
Name: Max_media, Length: 757, dtype: object

In [20]:
# Sum of the follower counts over all media_fol columns, per row.
df["Sum_fol"] = df.loc[:, media_fol].sum(axis="columns").astype("Int64")

In [21]:
# Persist the enriched frame as CSV.
df.to_csv(path_or_buf="data/results.csv")