### User Story No. 1
*When I choose a social media network, I am able to view the number of users in each
country.*

In [1]:
# Importing library
import pandas as pd

In [2]:
# Wrangle function for cleaning and loading the datasets
def wrangle(filepath, n_countries=8, n_platforms=10):
    """Load the social-media CSV and reduce every cell to a digit-only string.

    The first data row of the file is used as the column labels (after
    stripping surrounding whitespace); the rows that follow it are kept as
    the data. In each cell everything from the first ``(`` onward is
    discarded and all remaining non-digit characters are removed.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file; its first column becomes the row index.
    n_countries : int, default 8
        Number of leading columns whose labels are taken from the first
        data row.
    n_platforms : int, default 10
        Number of rows kept after the label row.

    Returns
    -------
    pandas.DataFrame
        Frame whose cells are digit-only strings, or NaN where the source
        cell was missing or contained no digits at all.
    """
    # Load the CSV file into a DataFrame (first column -> index).
    raw = pd.read_csv(filepath, index_col=0)

    # Row 0 carries the real column labels; map the parsed header names to them.
    label_row = raw.iloc[0, 0:n_countries]
    labelled = raw.rename(columns=label_row.to_dict())
    labelled.columns = labelled.columns.str.strip()

    # Take an explicit copy: the column assignments below would otherwise be
    # written into a slice of `labelled` (SettingWithCopyWarning on pandas < 3).
    counts = labelled.iloc[1:1 + n_platforms, :].copy()

    for column in counts.columns:
        digits = (
            counts[column]
            .str.split('(', expand=True)[0]          # drop any "(...)" suffix
            .str.replace(r'[^0-9]', '', regex=True)  # keep digits only
        )
        # A cell without any digit collapses to '' which a later astype(int)
        # cannot parse; treat it as missing instead.
        counts[column] = digits.where(digits != '')
    return counts

In [3]:
# Read and clean the social-media CSV, then report the frame's dimensions
# and display its rows.
df1 = wrangle(
    '../input/social-media-geographics-csv-ver/SocMed_Geographic.csv'
)
print('data frame shape: ', df1.shape)
df1.head(20)

data frame shape:  (10, 8)


Unnamed: 0_level_0,United States,Indonesia,Singapore,China,India,Vietnam,Philippines,Bangladesh
Social Media Platform,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bilibili,,,,27400000,,,,
Facebook,190000000.0,120000000.0,4450000.0,3000000,280000000.0,63000000.0,72000000.0,37000000.0
Instagram,107000000.0,59840000.0,2110000.0,3600000,88000000.0,8000000.0,1100000.0,1676100.0
Linkedln,160000000.0,15240000.0,2682000.0,5397000,62000000.0,3087500.0,2773485.0,2990000.0
Tencent QQ,,,,659100000,,,,
Tik Tok,45400000.0,81000000.0,800000.0,400000000,120000000.0,12000000.0,,
Twitter,55700000.0,11800000.0,1370000.0,3200000,13150000.0,127000000.0,10400000.0,3000000.0
WeChat,,,1000000.0,900000000,,,,
Weibo,,,800000.0,480000000,,600000.0,,
Youtube,192000000.0,132000000.0,43000000.0,580000000,265000000.0,6200000.0,,29000000.0


In [4]:
# Function for viewing the number of users of each social media per country
def user_pop_viewer(appname, data=None):
    """Print one platform's user counts per country, largest first.

    Parameters
    ----------
    appname : str
        Row label of the platform. An exact match is tried first; if that
        fails, labels are compared with surrounding whitespace ignored.
    data : pandas.DataFrame, optional
        Frame with platforms as rows and countries as columns. Defaults to
        the notebook-level ``df1`` as it exists at call time. Pass a frame
        explicitly when ``df1`` may have been overwritten by a later cell.

    Raises
    ------
    KeyError
        If no row label matches ``appname``.
    """
    frame = df1 if data is None else data

    if appname in frame.index:
        label = appname
    else:
        # Fall back to a whitespace-insensitive comparison of the labels.
        wanted = str(appname).strip()
        matches = [lbl for lbl in frame.index if str(lbl).strip() == wanted]
        if not matches:
            raise KeyError(
                f'{appname!r} is not a known platform; available: {list(frame.index)}'
            )
        label = matches[0]

    # Countries without a value for this platform are left out of the listing.
    users = frame.loc[label].dropna().astype(int).sort_values(ascending=False)
    print('Number of ', appname, ' Users per Country')
    print(users, '\n\n')

In [5]:
# NOTE: Type the app you're interested in. [Bilibili, Facebook, Instagram, Linkedln, Tencent QQ, Tik Tok, Twitter, WeChat, Weibo, Youtube]

# Print the per-country breakdown for every platform, one after another.
# NOTE(review): 'Youtube ' carries a trailing space - presumably it mirrors
# the raw row label in the CSV; confirm before normalising it.
for platform in (
    'Bilibili',
    'Facebook',
    'Instagram',
    'Linkedln',
    'Tencent QQ',
    'Tik Tok',
    'Twitter',
    'WeChat',
    'Weibo',
    'Youtube ',
):
    user_pop_viewer(platform)

Number of  Bilibili  Users per Country
China    27400000
Name: Bilibili, dtype: int64 


Number of  Facebook  Users per Country
India            280000000
United States    190000000
Indonesia        120000000
Philippines       72000000
Vietnam           63000000
Bangladesh        37000000
Singapore          4450000
China              3000000
Name: Facebook, dtype: int64 


Number of  Instagram  Users per Country
United States    107000000
India             88000000
Indonesia         59840000
Vietnam            8000000
China              3600000
Singapore          2110000
Bangladesh         1676100
Philippines        1100000
Name: Instagram, dtype: int64 


Number of  Linkedln  Users per Country
United States    160000000
India             62000000
Indonesia         15240000
China              5397000
Vietnam            3087500
Bangladesh         2990000
Philippines        2773485
Singapore          2682000
Name: Linkedln, dtype: int64 


Number of  Tencent QQ  Users per Country
China  

### User Story No. 2
*When I choose a country, I am able to see all the relevant social networks its people use ranked
in descending order.*

In [6]:
# Function for viewing the number of social media users in each country
def socmed_user_viewer(country, data=None):
    """Print one country's platforms ranked by user count, largest first.

    Parameters
    ----------
    country : str
        Column label of the country. An exact match is tried first; if that
        fails, labels are compared with surrounding whitespace ignored.
    data : pandas.DataFrame, optional
        Frame with platforms as rows and countries as columns. Defaults to
        the notebook-level ``df1`` as it exists at call time. Pass a frame
        explicitly when ``df1`` may have been overwritten by a later cell.

    Raises
    ------
    KeyError
        If no column label matches ``country``.
    """
    frame = df1 if data is None else data

    if country in frame.columns:
        label = country
    else:
        # Fall back to a whitespace-insensitive comparison of the labels.
        wanted = str(country).strip()
        matches = [col for col in frame.columns if str(col).strip() == wanted]
        if not matches:
            raise KeyError(
                f'{country!r} is not a known country; available: {list(frame.columns)}'
            )
        label = matches[0]

    # Platforms without a value for this country are left out of the ranking.
    ranking = frame[label].dropna().astype(int).sort_values(ascending=False)
    print(country, ' : ', ranking, '\n\n')

In [7]:
# NOTE: Enter the country that you're interested in. [United States, Indonesia, Singapore, China, India, Vietnam, Philippines, Bangladesh]

# Print the ranked platform list for every country, one after another.
for nation in (
    'United States',
    'Indonesia',
    'Singapore',
    'China',
    'India',
    'Vietnam',
    'Philippines',
    'Bangladesh',
):
    socmed_user_viewer(nation)

United States  :  Social Media Platform
Youtube      192000000
Facebook     190000000
Linkedln     160000000
Instagram    107000000
Twitter       55700000
Tik Tok       45400000
Name: United States, dtype: int64 


Indonesia  :  Social Media Platform
Youtube      132000000
Facebook     120000000
Tik Tok       81000000
Instagram     59840000
Linkedln      15240000
Twitter       11800000
Name: Indonesia, dtype: int64 


Singapore  :  Social Media Platform
Youtube      43000000
Facebook      4450000
Linkedln      2682000
Instagram     2110000
Twitter       1370000
WeChat        1000000
Tik Tok        800000
Weibo          800000
Name: Singapore, dtype: int64 


China  :  Social Media Platform
WeChat        900000000
Tencent QQ    659100000
Youtube       580000000
Weibo         480000000
Tik Tok       400000000
Bilibili       27400000
Linkedln        5397000
Instagram       3600000
Twitter         3200000
Facebook        3000000
Name: China, dtype: int64 


India  :  Social Media Platform


### User Story No. 3
*For each social media network, I can view a comparison of the percentage of each country's
population that uses it. (You will have to scrape the population data and
provide the source(s).)*

In [8]:
# Read the world-population CSV; its first column becomes the row index.
df3 = pd.read_csv(
    '../input/countries-in-the-world-by-population-2022/world_population.csv',
    index_col=0,
)
df3.head()

Unnamed: 0_level_0,Population (2020),Yearly Change,Net Change,Density (P/Km²),Land Area (Km²),Migrants (net),Fert. Rate,Med. Age,Urban Pop %,World Share
Country/Other,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Afghanistan,38928346,2.33,886592,60,652860,-62920,4.6,18,25.0,0.5
Albania,2877797,-0.11,-3120,105,27400,-14000,1.6,36,63.0,0.04
Algeria,43851044,1.85,797990,18,2381740,-10000,3.1,29,73.0,0.56
Angola,32866272,3.27,1040977,26,1246700,6413,5.6,17,67.0,0.42
Antigua and Barbuda,97929,0.84,811,223,440,0,2.0,34,26.0,0.0


In [9]:
# Population of Countries in this notebook
# Look up each country's 'Population (2020)' entry in df3 and print it as an integer.
for country in df1.columns:
    pop = df3.loc[country, 'Population (2020)'].astype(int)
    print(country, '  :  ', pop, '\n')

United States   :   331002651 

Indonesia   :   273523615 

Singapore   :   5850342 

China   :   1439323776 

India   :   1380004385 

Vietnam   :   97338579 

Philippines   :   109581078 

Bangladesh   :   164689383 



In [10]:
# Function for locating the population of a country
def pop_locator(country, data=None):
    """Return the 'Population (2020)' value for ``country``.

    Parameters
    ----------
    country : str
        Row label to look up.
    data : pandas.DataFrame, optional
        Population table indexed by country with a 'Population (2020)'
        column. Defaults to the notebook-level ``df3``.

    Returns
    -------
    The cell value, in the column's own dtype.

    Raises
    ------
    KeyError
        If ``country`` is not a row label of the table.
    """
    table = df3 if data is None else data
    # Address the cell in a single .loc call: selecting the whole row first
    # (``.loc[country][col]``) builds a mixed-dtype row Series, which turns an
    # integer population into a float whenever the table has float columns.
    return table.loc[country, 'Population (2020)']

In [11]:
# Convert every user count into a percentage of that country's population,
# rounded to two decimals. Missing counts are filled with 0 first, so they
# show up as 0.0.
# NOTE: this overwrites df1 column by column - afterwards df1 holds
# percentages, and running the cell a second time divides them again.
for country in df1.columns:
    df1[country] = (
        df1[country]
        .fillna(0)
        .astype(int)
        .div(pop_locator(country))
        .mul(100)
        .round(2)
    )

In [12]:
# Banner: the heading centred in a 91-character rule of dashes, then the table.
heading = 'Percentage of Users per Population per Country'
print(heading.center(91, '-'), '\n')
df1.head(10)

-----------------------Percentage of Users per Population per Country---------------------- 



Unnamed: 0_level_0,United States,Indonesia,Singapore,China,India,Vietnam,Philippines,Bangladesh
Social Media Platform,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bilibili,0.0,0.0,0.0,1.9,0.0,0.0,0.0,0.0
Facebook,57.4,43.87,76.06,0.21,20.29,64.72,65.7,22.47
Instagram,32.33,21.88,36.07,0.25,6.38,8.22,1.0,1.02
Linkedln,48.34,5.57,45.84,0.37,4.49,3.17,2.53,1.82
Tencent QQ,0.0,0.0,0.0,45.79,0.0,0.0,0.0,0.0
Tik Tok,13.72,29.61,13.67,27.79,8.7,12.33,0.0,0.0
Twitter,16.83,4.31,23.42,0.22,0.95,130.47,9.49,1.82
WeChat,0.0,0.0,17.09,62.53,0.0,0.0,0.0,0.0
Weibo,0.0,0.0,13.67,33.35,0.0,0.62,0.0,0.0
Youtube,58.01,48.26,735.0,40.3,19.2,6.37,0.0,17.61
