In [1]:
import pandas as pd
import scipy.stats as stats
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [2]:
# Setup initial data frame
df = pd.read_csv('survey_results.csv')
cols = ['timestamp', 'age', 'pronouns', 'nationality', 'usage', 'first_page', 'for_you', 'following', 'favorites', 'friends', 'discover', 'search', 
        'like', 'comment', 'save', 'follow', 'share', 'not_interested', 'refresh', 'interesting', 'search_for_you', 'search_scroll', 'search_again', 
        'search_other', 'discover_for_you', 'discover_stay', 'discover_other', 'count_videos_account', 'politics', 'sports', 'eating_habits', 
        'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games', 'other', 'political_topics', 'conspiracy_topics', 
        'eating_habits_topics']
df.columns = cols
len(df)

139

In [3]:
# Clean Data
# Remove participants that do not use TikTok
df = df[df['usage'] != 'Never']

# Remove non valid age groups (NaN)
df['age'] = df['age'].astype('int64')
df = df[pd.to_numeric(df['age'], errors='coerce').notnull()]

# Separate age into groups (Youth [0-19], Young Adults [20-29], Adults [30-64], Seniors [65+])
age_limits = [0,19,29,64,150]
age_groups = ['Youth','Young Adult','Adult','Senior']
df['age_group'] = pd.cut(df['age'], bins=age_limits, labels=age_groups, right=False)

# Remove age groups that are under-represented (<15%)
counts = df['age_group'].value_counts(normalize=True)
df = df[df['age_group'].isin(counts[counts.gt(0.15)].index)]

# Remove pronouns that are under-represented (<15%)
counts = df['pronouns'].value_counts(normalize=True)
df = df[df['pronouns'].isin(counts[counts.gt(0.15)].index)]

# Divide dataframes into gendered dataframes between she/her and he/him
df_she = df[df['pronouns'] == 'She/her']
df_he = df[df['pronouns'] == 'He/him']

print("Length of Sher/her dataframe is " + str(len(df_she)))
print("Length of He/him dataframe is " + str(len(df_he)))

Length of Sher/her dataframe is 87
Length of He/him dataframe is 40


In [4]:
# Participants average age
age_stats = stats.ttest_ind(a=df_she['age'], b=df_he['age'], equal_var=True)
print("The average age of respondants is equivalent for both genders: " + str(age_stats.pvalue > 0.05))

The average age of respondants is equivalent for both genders: True


In [5]:
# Gender comparison for age group distribution
print(pd.crosstab(df.age_group, df.pronouns))
age_group_stats = stats.chi2_contingency(pd.crosstab(df.age_group, df.pronouns))
print("The age group distribution is equivalent for both genders: " + str(age_group_stats.pvalue > 0.05))

pronouns     He/him  She/her
age_group                   
Youth             8       12
Young Adult      32       75
The age group distribution is equivalent for both genders: True


In [6]:
# Gender comparison for TikTok use distribution
print(pd.crosstab(df.usage, df.pronouns))
usage_stats = stats.chi2_contingency(pd.crosstab(df.usage, df.pronouns))
print("There is no relationhsip between gender and amount of TikTok use: " + str(usage_stats.pvalue > 0.05))

pronouns              He/him  She/her
usage                                
Multiple times a day      22       53
Once every week            3        1
Once everyday             10       23
Once in a while            5       10
There is no relationhsip between gender and amount of TikTok use: True


In [7]:
# Gender comparison for first page verification choice distribution
print(pd.crosstab(df.first_page, df.pronouns))
first_page_stats = stats.chi2_contingency(pd.crosstab(df.first_page, df.pronouns))
print("There is no relationship between gender and the first page used when entering TikTok: " + str(first_page_stats.pvalue > 0.05))

pronouns               He/him  She/her
first_page                            
Discover                    0        2
Following                   2        4
For You                    32       77
Friends                     3        2
Search                      2        2
Tiktok sent by friend       1        0
There is no relationship between gender and the first page used when entering TikTok: True


In [8]:
# Gender comparison for content deemed interesting distribution
print(pd.crosstab(df.interesting, df.pronouns))
interesting_stats = stats.chi2_contingency(pd.crosstab(df.interesting, df.pronouns))
print("There is not relationship between gender and the amount of content seemed interesting in For You page: " + str(interesting_stats.pvalue > 0.05))

pronouns     He/him  She/her
interesting                 
Always           11       16
Never             1        0
Often            21       63
Sometimes         7        8
There is not relationship between gender and the amount of content seemed interesting in For You page: True


In [9]:
# Gender comparison for number of videos watched when checking an account distribution
print(pd.crosstab(df.count_videos_account, df.pronouns))
count_videos_account_stats = stats.chi2_contingency(pd.crosstab(df.count_videos_account, df.pronouns))
print("There is no relationship between gender and the amount of videos watched when checking an account: " + str(count_videos_account_stats.pvalue > 0.05))

pronouns                He/him  She/her
count_videos_account                   
1-5 videos                  27       68
Never go into accounts       0        2
Upto 10 vidoes              13       17
There is no relationship between gender and the amount of videos watched when checking an account: True


In [10]:
# How often participants use a page
df_pages = pd.DataFrame()

print(pd.crosstab(df.for_you, df.pronouns))
for_you_stats = stats.chi2_contingency(pd.crosstab(df.for_you, df.pronouns))
print("There is no relationship between gender and staying in the for you page: " + str(for_you_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.following, df.pronouns))
following_stats = stats.chi2_contingency(pd.crosstab(df.following, df.pronouns))
print("There is no relationship between gender and staying in the following page: "+ str(following_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.favorites, df.pronouns))
favorites_stats = stats.chi2_contingency(pd.crosstab(df.favorites, df.pronouns))
print("There is no relationship between gender and staying in the favorites page: " + str(favorites_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.friends, df.pronouns))
friends_stats = stats.chi2_contingency(pd.crosstab(df.friends, df.pronouns))
print("There is no relationship between gender and staying in the friends page: " + str(friends_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.discover, df.pronouns))
discover_stats = stats.chi2_contingency(pd.crosstab(df.discover, df.pronouns))
print("There is no relationship between gender and staying in the discover page: " + str(discover_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.search, df.pronouns))
search_stats = stats.chi2_contingency(pd.crosstab(df.search, df.pronouns))
print("There is no relationship between gender and staying in the search page: " + str(search_stats.pvalue > 0.05) + "\n")

pronouns   He/him  She/her
for_you                   
Always         27       68
Never           1        2
Often           6       13
Rarely          2        3
Sometimes       4        1
There is no relationship between gender and staying in the for you page: True

pronouns   He/him  She/her
following                 
Always          2        5
Never          10       20
Often           4       12
Rarely         13       33
Sometimes      11       17
There is no relationship between gender and staying in the following page: True

pronouns   He/him  She/her
favorites                 
Always          0        2
Never          18       27
Often           5       13
Rarely         10       29
Sometimes       7       16
There is no relationship between gender and staying in the favorites page: True

pronouns   He/him  She/her
friends                   
Always          3        2
Never          21       23
Often           2        8
Rarely         11       36
Sometimes       3       18
The

In [11]:
# How much participants do an action while watching a video
df_features = pd.DataFrame()

print(pd.crosstab(df.like, df.pronouns))
like_stats = stats.chi2_contingency(pd.crosstab(df.like, df.pronouns))
print("There is no relationship between gender and liking a video: " + str(like_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.comment, df.pronouns))
comment_stats = stats.chi2_contingency(pd.crosstab(df.comment, df.pronouns))
print("There is no relationship between gender and commenting on a video: " + str(comment_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.save, df.pronouns))
save_stats = stats.chi2_contingency(pd.crosstab(df.save, df.pronouns))
print("There is no relationship between gender and saving a video: "+ str(save_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.follow, df.pronouns))
follow_stats = stats.chi2_contingency(pd.crosstab(df.follow, df.pronouns))
print("There is no relationship between gender and following an account: " + str(follow_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.share, df.pronouns))
share_stats = stats.chi2_contingency(pd.crosstab(df.share, df.pronouns))
print("There is no relationship between gender and sharing a video: " + str(share_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.not_interested, df.pronouns))
not_interested_stats = stats.chi2_contingency(pd.crosstab(df.not_interested, df.pronouns))
print("There is no relationship between gender and selecting not interested on a video: " + str(not_interested_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.refresh, df.pronouns))
refresh_stats = stats.chi2_contingency(pd.crosstab(df.refresh, df.pronouns))
print("There is no relationship between gender and refreshing the page: " + str(refresh_stats.pvalue > 0.05) + "\n")

pronouns   He/him  She/her
like                      
Always         10       17
Never           7        7
Often          13       35
Rarely          3       13
Sometimes       7       15
There is no relationship between gender and liking a video: True

pronouns   He/him  She/her
comment                   
Always          2        8
Never          19       32
Often           4        4
Rarely          7       27
Sometimes       8       16
There is no relationship between gender and commenting on a video: True

pronouns   He/him  She/her
save                      
Always          4       14
Never           7        7
Often          11       19
Rarely          8       12
Sometimes      10       35
There is no relationship between gender and saving a video: True

pronouns   He/him  She/her
follow                    
Always          1        2
Never           9       12
Often           0       10
Rarely         15       37
Sometimes      15       26
There is no relationship between gender

In [12]:
# How often participants do a certain action after searching
df_search = pd.DataFrame()

print(pd.crosstab(df.search_for_you, df.pronouns))
search_for_you_stats = stats.chi2_contingency(pd.crosstab(df.search_for_you, df.pronouns))
print("There is no relationship between gender and going to the for you page after a search: " + str(search_for_you_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.search_scroll, df.pronouns))
search_scroll_stats = stats.chi2_contingency(pd.crosstab(df.search_scroll, df.pronouns))
print("There is no relationship between gender and continuously scrolling after a search: " + str(search_scroll_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.search_again, df.pronouns))
search_again_stats = stats.chi2_contingency(pd.crosstab(df.search_again, df.pronouns))
print("There is no relationship between gender and searching a second time: " + str(search_again_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.search_other, df.pronouns))
search_other_stats = stats.chi2_contingency(pd.crosstab(df.search_other, df.pronouns))
print("There is no relationship between gender and doing other actions after a search: " + str(search_other_stats.pvalue > 0.05) + "\n")

pronouns        He/him  She/her
search_for_you                 
Always              13       23
Never                4        2
Often               15       46
Rarely               1        5
Sometimes            7       11
There is no relationship between gender and going to the for you page after a search: True

pronouns       He/him  She/her
search_scroll                 
Always              3        7
Never               6        3
Often               9       36
Rarely             14        9
Sometimes           8       32
There is no relationship between gender and continuously scrolling after a search: False

pronouns      He/him  She/her
search_again                 
Always             0        2
Never             12        6
Often              6       16
Rarely            14       22
Sometimes          8       41
There is no relationship between gender and searching a second time: False

pronouns      He/him  She/her
search_other                 
Always             1        0
N

In [13]:
# How often participants do a certain action after going on the discover page
df_discover = pd.DataFrame()

print(pd.crosstab(df.discover_for_you, df.pronouns))
discover_for_you_stats = stats.chi2_contingency(pd.crosstab(df.discover_for_you, df.pronouns))
print("There is no relationship between gender and going to the for you page after checking the discover page: " + str(discover_for_you_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.discover_stay, df.pronouns))
discover_stay_stats = stats.chi2_contingency(pd.crosstab(df.discover_stay, df.pronouns))
print("There is no relationship between gender and staying in the discover page: " + str(discover_stay_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.discover_other, df.pronouns))
discover_other_stats = stats.chi2_contingency(pd.crosstab(df.discover_other, df.pronouns))
print("There is no relationship between gender and doing other actions after checking the discover page: " + str(discover_other_stats.pvalue > 0.05) + "\n")

pronouns          He/him  She/her
discover_for_you                 
Always                15       25
Never                  8       15
Often                  8       36
Rarely                 2        3
Sometimes              7        8
There is no relationship between gender and going to the for you page after checking the discover page: True

pronouns       He/him  She/her
discover_stay                 
Always              1        1
Never              21       42
Often               1        7
Rarely              6       18
Sometimes          11       19
There is no relationship between gender and staying in the discover page: True

pronouns        He/him  She/her
discover_other                 
Always               1        1
Never               29       56
Often                0        4
Rarely               4       14
Sometimes            6       12
There is no relationship between gender and doing other actions after checking the discover page: True



In [14]:
# How often participants watch a video of a certain topic
df_topics = pd.DataFrame()

print(pd.crosstab(df.politics, df.pronouns))
politics_stats = stats.chi2_contingency(pd.crosstab(df.politics, df.pronouns))
print("There is no relationship between gender and watching political content: " + str(politics_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.sports, df.pronouns))
sports_stats = stats.chi2_contingency(pd.crosstab(df.sports, df.pronouns))
print("There is no relationship between gender and watching content about sports: " + str(sports_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.eating_habits, df.pronouns))
eating_habits_stats = stats.chi2_contingency(pd.crosstab(df.eating_habits, df.pronouns))
print("There is no relationship between gender and watching content about eating habits: " + str(eating_habits_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.memes, df.pronouns))
memes_stats = stats.chi2_contingency(pd.crosstab(df.memes, df.pronouns))
print("There is no relationship between gender and watching memes: " + str(memes_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.conspiracies, df.pronouns))
conspiracies_stats = stats.chi2_contingency(pd.crosstab(df.conspiracies, df.pronouns))
print("There is no relationship between gender and watching conspiracy theories: " + str(conspiracies_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.pets, df.pronouns))
pets_stats = stats.chi2_contingency(pd.crosstab(df.pets, df.pronouns))
print("There is no relationship between gender and watching content about pets: " + str(pets_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.dance, df.pronouns))
dance_stats = stats.chi2_contingency(pd.crosstab(df.dance, df.pronouns))
print("There is no relationship between gender and watching dancing content: " + str(dance_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.entertainment, df.pronouns))
entertainment_stats = stats.chi2_contingency(pd.crosstab(df.entertainment, df.pronouns))
print("There is no relationship between gender and watching entertainment content: " + str(entertainment_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.food, df.pronouns))
food_stats = stats.chi2_contingency(pd.crosstab(df.food, df.pronouns))
print("There is no relationship between gender and watching food content: " + str(food_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.games, df.pronouns))
games_stats = stats.chi2_contingency(pd.crosstab(df.games, df.pronouns))
print("There is no relationship between gender and watching gaming content: " + str(games_stats.pvalue > 0.05) + "\n")

print(pd.crosstab(df.other, df.pronouns))
other_stats = stats.chi2_contingency(pd.crosstab(df.other, df.pronouns))
print("There is no relationship between gender and watching other type of content not covered: " + str(other_stats.pvalue > 0.05) + "\n")

pronouns   He/him  She/her
politics                  
Always          0        1
Never           6       13
Often           9       20
Rarely         11       26
Sometimes      14       27
There is no relationship between gender and watching political content: True

pronouns   He/him  She/her
sports                    
Always          4        1
Never          11       20
Often           6        9
Rarely          9       31
Sometimes      10       26
There is no relationship between gender and watching content about sports: True

pronouns       He/him  She/her
eating_habits                 
Always              1        1
Never              13       10
Often               5       20
Rarely             12       23
Sometimes           9       33
There is no relationship between gender and watching content about eating habits: False

pronouns   He/him  She/her
memes                     
Always         10       17
Never           1        4
Often          23       33
Rarely          1     