In [1]:
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [2]:
# Setup initial data frame
df = pd.read_csv('survey_results.csv')
cols = ['timestamp', 'age', 'pronouns', 'nationality', 'usage', 'first_page', 'for_you', 'following', 'favorites', 'friends', 'discover', 'search',
        'like', 'comment', 'save', 'follow', 'share', 'not_interested', 'refresh', 'interesting', 'search_for_you', 'search_scroll', 'search_again',
        'search_other', 'discover_for_you', 'discover_stay', 'discover_other', 'count_videos_account', 'politics', 'sports', 'eating_habits',
        'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games', 'other', 'political_topics', 'conspiracy_topics',
        'eating_habits_topics']
df.columns = cols
len(df)

139

In [3]:
# Clean Data
# Remove participants that do not use TikTok
df = df[df['usage'] != 'Never']

# Remove non valid age groups (NaN)
df['age'] = df['age'].astype('int64')
df = df[pd.to_numeric(df['age'], errors='coerce').notnull()]

# Separate age into groups (Youth [0-19], Young Adults [20-29], Adults [30-64], Seniors [65+])
age_limits = [0,19,29,64,150]
age_groups = ['Youth','Young Adult','Adult','Senior']
df['age_group'] = pd.cut(df['age'], bins=age_limits, labels=age_groups, right=False)

# Remove age groups that are under-represented (<15%)
counts = df['age_group'].value_counts(normalize=True)
df = df[df['age_group'].isin(counts[counts.gt(0.15)].index)]

# Remove pronouns that are under-represented (<15%)
counts = df['pronouns'].value_counts(normalize=True)
df = df[df['pronouns'].isin(counts[counts.gt(0.15)].index)]

len(df)

127

In [4]:
# Participants ages sorted by percentage
df_age = df.groupby(['age'], observed=False)
df_age.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

Unnamed: 0,age,percentage
0,15,0.79
1,17,3.94
2,18,11.02
3,19,20.47
4,20,24.41
5,21,19.69
6,22,11.81
7,23,3.15
8,24,1.57
9,26,1.57


In [5]:
# Participants average age
print("Average age of participants is " + df['age'].mean().astype('int64').astype('str') + " years old")

Average age of participants is 20 years old


In [6]:
# Participants age groups sorted by percentage
df_age_group = df.groupby(['age_group'], observed=False)
df_age_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,age_group,percentage
0,Young Adult,84.25
1,Youth,15.75
2,Adult,0.0
3,Senior,0.0


In [7]:
# Participants pronouns/gender sorted by percentage
df_pronouns_group = df.groupby(['pronouns'], observed=False)
df_pronouns_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,pronouns,percentage
0,She/her,68.5
1,He/him,31.5


In [8]:
# Participants nationalities or ethnicities sorted by percentage (to be cleaned)
df['nationality'] = df['nationality'].str.lower()
df_nationality_group = df.groupby(['nationality'], observed=False)
df_nationality_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,nationality,percentage
0,canadian,14.17
1,chinese,7.87
2,white,3.94
3,french,3.15
4,white canadian,2.36
5,brazilian,2.36
6,turkish,2.36
7,irish,2.36
8,indian,2.36
9,asian,2.36


In [9]:
# Initial TikTok use sorted by percentage
df_usage_group = df.groupby(['usage'], observed=False)
df_usage_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,usage,percentage
0,Multiple times a day,59.06
1,Once everyday,25.98
2,Once in a while,11.81
3,Once every week,3.15


In [10]:
# First page that participants choose to verify sorted by percentage
df_first_page_group = df.groupby(['first_page'], observed=False)
df_first_page_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,first_page,percentage
0,For You,85.83
1,Following,4.72
2,Friends,3.94
3,Search,3.15
4,Discover,1.57
5,Tiktok sent by friend,0.79


In [11]:
# Percentage of content deemed interesting by participants in the For You page
df_interesting_group = df.groupby(['interesting'], observed=False)
df_interesting_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,interesting,percentage
0,Often,66.14
1,Always,21.26
2,Sometimes,11.81
3,Never,0.79


In [12]:
# Percentage of numbers of videos watched when checking an account by participants
df_count_videos_account_group = df.groupby(['count_videos_account'], observed=False)
df_count_videos_account_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,count_videos_account,percentage
0,1-5 videos,74.8
1,Upto 10 vidoes,23.62
2,Never go into accounts,1.57


In [13]:
# Percentage of respondents based on how much they verify a certain page
df_pages = pd.DataFrame()

df_for_you = df.groupby(['for_you'], observed=False)
df_pages[['for_you', 'for_you_prob']] = df_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_following = df.groupby(['following'], observed=False)
df_pages[['following', 'following_prob']] = df_following.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_favorites = df.groupby(['favorites'], observed=False)
df_pages[['favorites', 'favorites_prob']] = df_favorites.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_friends = df.groupby(['friends'], observed=False)
df_pages[['friends', 'friends_prob']] = df_friends.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover = df.groupby(['discover'], observed=False)
df_pages[['discover', 'discover_prob']] = df_discover.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search = df.groupby(['search'], observed=False)
df_pages[['search', 'search_prob']] = df_search.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_pages

Unnamed: 0,for_you,for_you_prob,following,following_prob,favorites,favorites_prob,friends,friends_prob,discover,discover_prob,search,search_prob
0,Always,74.8,Always,5.51,Always,1.57,Always,3.94,Always,2.36,Always,4.72
1,Never,2.36,Never,23.62,Never,35.43,Never,34.65,Never,51.97,Never,2.36
2,Often,14.96,Often,12.6,Often,14.17,Often,7.87,Often,3.94,Often,34.65
3,Rarely,3.94,Rarely,36.22,Rarely,30.71,Rarely,37.01,Rarely,26.77,Rarely,16.54
4,Sometimes,3.94,Sometimes,22.05,Sometimes,18.11,Sometimes,16.54,Sometimes,14.96,Sometimes,41.73


In [14]:
# Probability setting of staying, or leaving a page based on participants answers
df_pages = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Leaving Probability'
df[['for_you', 'following', 'favorites', 'friends',
    'discover', 'search']] = df[['for_you', 'following', 'favorites', 'friends',
                                'discover', 'search']].replace(['Never', 'Rarely', 'Sometimes'], 'Leaving Probability')

# ['Often', 'Always'] = 'Staying Probability'
df[['for_you', 'following', 'favorites', 'friends',
    'discover', 'search']] = df[['for_you', 'following', 'favorites', 'friends',
                                 'discover', 'search']].replace(['Often', 'Always'], 'Staying Probability')

# Group pages by probability of staying, or leaving a page
df_for_you = df.groupby(['for_you'], observed=False)
df_pages[['for_you', 'for_you_prob']] = df_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_following = df.groupby(['following'], observed=False)
df_pages[['following', 'following_prob']] = df_following.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_favorites = df.groupby(['favorites'], observed=False)
df_pages[['favorites', 'favorites_prob']] = df_favorites.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_friends = df.groupby(['friends'], observed=False)
df_pages[['friends', 'friends_prob']] = df_friends.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover = df.groupby(['discover'], observed=False)
df_pages[['discover', 'discover_prob']] = df_discover.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search = df.groupby(['search'], observed=False)
df_pages[['search', 'search_prob']] = df_search.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_pages

Unnamed: 0,for_you,for_you_prob,following,following_prob,favorites,favorites_prob,friends,friends_prob,discover,discover_prob,search,search_prob
0,Leaving Probability,10.24,Leaving Probability,81.89,Leaving Probability,84.25,Leaving Probability,88.19,Leaving Probability,93.7,Leaving Probability,60.63
1,Staying Probability,89.76,Staying Probability,18.11,Staying Probability,15.75,Staying Probability,11.81,Staying Probability,6.3,Staying Probability,39.37


In [15]:
# Percentage of respondents based on how much they use a certain feature
df_features = pd.DataFrame()

df_like = df.groupby(['like'], observed=False)
df_features[['like', 'like_p']] = df_like.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_comment = df.groupby(['comment'], observed=False)
df_features[['comment', 'comment_p']] = df_comment.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_save = df.groupby(['save'], observed=False)
df_features[['save', 'save_p']] = df_save.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_follow = df.groupby(['follow'], observed=False)
df_features[['follow', 'follow_p']] = df_follow.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_share = df.groupby(['share'], observed=False)
df_features[['share', 'share_p']] = df_share.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_not_interested = df.groupby(['not_interested'], observed=False)
df_features[['not_interested', 'not_interested_p']] = df_not_interested.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_refresh = df.groupby(['refresh'], observed=False)
df_features[['refresh', 'refresh_p']] = df_refresh.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_features

Unnamed: 0,like,like_p,comment,comment_p,save,save_p,follow,follow_p,share,share_p,not_interested,not_interested_p,refresh,refresh_p
0,Always,21.26,Always,7.87,Always,14.17,Always,2.36,Always,14.96,Always,3.94,Always,4.72
1,Never,11.02,Never,40.16,Never,11.02,Never,16.54,Never,11.02,Never,18.9,Never,23.58
2,Often,37.8,Often,6.3,Often,23.62,Often,7.87,Often,38.58,Often,8.66,Often,20.75
3,Rarely,12.6,Rarely,26.77,Rarely,15.75,Rarely,40.94,Rarely,15.75,Rarely,40.16,Rarely,28.3
4,Sometimes,17.32,Sometimes,18.9,Sometimes,35.43,Sometimes,32.28,Sometimes,19.69,Sometimes,28.35,Sometimes,22.64


In [16]:
# Probability setting of using, or not using a feature based on participants usage
df_features = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Not Using Probability'
df[['like', 'comment', 'save', 'follow', 'share', 
    'not_interested', 'refresh']] = df[['like', 'comment', 'save', 'follow', 'share', 
                                        'not_interested', 'refresh']].replace(['Never', 'Rarely', 'Sometimes'], 'Not Using Probability')

# ['Often', 'Always'] = 'Using Probability'
df[['like', 'comment', 'save', 'follow', 'share', 
    'not_interested', 'refresh']] = df[['like', 'comment', 'save', 'follow', 'share', 
                                        'not_interested', 'refresh']].replace(['Often', 'Always'], 'Using Probability')

# Group pages by probability of using, or not using a feature
df_like = df.groupby(['like'], observed=False)
df_features[['like', 'like_p']] = df_like.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_comment = df.groupby(['comment'], observed=False)
df_features[['comment', 'comment_p']] = df_comment.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_save = df.groupby(['save'], observed=False)
df_features[['save', 'save_p']] = df_save.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_follow = df.groupby(['follow'], observed=False)
df_features[['follow', 'follow_p']] = df_follow.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_share = df.groupby(['share'], observed=False)
df_features[['share', 'share_p']] = df_share.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_not_interested = df.groupby(['not_interested'], observed=False)
df_features[['not_interested', 'not_interested_p']] = df_not_interested.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_refresh = df.groupby(['refresh'], observed=False)
df_features[['refresh', 'refresh_p']] = df_refresh.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_features

Unnamed: 0,like,like_p,comment,comment_p,save,save_p,follow,follow_p,share,share_p,not_interested,not_interested_p,refresh,refresh_p
0,Not Using Probability,40.94,Not Using Probability,85.83,Not Using Probability,62.2,Not Using Probability,89.76,Not Using Probability,46.46,Not Using Probability,87.4,Not Using Probability,74.53
1,Using Probability,59.06,Using Probability,14.17,Using Probability,37.8,Using Probability,10.24,Using Probability,53.54,Using Probability,12.6,Using Probability,25.47


In [17]:
# Percentage of respondents based on how much they do a certain action after executing a search
df_search = pd.DataFrame()

df_search_for_you = df.groupby(['search_for_you'], observed=False)
df_search[['search_for_you', 'search_for_you_p']] = df_search_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_scroll = df.groupby(['search_scroll'], observed=False)
df_search[['search_scroll', 'search_scroll_p']] = df_search_scroll.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_again = df.groupby(['search_again'], observed=False)
df_search[['search_again', 'search_again_p']] = df_search_again.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_other = df.groupby(['search_other'], observed=False)
df_search[['search_other', 'search_other_p']] = df_search_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search

Unnamed: 0,search_for_you,search_for_you_p,search_scroll,search_scroll_p,search_again,search_again_p,search_other,search_other_p
0,Always,28.35,Always,7.87,Always,1.57,Always,0.79
1,Never,4.72,Never,7.09,Never,14.17,Never,60.63
2,Often,48.03,Often,35.43,Often,17.32,Often,3.15
3,Rarely,4.72,Rarely,18.11,Rarely,28.35,Rarely,20.47
4,Sometimes,14.17,Sometimes,31.5,Sometimes,38.58,Sometimes,14.96


In [18]:
# Probability setting of doing, or not doing an action when inside the search page based on participants usage
df_search = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Not Doing Probability'
df[['search_for_you', 'search_scroll',
    'search_again', 'search_other']] = df[['search_for_you', 'search_scroll',
                                           'search_again', 'search_other']].replace(['Never', 'Rarely', 'Sometimes'], 'Not Doing Probability')

# ['Often', 'Always'] = 'Doing Probability'
df[['search_for_you' ,'search_scroll',
    'search_again', 'search_other']] = df[['search_for_you', 'search_scroll',
                                           'search_again', 'search_other']].replace(['Often', 'Always'], 'Doing Probability')

# Group pages by probability of doing, or not doing an action
df_search_for_you = df.groupby(['search_for_you'], observed=False)
df_search[['search_for_you', 'search_for_you_p']] = df_search_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_scroll = df.groupby(['search_scroll'], observed=False)
df_search[['search_scroll', 'search_scroll_p']] = df_search_scroll.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_again = df.groupby(['search_again'], observed=False)
df_search[['search_again', 'search_again_p']] = df_search_again.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_other = df.groupby(['search_other'], observed=False)
df_search[['search_other', 'search_other_p']] = df_search_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search

Unnamed: 0,search_for_you,search_for_you_p,search_scroll,search_scroll_p,search_again,search_again_p,search_other,search_other_p
0,Doing Probability,76.38,Doing Probability,43.31,Doing Probability,18.9,Doing Probability,3.94
1,Not Doing Probability,23.62,Not Doing Probability,56.69,Not Doing Probability,81.1,Not Doing Probability,96.06


In [19]:
# Percentage of respondents based on how much they do a certain action after executing a search
df_discover = pd.DataFrame()

df_discover_for_you = df.groupby(['discover_for_you'], observed=False)
df_discover[['discover_for_you', 'discover_for_you_prob']] = df_discover_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover_stay = df.groupby(['discover_stay'], observed=False)
df_discover[['discover_stay', 'discover_stay_prob']] = df_discover_stay.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover_other = df.groupby(['discover_other'], observed=False)
df_discover[['discover_other', 'discover_other_prob']] = df_discover_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover

Unnamed: 0,discover_for_you,discover_for_you_prob,discover_stay,discover_stay_prob,discover_other,discover_other_prob
0,Always,31.5,Always,1.57,Always,1.57
1,Never,18.11,Never,49.61,Never,66.93
2,Often,34.65,Often,6.3,Often,3.15
3,Rarely,3.94,Rarely,18.9,Rarely,14.17
4,Sometimes,11.81,Sometimes,23.62,Sometimes,14.17


In [20]:
# Probability setting of not doing, or not doing an action when inside the discover page based on participants usage
df_discover = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Not Doing Probability'
df[['discover_for_you', 'discover_stay',
    'discover_other']] = df[['discover_for_you', 'discover_stay', 'discover_other']].replace(['Never', 'Rarely', 'Sometimes'], 'Not Doing Probability')

# ['Often', 'Always'] = 'Doing Probability'
df[['discover_for_you' ,'discover_stay',
    'discover_other']] = df[['search_for_you', 'discover_stay', 'discover_other']].replace(['Often', 'Always'], 'Doing Probability')

# Group pages by probability of doing, or not doing an action
df_discover_for_you = df.groupby(['discover_for_you'], observed=False)
df_discover[['discover_for_you', 'discover_for_you_prob']] = df_discover_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover_stay = df.groupby(['discover_stay'], observed=False)
df_discover[['discover_stay', 'discover_stay_prob']] = df_discover_stay.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover_other = df.groupby(['discover_other'], observed=False)
df_discover[['discover_other', 'discover_other_prob']] = df_discover_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover

Unnamed: 0,discover_for_you,discover_for_you_prob,discover_stay,discover_stay_prob,discover_other,discover_other_prob
0,Doing Probability,76.38,Doing Probability,7.87,Doing Probability,4.72
1,Not Doing Probability,23.62,Not Doing Probability,92.13,Not Doing Probability,95.28


In [21]:
# Percentage of respondents based on how much they watch a certain video given their topic
df_topics = pd.DataFrame()

df_topics_politics = df.groupby(['politics'], observed=False)
df_topics[['politics', 'politics_prob']] = df_topics_politics.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_sports = df.groupby(['sports'], observed=False)
df_topics[['sports', 'sports_prob']] = df_topics_sports.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_eating_habits = df.groupby(['eating_habits'], observed=False)
df_topics[['eating_habits', 'eating_habits_prob']] = df_topics_eating_habits.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_memes = df.groupby(['memes'], observed=False)
df_topics[['memes', 'memes_prob']] = df_topics_memes.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_conspiracies = df.groupby(['conspiracies'], observed=False)
df_topics[['conspiracies', 'conspiracies_prob']] = df_topics_conspiracies.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_pets = df.groupby(['pets'], observed=False)
df_topics[['pets', 'pets_prob']] = df_topics_pets.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_dance = df.groupby(['dance'], observed=False)
df_topics[['dance', 'dance_prob']] = df_topics_dance.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_entertainment = df.groupby(['entertainment'], observed=False)
df_topics[['entertainment', 'entertainment_prob']] = df_topics_entertainment.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_food = df.groupby(['food'], observed=False)
df_topics[['food', 'food_prob']] = df_topics_food.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_games = df.groupby(['games'], observed=False)
df_topics[['games', 'games_prob']] = df_topics_games.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_other = df.groupby(['other'], observed=False)
df_topics[['other', 'other_prob']] = df_topics_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics

Unnamed: 0,politics,politics_prob,sports,sports_prob,eating_habits,eating_habits_prob,memes,memes_prob,conspiracies,conspiracies_prob,pets,pets_prob,dance,dance_prob,entertainment,entertainment_prob,food,food_prob,games,games_prob,other,other_prob
0,Always,0.79,Always,3.94,Always,1.57,Always,21.26,Always,0.79,Always,8.66,Always,4.72,Always,24.41,Always,20.47,Always,6.3,Always,13.39
1,Never,14.96,Never,24.41,Never,18.11,Never,3.94,Never,38.58,Never,15.75,Never,29.92,Never,6.3,Never,4.72,Never,26.77,Never,23.62
2,Often,22.83,Often,11.81,Often,19.69,Often,44.09,Often,6.3,Often,29.92,Often,19.69,Often,43.31,Often,37.01,Often,19.69,Often,22.83
3,Rarely,29.13,Rarely,31.5,Rarely,27.56,Rarely,7.09,Rarely,29.92,Rarely,12.6,Rarely,25.2,Rarely,2.36,Rarely,10.24,Rarely,25.98,Rarely,9.45
4,Sometimes,32.28,Sometimes,28.35,Sometimes,33.07,Sometimes,23.62,Sometimes,24.41,Sometimes,33.07,Sometimes,20.47,Sometimes,23.62,Sometimes,27.56,Sometimes,21.26,Sometimes,30.71


In [22]:
# Probability setting for watching, or not watching a video of a certain topic based on participants answers
df_topics = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Do Not Watch'
df[['politics', 'sports', 'eating_habits', 'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games',
    'other']] = df[['politics', 'sports', 'eating_habits', 'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games',
                    'other']].replace(['Never', 'Rarely', 'Sometimes'], 'Do Not Watch')

# ['Often', 'Always'] = 'Watch'
df[['politics', 'sports', 'eating_habits', 'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games',
    'other']] = df[['politics', 'sports', 'eating_habits', 'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games',
                    'other']].replace(['Often', 'Always'], 'Watch')

# Group topics by probability of watching, or not watching a video
df_topics_politics = df.groupby(['politics'], observed=False)
df_topics[['politics', 'politics_prob']] = df_topics_politics.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_sports = df.groupby(['sports'], observed=False)
df_topics[['sports', 'sports_prob']] = df_topics_sports.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_eating_habits = df.groupby(['eating_habits'], observed=False)
df_topics[['eating_habits', 'eating_habits_prob']] = df_topics_eating_habits.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_memes = df.groupby(['memes'], observed=False)
df_topics[['memes', 'memes_prob']] = df_topics_memes.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_conspiracies = df.groupby(['conspiracies'], observed=False)
df_topics[['conspiracies', 'conspiracies_prob']] = df_topics_conspiracies.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_pets = df.groupby(['pets'], observed=False)
df_topics[['pets', 'pets_prob']] = df_topics_pets.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_dance = df.groupby(['dance'], observed=False)
df_topics[['dance', 'dance_prob']] = df_topics_dance.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_entertainment = df.groupby(['entertainment'], observed=False)
df_topics[['entertainment', 'entertainment_prob']] = df_topics_entertainment.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_food = df.groupby(['food'], observed=False)
df_topics[['food', 'food_prob']] = df_topics_food.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_games = df.groupby(['games'], observed=False)
df_topics[['games', 'games_prob']] = df_topics_games.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_other = df.groupby(['other'], observed=False)
df_topics[['other', 'other_prob']] = df_topics_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

In [23]:
# Probability setting for watching, or not watching a video of a certain topic based on participants answers (first half)
df_topics[['politics', 'politics_prob', 'sports', 'sports_prob', 'eating_habits', 'eating_habits_prob', 'memes', 'memes_prob',
    'conspiracies', 'conspiracies_prob']]

Unnamed: 0,politics,politics_prob,sports,sports_prob,eating_habits,eating_habits_prob,memes,memes_prob,conspiracies,conspiracies_prob
0,Do Not Watch,76.38,Do Not Watch,84.25,Do Not Watch,78.74,Do Not Watch,34.65,Do Not Watch,92.91
1,Watch,23.62,Watch,15.75,Watch,21.26,Watch,65.35,Watch,7.09


In [24]:
# Probability setting for watching, or not watching a video of a certain topic based on participants answers (second half)
df_topics[['pets', 'pets_prob', 'dance', 'dance_prob', 'entertainment', 'entertainment_prob', 'food', 'food_prob', 'games', 'games_prob', 'other',
          'other_prob']]

Unnamed: 0,pets,pets_prob,dance,dance_prob,entertainment,entertainment_prob,food,food_prob,games,games_prob,other,other_prob
0,Do Not Watch,61.42,Do Not Watch,75.59,Do Not Watch,32.28,Do Not Watch,42.52,Do Not Watch,74.02,Do Not Watch,63.78
1,Watch,38.58,Watch,24.41,Watch,67.72,Watch,57.48,Watch,25.98,Watch,36.22


In [25]:
# Participants recommendations for eating habits creators, topics, hashtags, etc.
df[df['eating_habits_topics'].notnull()]['eating_habits_topics'].to_frame()

Unnamed: 0,eating_habits_topics
0,"@babytamago, @emthenutritionist, @plantedinthekitchen,"
2,"@mxriyum, aesthetic baking/cooking videos"
3,Abbey Sharp
4,"Abby Sharpe, Justine"
6,"Asian food, healthy food"
7,baby food
8,"Caloric deficit meals, sugar intake, cheap recipes"
9,"Calorie déficit, cooking tutorials, meal ideas"
10,"chopped salads, protein heavy meals, halal recipes"
11,Colleen Christensen


In [26]:
# Participants recommendations for conspiracy_topics, topics, hashtags, etc.
df[df['conspiracy_topics'].notnull()]['conspiracy_topics'].to_frame()

Unnamed: 0,conspiracy_topics
6,I do not watch any content about conspiracies
7,no
8,"Star Wars conspiracies, Dark theories in tv shows and movies like gravity falls, Area 51"
9,I dont Really watch that
10,Conspiracies are usually about the US government terrorism. Mostly about the IDF's actions in Middle Eastern countries (don't know if this qualifies as conspiracy)
15,The recent Kate Middleton conspiracy
16,Ultra wealthy
17,"Every now and then, I'll get a podcast bro trying to make people believe something like dinosaurs had electricity. This is the only time I use the not interested feature."
18,none
19,Don’t watch any
