In [1]:
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [2]:
# Setup initial data frame
df = pd.read_csv('survey_results.csv')
cols = ['timestamp', 'age', 'pronouns', 'nationality', 'usage', 'first_page', 'for_you', 'following', 'favorites', 'friends', 'discover', 'search', 
        'like', 'comment', 'save', 'follow', 'share', 'not_interested', 'refresh', 'interesting', 'search_for_you', 'search_scroll', 'search_again', 
        'search_other', 'discover_for_you', 'discover_stay', 'discover_other', 'count_videos_account', 'politics', 'sports', 'eating_habits', 
        'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games', 'other', 'political_topics', 'conspiracy_topics', 
        'eating_habits_topics']
df.columns = cols
len(df)

139

In [3]:
# Clean Data

# Remove participants that do not recognize themselves with the He/Him pronouns
df = df[df['pronouns'] == 'He/him']

# Remove participants that do not use TikTok
df = df[df['usage'] != 'Never']

# Remove non valid age groups (NaN)
df['age'] = df['age'].astype('int64')
df = df[pd.to_numeric(df['age'], errors='coerce').notnull()]

# Separate age into groups (Youth [0-19], Young Adults [20-29], Adults [30-64], Seniors [65+])
age_limits = [0,19,29,64,150]
age_groups = ['Youth','Young Adult','Adult','Senior']
df['age_group'] = pd.cut(df['age'], bins=age_limits, labels=age_groups, right=False)

# Remove age groups that are under-represented (<15%)
counts = df['age_group'].value_counts(normalize=True)
df = df[df['age_group'].isin(counts[counts.gt(0.15)].index)]

len(df)

40

In [4]:
# Participants ages sorted by percentage
df_age = df.groupby(['age'], observed=False)
df_age.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

Unnamed: 0,age,percentage
0,15,2.5
1,17,5.0
2,18,12.5
3,19,15.0
4,20,22.5
5,21,17.5
6,22,17.5
7,23,5.0
8,27,2.5


In [5]:
# Participants average age
print("Average age of participants is " + df['age'].mean().astype('int64').astype('str') + " years old")

Average age of participants is 20 years old


In [6]:
# Participants age groups sorted by percentage
df_age_group = df.groupby(['age_group'], observed=False)
df_age_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,age_group,percentage
0,Young Adult,80.0
1,Youth,20.0
2,Adult,0.0
3,Senior,0.0


In [7]:
# Participants pronouns/gender sorted by percentage
df_pronouns_group = df.groupby(['pronouns'], observed=False)
df_pronouns_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,pronouns,percentage
0,He/him,100.0


In [8]:
# Initial TikTok use sorted by percentage
df_usage_group = df.groupby(['usage'], observed=False)
df_usage_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,usage,percentage
0,Multiple times a day,55.0
1,Once everyday,25.0
2,Once in a while,12.5
3,Once every week,7.5


In [9]:
# First page that participants choose to verify sorted by percentage
df_first_page_group = df.groupby(['first_page'], observed=False)
df_first_page_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,first_page,percentage
0,For You,80.0
1,Friends,7.5
2,Following,5.0
3,Search,5.0
4,Tiktok sent by friend,2.5


In [10]:
# Percentage of content deemed interesting by participants in the For You page
df_interesting_group = df.groupby(['interesting'], observed=False)
df_interesting_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,interesting,percentage
0,Often,52.5
1,Always,27.5
2,Sometimes,17.5
3,Never,2.5


In [11]:
# Percentage of numbers of videos watched when checking an account by participants
df_count_videos_account_group = df.groupby(['count_videos_account'], observed=False)
df_count_videos_account_group.size().transform(lambda x: round((x*100)/sum(x),2)).sort_values(ascending=False).reset_index(name='percentage')

Unnamed: 0,count_videos_account,percentage
0,1-5 videos,67.5
1,Upto 10 vidoes,32.5


In [12]:
# Percentage of respondents based on how much they verify a certain page
df_pages = pd.DataFrame()

df_for_you = df.groupby(['for_you'], observed=False)
df_pages[['for_you', 'for_you_prob']] = df_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_following = df.groupby(['following'], observed=False)
df_pages[['following', 'following_prob']] = df_following.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_favorites = df.groupby(['favorites'], observed=False)
df_pages[['favorites', 'favorites_prob']] = df_favorites.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_friends = df.groupby(['friends'], observed=False)
df_pages[['friends', 'friends_prob']] = df_friends.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover = df.groupby(['discover'], observed=False)
df_pages[['discover', 'discover_prob']] = df_discover.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search = df.groupby(['search'], observed=False)
df_pages[['search', 'search_prob']] = df_search.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_pages

Unnamed: 0,for_you,for_you_prob,following,following_prob,favorites,favorites_prob,friends,friends_prob,discover,discover_prob,search,search_prob
0,Always,67.5,Always,5.0,Never,45.0,Always,7.5,Always,2.5,Always,5.0
1,Never,2.5,Never,25.0,Often,12.5,Never,52.5,Never,47.5,Never,5.0
2,Often,15.0,Often,10.0,Rarely,25.0,Often,5.0,Often,2.5,Often,20.0
3,Rarely,5.0,Rarely,32.5,Sometimes,17.5,Rarely,27.5,Rarely,32.5,Rarely,27.5
4,Sometimes,10.0,Sometimes,27.5,,,Sometimes,7.5,Sometimes,15.0,Sometimes,42.5


In [13]:
# Probability setting of staying, or leaving a page based on participants answers
df_pages = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Leaving Probability'
df[['for_you', 'following', 'favorites', 'friends', 
    'discover', 'search']] = df[['for_you', 'following', 'favorites', 'friends', 
                                'discover', 'search']].replace(['Never', 'Rarely', 'Sometimes'], 'Leaving Probability')

# ['Often', 'Always'] = 'Staying Probability'
df[['for_you', 'following', 'favorites', 'friends', 
    'discover', 'search']] = df[['for_you', 'following', 'favorites', 'friends', 
                                 'discover', 'search']].replace(['Often', 'Always'], 'Staying Probability')

# Group pages by probability of staying, or leaving a page
df_for_you = df.groupby(['for_you'], observed=False)
df_pages[['for_you', 'for_you_prob']] = df_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_following = df.groupby(['following'], observed=False)
df_pages[['following', 'following_prob']] = df_following.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_favorites = df.groupby(['favorites'], observed=False)
df_pages[['favorites', 'favorites_prob']] = df_favorites.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_friends = df.groupby(['friends'], observed=False)
df_pages[['friends', 'friends_prob']] = df_friends.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover = df.groupby(['discover'], observed=False)
df_pages[['discover', 'discover_prob']] = df_discover.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search = df.groupby(['search'], observed=False)
df_pages[['search', 'search_prob']] = df_search.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_pages

Unnamed: 0,for_you,for_you_prob,following,following_prob,favorites,favorites_prob,friends,friends_prob,discover,discover_prob,search,search_prob
0,Leaving Probability,17.5,Leaving Probability,85.0,Leaving Probability,87.5,Leaving Probability,87.5,Leaving Probability,95.0,Leaving Probability,75.0
1,Staying Probability,82.5,Staying Probability,15.0,Staying Probability,12.5,Staying Probability,12.5,Staying Probability,5.0,Staying Probability,25.0


In [14]:
# Percentage of respondents based on how much they use a certain feature
df_features = pd.DataFrame()

df_like = df.groupby(['like'], observed=False)
df_features[['like', 'like_p']] = df_like.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_comment = df.groupby(['comment'], observed=False)
df_features[['comment', 'comment_p']] = df_comment.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_save = df.groupby(['save'], observed=False)
df_features[['save', 'save_p']] = df_save.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_follow = df.groupby(['follow'], observed=False)
df_features[['follow', 'follow_p']] = df_follow.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_share = df.groupby(['share'], observed=False)
df_features[['share', 'share_p']] = df_share.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_not_interested = df.groupby(['not_interested'], observed=False)
df_features[['not_interested', 'not_interested_p']] = df_not_interested.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_refresh = df.groupby(['refresh'], observed=False)
df_features[['refresh', 'refresh_p']] = df_refresh.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_features

Unnamed: 0,like,like_p,comment,comment_p,save,save_p,follow,follow_p,share,share_p,not_interested,not_interested_p,refresh,refresh_p
0,Always,25.0,Always,5.0,Always,10.0,Always,2.5,Always,12.5,Always,2.5,Never,45.95
1,Never,17.5,Never,47.5,Never,17.5,Never,22.5,Never,20.0,Never,27.5,Often,8.11
2,Often,32.5,Often,10.0,Often,27.5,Rarely,37.5,Often,25.0,Often,7.5,Rarely,24.32
3,Rarely,7.5,Rarely,17.5,Rarely,20.0,Sometimes,37.5,Rarely,17.5,Rarely,32.5,Sometimes,21.62
4,Sometimes,17.5,Sometimes,20.0,Sometimes,25.0,,,Sometimes,25.0,Sometimes,30.0,,


In [15]:
# Probability setting of using, or not using a feature based on participants usage
df_features = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Not Using Probability'
df[['like', 'comment', 'save', 'follow', 'share', 
    'not_interested', 'refresh']] = df[['like', 'comment', 'save', 'follow', 'share', 
                                        'not_interested', 'refresh']].replace(['Never', 'Rarely', 'Sometimes'], 'Not Using Probability')

# ['Often', 'Always'] = 'Using Probability'
df[['like', 'comment', 'save', 'follow', 'share', 
    'not_interested', 'refresh']] = df[['like', 'comment', 'save', 'follow', 'share', 
                                        'not_interested', 'refresh']].replace(['Often', 'Always'], 'Using Probability')

# Group pages by probability of using, or not using a feature
df_like = df.groupby(['like'], observed=False)
df_features[['like', 'like_p']] = df_like.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_comment = df.groupby(['comment'], observed=False)
df_features[['comment', 'comment_p']] = df_comment.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_save = df.groupby(['save'], observed=False)
df_features[['save', 'save_p']] = df_save.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_follow = df.groupby(['follow'], observed=False)
df_features[['follow', 'follow_p']] = df_follow.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_share = df.groupby(['share'], observed=False)
df_features[['share', 'share_p']] = df_share.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_not_interested = df.groupby(['not_interested'], observed=False)
df_features[['not_interested', 'not_interested_p']] = df_not_interested.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_refresh = df.groupby(['refresh'], observed=False)
df_features[['refresh', 'refresh_p']] = df_refresh.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_features

Unnamed: 0,like,like_p,comment,comment_p,save,save_p,follow,follow_p,share,share_p,not_interested,not_interested_p,refresh,refresh_p
0,Not Using Probability,42.5,Not Using Probability,85.0,Not Using Probability,62.5,Not Using Probability,97.5,Not Using Probability,62.5,Not Using Probability,90.0,Not Using Probability,91.89
1,Using Probability,57.5,Using Probability,15.0,Using Probability,37.5,Using Probability,2.5,Using Probability,37.5,Using Probability,10.0,Using Probability,8.11


In [16]:
# Percentage of respondents based on how much they do a certain action after executing a search
df_search = pd.DataFrame()

df_search_for_you = df.groupby(['search_for_you'], observed=False)
df_search[['search_for_you', 'search_for_you_p']] = df_search_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_scroll = df.groupby(['search_scroll'], observed=False)
df_search[['search_scroll', 'search_scroll_p']] = df_search_scroll.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_again = df.groupby(['search_again'], observed=False)
df_search[['search_again', 'search_again_p']] = df_search_again.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_other = df.groupby(['search_other'], observed=False)
df_search[['search_other', 'search_other_p']] = df_search_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search

Unnamed: 0,search_for_you,search_for_you_p,search_scroll,search_scroll_p,search_again,search_again_p,search_other,search_other_p
0,Always,32.5,Always,7.5,Never,30.0,Always,2.5
1,Never,10.0,Never,15.0,Often,15.0,Never,62.5
2,Often,37.5,Often,22.5,Rarely,35.0,Often,7.5
3,Rarely,2.5,Rarely,35.0,Sometimes,20.0,Rarely,15.0
4,Sometimes,17.5,Sometimes,20.0,,,Sometimes,12.5


In [17]:
# Probability setting of doing, or not doing an action when inside the search page based on participants usage
df_search = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Not Doing Probability'
df[['search_for_you', 'search_scroll', 
    'search_again', 'search_other']] = df[['search_for_you', 'search_scroll', 
                                           'search_again', 'search_other']].replace(['Never', 'Rarely', 'Sometimes'], 'Not Doing Probability')

# ['Often', 'Always'] = 'Doing Probability'
df[['search_for_you' ,'search_scroll', 
    'search_again', 'search_other']] = df[['search_for_you', 'search_scroll', 
                                           'search_again', 'search_other']].replace(['Often', 'Always'], 'Doing Probability')

# Group pages by probability of doing, or not doing an action
df_search_for_you = df.groupby(['search_for_you'], observed=False)
df_search[['search_for_you', 'search_for_you_p']] = df_search_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_scroll = df.groupby(['search_scroll'], observed=False)
df_search[['search_scroll', 'search_scroll_p']] = df_search_scroll.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_again = df.groupby(['search_again'], observed=False)
df_search[['search_again', 'search_again_p']] = df_search_again.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search_other = df.groupby(['search_other'], observed=False)
df_search[['search_other', 'search_other_p']] = df_search_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_search

Unnamed: 0,search_for_you,search_for_you_p,search_scroll,search_scroll_p,search_again,search_again_p,search_other,search_other_p
0,Doing Probability,70.0,Doing Probability,30.0,Doing Probability,15.0,Doing Probability,10.0
1,Not Doing Probability,30.0,Not Doing Probability,70.0,Not Doing Probability,85.0,Not Doing Probability,90.0


In [18]:
# Percentage of respondents based on how much they do a certain action after executing a search
df_discover = pd.DataFrame()

df_discover_for_you = df.groupby(['discover_for_you'], observed=False)
df_discover[['discover_for_you', 'discover_for_you_prob']] = df_discover_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover_stay = df.groupby(['discover_stay'], observed=False)
df_discover[['discover_stay', 'discover_stay_prob']] = df_discover_stay.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover_other = df.groupby(['discover_other'], observed=False)
df_discover[['discover_other', 'discover_other_prob']] = df_discover_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover

Unnamed: 0,discover_for_you,discover_for_you_prob,discover_stay,discover_stay_prob,discover_other,discover_other_prob
0,Always,37.5,Always,2.5,Always,2.5
1,Never,20.0,Never,52.5,Never,72.5
2,Often,20.0,Often,2.5,Rarely,10.0
3,Rarely,5.0,Rarely,15.0,Sometimes,15.0
4,Sometimes,17.5,Sometimes,27.5,,


In [19]:
# Probability setting of not doing, or not doing an action when inside the discover page based on participants usage
df_discover = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Not Doing Probability'
df[['discover_for_you', 'discover_stay', 
    'discover_other']] = df[['discover_for_you', 'discover_stay', 'discover_other']].replace(['Never', 'Rarely', 'Sometimes'], 'Not Doing Probability')

# ['Often', 'Always'] = 'Doing Probability'
df[['discover_for_you' ,'discover_stay', 
    'discover_other']] = df[['search_for_you', 'discover_stay', 'discover_other']].replace(['Often', 'Always'], 'Doing Probability')

# Group pages by probability of doing, or not doing an action
df_discover_for_you = df.groupby(['discover_for_you'], observed=False)
df_discover[['discover_for_you', 'discover_for_you_prob']] = df_discover_for_you.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover_stay = df.groupby(['discover_stay'], observed=False)
df_discover[['discover_stay', 'discover_stay_prob']] = df_discover_stay.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover_other = df.groupby(['discover_other'], observed=False)
df_discover[['discover_other', 'discover_other_prob']] = df_discover_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_discover

Unnamed: 0,discover_for_you,discover_for_you_prob,discover_stay,discover_stay_prob,discover_other,discover_other_prob
0,Doing Probability,70.0,Doing Probability,5.0,Doing Probability,2.5
1,Not Doing Probability,30.0,Not Doing Probability,95.0,Not Doing Probability,97.5


In [20]:
# Percentage of respondents based on how much they watch a certain video given their topic
df_topics = pd.DataFrame()

df_topics_politics = df.groupby(['politics'], observed=False)
df_topics[['politics', 'politics_prob']] = df_topics_politics.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_sports = df.groupby(['sports'], observed=False)
df_topics[['sports', 'sports_prob']] = df_topics_sports.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_eating_habits = df.groupby(['eating_habits'], observed=False)
df_topics[['eating_habits', 'eating_habits_prob']] = df_topics_eating_habits.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_memes = df.groupby(['memes'], observed=False)
df_topics[['memes', 'memes_prob']] = df_topics_memes.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_conspiracies = df.groupby(['conspiracies'], observed=False)
df_topics[['conspiracies', 'conspiracies_prob']] = df_topics_conspiracies.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_pets = df.groupby(['pets'], observed=False)
df_topics[['pets', 'pets_prob']] = df_topics_pets.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_dance = df.groupby(['dance'], observed=False)
df_topics[['dance', 'dance_prob']] = df_topics_dance.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_entertainment = df.groupby(['entertainment'], observed=False)
df_topics[['entertainment', 'entertainment_prob']] = df_topics_entertainment.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_food = df.groupby(['food'], observed=False)
df_topics[['food', 'food_prob']] = df_topics_food.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_games = df.groupby(['games'], observed=False)
df_topics[['games', 'games_prob']] = df_topics_games.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_other = df.groupby(['other'], observed=False)
df_topics[['other', 'other_prob']] = df_topics_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics

Unnamed: 0,politics,politics_prob,sports,sports_prob,eating_habits,eating_habits_prob,memes,memes_prob,conspiracies,conspiracies_prob,pets,pets_prob,dance,dance_prob,entertainment,entertainment_prob,food,food_prob,games,games_prob,other,other_prob
0,Never,15.0,Always,10.0,Always,2.5,Always,25.0,Never,50.0,Always,5.0,Never,42.5,Always,25.0,Always,12.5,Always,12.5,Always,7.5
1,Often,22.5,Never,27.5,Never,32.5,Never,2.5,Rarely,20.0,Never,22.5,Often,15.0,Never,7.5,Never,2.5,Never,10.0,Never,32.5
2,Rarely,27.5,Often,15.0,Often,12.5,Often,57.5,Sometimes,30.0,Often,12.5,Rarely,30.0,Often,40.0,Often,35.0,Often,30.0,Often,12.5
3,Sometimes,35.0,Rarely,22.5,Rarely,30.0,Rarely,2.5,,,Rarely,25.0,Sometimes,12.5,Rarely,2.5,Rarely,12.5,Rarely,15.0,Rarely,10.0


In [21]:
# Probability setting for watching, or not watching a video of a certain topic based on participants answers
df_topics = pd.DataFrame()

# ['Never', 'Rarely', 'Sometimes'] = 'Do Not Watch'
df[['politics', 'sports', 'eating_habits', 'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games', 
    'other']] = df[['politics', 'sports', 'eating_habits', 'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games', 
                    'other']].replace(['Never', 'Rarely', 'Sometimes'], 'Do Not Watch')

# ['Often', 'Always'] = 'Watch'
df[['politics', 'sports', 'eating_habits', 'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games', 
    'other']] = df[['politics', 'sports', 'eating_habits', 'memes', 'conspiracies', 'pets', 'dance', 'entertainment', 'food', 'games', 
                    'other']].replace(['Often', 'Always'], 'Watch')

# Group topics by probability of watching, or not watching a video
df_topics_politics = df.groupby(['politics'], observed=False)
df_topics[['politics', 'politics_prob']] = df_topics_politics.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_sports = df.groupby(['sports'], observed=False)
df_topics[['sports', 'sports_prob']] = df_topics_sports.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_eating_habits = df.groupby(['eating_habits'], observed=False)
df_topics[['eating_habits', 'eating_habits_prob']] = df_topics_eating_habits.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_memes = df.groupby(['memes'], observed=False)
df_topics[['memes', 'memes_prob']] = df_topics_memes.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_conspiracies = df.groupby(['conspiracies'], observed=False)
df_topics[['conspiracies', 'conspiracies_prob']] = df_topics_conspiracies.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_pets = df.groupby(['pets'], observed=False)
df_topics[['pets', 'pets_prob']] = df_topics_pets.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_dance = df.groupby(['dance'], observed=False)
df_topics[['dance', 'dance_prob']] = df_topics_dance.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_entertainment = df.groupby(['entertainment'], observed=False)
df_topics[['entertainment', 'entertainment_prob']] = df_topics_entertainment.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_food = df.groupby(['food'], observed=False)
df_topics[['food', 'food_prob']] = df_topics_food.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_games = df.groupby(['games'], observed=False)
df_topics[['games', 'games_prob']] = df_topics_games.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

df_topics_other = df.groupby(['other'], observed=False)
df_topics[['other', 'other_prob']] = df_topics_other.size().transform(lambda x: round((x*100)/sum(x),2)).reset_index(name='percentage')

In [22]:
# Probability setting for watching, or not watching a video of a certain topic based on participants answers (first half)
df_topics[['politics', 'politics_prob', 'sports', 'sports_prob', 'eating_habits', 'eating_habits_prob', 'memes', 'memes_prob',
    'conspiracies', 'conspiracies_prob']]

Unnamed: 0,politics,politics_prob,sports,sports_prob,eating_habits,eating_habits_prob,memes,memes_prob,conspiracies,conspiracies_prob
0,Do Not Watch,77.5,Do Not Watch,75.0,Do Not Watch,85.0,Do Not Watch,17.5,Do Not Watch,100.0
1,Watch,22.5,Watch,25.0,Watch,15.0,Watch,82.5,,


In [23]:
# Probability setting for watching, or not watching a video of a certain topic based on participants answers (second half)
df_topics[['pets', 'pets_prob', 'dance', 'dance_prob', 'entertainment', 'entertainment_prob', 'food', 'food_prob', 'games', 'games_prob', 'other', 
          'other_prob']]

Unnamed: 0,pets,pets_prob,dance,dance_prob,entertainment,entertainment_prob,food,food_prob,games,games_prob,other,other_prob
0,Do Not Watch,82.5,Do Not Watch,85.0,Do Not Watch,35.0,Do Not Watch,52.5,Do Not Watch,57.5,Do Not Watch,80.0
1,Watch,17.5,Watch,15.0,Watch,65.0,Watch,47.5,Watch,42.5,Watch,20.0
