# Imports 

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import nltk
import ast
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)
pd.set_option('max_colwidth', None)

In [3]:
df = pd.read_csv('datasets/analysis.csv')

In [4]:
# no nul' values 
df.isna().sum()

title               0
split_comments      0
reordered_scores    0
reordered_labels    0
sentiment           0
dtype: int64

In [5]:
df['reordered_scores'] = df['reordered_scores'].apply((lambda x: ast.literal_eval(x)))

In [6]:
fears = df.sort_values(by='reordered_scores', key=lambda x: x.apply(lambda y: y[0]), ascending=False)
frustrations = df.sort_values(by='reordered_scores', key=lambda x: x.apply(lambda y: y[1]), ascending=False)
aspirations = df.sort_values(by='reordered_scores', key=lambda x: x.apply(lambda y: y[2]), ascending=False)


In [7]:
fears

Unnamed: 0,title,split_comments,reordered_scores,reordered_labels,sentiment
6233,How to Beat Procrastination,"""fear is the greatest illusion we create within our self.""","[0.9896448254585266, 0.006187118589878082, 0.0041680666618049145]","['fears', 'frustrations', 'aspirations']",negative
5909,How to Beat Procrastination,i feel like i need to listen to this weekly to face my fears.,"[0.9816955924034119, 0.01092555746436119, 0.007378820795565844]","['fears', 'frustrations', 'aspirations']",neutral
1383,5 Essential Tips for Long-Lasting Productivity,али обдал чай кипятком :face_screaming_in_fear:,"[0.9796825647354126, 0.01780746690928936, 0.0025099022313952446]","['fears', 'frustrations', 'aspirations']",negative
6558,How to Beat Procrastination,that fear compass concept is genius!,"[0.9769877791404724, 0.0062966798432171345, 0.016715513542294502]","['fears', 'frustrations', 'aspirations']",positive
7087,My Toxic Relationship With Productivity,status anxiety - alain de botton,"[0.9682902693748474, 0.021380305290222168, 0.010329443961381912]","['fears', 'frustrations', 'aspirations']",neutral
...,...,...,...,...,...
5623,How I Manage My Time - 10 Time Management Tips,"i love this guy, the reason i started my dreams now.","[0.0028450298123061657, 0.002766459947451949, 0.9943884611129761]","['fears', 'frustrations', 'aspirations']",positive
14841,5 Small Changes to Supercharge Your Focus,thomas frank is an inspiration,"[0.0027784034609794617, 0.002567431889474392, 0.9946541786193848]","['fears', 'frustrations', 'aspirations']",positive
1259,5 Essential Tips for Long-Lasting Productivity,it's inspiring!,"[0.002528827404603362, 0.0036696013994514942, 0.9938015937805176]","['fears', 'frustrations', 'aspirations']",positive
11272,7 Actionable Ways to Achieve More in Life,you're my inspiration,"[0.002028948161751032, 0.001852899556979537, 0.9961181282997131]","['fears', 'frustrations', 'aspirations']",positive


# Analysis 
1. I'll have to tokenize the data 
2. Remove Stopwords 
3. Stemming/Lemmatization [reduce words to their basic form] (running -> run)
4. vectorization 
5. Possibly Feature Reduction 

In [8]:
# Preprocess the text
def preprocess_text(text):
    text = re.sub(r'\W', ' ', str(text))
    text = text.lower()
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text)
    text = re.sub(r'\^[a-zA-Z]\s+', ' ', text)
    text = re.sub(r'\s+', ' ', text, flags=re.I)
    text = re.sub(r'^b\s+', '', text)
    return text

In [43]:
df['processed_comments'] = df['split_comments'].apply(preprocess_text)

In [44]:
df

Unnamed: 0,title,split_comments,reordered_scores,reordered_labels,sentiment,processed_comments,cluster
0,How to Build Your Ultimate Productivity System,how to remember the system then?,"[0.20157860219478607, 0.34655919671058655, 0.45186224579811096]","['fears', 'frustrations', 'aspirations']",neutral,how to remember the system then,0
1,How to Build Your Ultimate Productivity System,"15/3/24, x2.","[0.3612099289894104, 0.26915282011032104, 0.36963728070259094]","['fears', 'frustrations', 'aspirations']",neutral,15 3 24 x2,0
2,How to Build Your Ultimate Productivity System,thanks brother it's helping video :red_heart::party_popper:,"[0.14195847511291504, 0.17578278481960297, 0.6822587251663208]","['fears', 'frustrations', 'aspirations']",positive,thanks brother it helping video red_heart party_popper,2
3,How to Build Your Ultimate Productivity System,you should try breathing and just being in the moment while you are in the toilet. :grinning_squinting_face:,"[0.19090235233306885, 0.4940927028656006, 0.31500494480133057]","['fears', 'frustrations', 'aspirations']",neutral,you should try breathing and just being in the moment while you are in the toilet grinning_squinting_face,0
4,How to Build Your Ultimate Productivity System,took me out when he said “my brain is a dumass” :face_with_tears_of_joy::face_with_tears_of_joy::face_with_tears_of_joy:,"[0.0920601338148117, 0.8241838812828064, 0.08375602215528488]","['fears', 'frustrations', 'aspirations']",negative,took me out when he said my brain is dumass face_with_tears_of_joy face_with_tears_of_joy face_with_tears_of_joy,0
...,...,...,...,...,...,...,...
14921,5 Small Changes to Supercharge Your Focus,please also upload hindi video,"[0.3803737759590149, 0.29799097776412964, 0.32163533568382263]","['fears', 'frustrations', 'aspirations']",neutral,please also upload hindi video,2
14922,5 Small Changes to Supercharge Your Focus,im 3rd,"[0.17630787193775177, 0.18799357116222382, 0.6356985569000244]","['fears', 'frustrations', 'aspirations']",neutral,im 3rd,0
14923,5 Small Changes to Supercharge Your Focus,what if he pinned my cmt?:grinning_face_with_sweat:,"[0.5885409116744995, 0.3420863449573517, 0.06937270611524582]","['fears', 'frustrations', 'aspirations']",neutral,what if he pinned my cmt grinning_face_with_sweat,0
14924,5 Small Changes to Supercharge Your Focus,first to comment,"[0.28964871168136597, 0.2726641297340393, 0.4376871585845947]","['fears', 'frustrations', 'aspirations']",neutral,first to comment,0


In [45]:
df['sentiment'].value_counts()

sentiment
positive    7444
neutral     4948
negative    2534
Name: count, dtype: int64

In [50]:
df

Unnamed: 0,title,split_comments,reordered_scores,reordered_labels,sentiment,processed_comments
0,How to Build Your Ultimate Productivity System,how to remember the system then?,"[0.20157860219478607, 0.34655919671058655, 0.45186224579811096]","['fears', 'frustrations', 'aspirations']",neutral,how to remember the system then
1,How to Build Your Ultimate Productivity System,"15/3/24, x2.","[0.3612099289894104, 0.26915282011032104, 0.36963728070259094]","['fears', 'frustrations', 'aspirations']",neutral,15 3 24 x2
2,How to Build Your Ultimate Productivity System,thanks brother it's helping video :red_heart::party_popper:,"[0.14195847511291504, 0.17578278481960297, 0.6822587251663208]","['fears', 'frustrations', 'aspirations']",positive,thanks brother it helping video red_heart party_popper
3,How to Build Your Ultimate Productivity System,you should try breathing and just being in the moment while you are in the toilet. :grinning_squinting_face:,"[0.19090235233306885, 0.4940927028656006, 0.31500494480133057]","['fears', 'frustrations', 'aspirations']",neutral,you should try breathing and just being in the moment while you are in the toilet grinning_squinting_face
4,How to Build Your Ultimate Productivity System,took me out when he said “my brain is a dumass” :face_with_tears_of_joy::face_with_tears_of_joy::face_with_tears_of_joy:,"[0.0920601338148117, 0.8241838812828064, 0.08375602215528488]","['fears', 'frustrations', 'aspirations']",negative,took me out when he said my brain is dumass face_with_tears_of_joy face_with_tears_of_joy face_with_tears_of_joy
...,...,...,...,...,...,...
14921,5 Small Changes to Supercharge Your Focus,please also upload hindi video,"[0.3803737759590149, 0.29799097776412964, 0.32163533568382263]","['fears', 'frustrations', 'aspirations']",neutral,please also upload hindi video
14922,5 Small Changes to Supercharge Your Focus,im 3rd,"[0.17630787193775177, 0.18799357116222382, 0.6356985569000244]","['fears', 'frustrations', 'aspirations']",neutral,im 3rd
14923,5 Small Changes to Supercharge Your Focus,what if he pinned my cmt?:grinning_face_with_sweat:,"[0.5885409116744995, 0.3420863449573517, 0.06937270611524582]","['fears', 'frustrations', 'aspirations']",neutral,what if he pinned my cmt grinning_face_with_sweat
14924,5 Small Changes to Supercharge Your Focus,first to comment,"[0.28964871168136597, 0.2726641297340393, 0.4376871585845947]","['fears', 'frustrations', 'aspirations']",neutral,first to comment


# Analyse Fears

In [51]:
# sort by fears 

In [None]:
fears = df.sort_values(by='reordered_scores', key=lambda x: x.apply(lambda y: y[0]), ascending=False)

In [56]:
fears.head(10)

Unnamed: 0,title,split_comments,reordered_scores,reordered_labels,sentiment
6233,How to Beat Procrastination,"""fear is the greatest illusion we create within our self.""","[0.9896448254585266, 0.006187118589878082, 0.0041680666618049145]","['fears', 'frustrations', 'aspirations']",negative
5909,How to Beat Procrastination,i feel like i need to listen to this weekly to face my fears.,"[0.9816955924034119, 0.01092555746436119, 0.007378820795565844]","['fears', 'frustrations', 'aspirations']",neutral
1383,5 Essential Tips for Long-Lasting Productivity,али обдал чай кипятком :face_screaming_in_fear:,"[0.9796825647354126, 0.01780746690928936, 0.0025099022313952446]","['fears', 'frustrations', 'aspirations']",negative
6558,How to Beat Procrastination,that fear compass concept is genius!,"[0.9769877791404724, 0.0062966798432171345, 0.016715513542294502]","['fears', 'frustrations', 'aspirations']",positive
7087,My Toxic Relationship With Productivity,status anxiety - alain de botton,"[0.9682902693748474, 0.021380305290222168, 0.010329443961381912]","['fears', 'frustrations', 'aspirations']",neutral
6272,How to Beat Procrastination,"face fears, don't procrastinate, read mistborn. one down, two to go.","[0.958441436290741, 0.03407161310315132, 0.007486884016543627]","['fears', 'frustrations', 'aspirations']",neutral
6006,How to Beat Procrastination,"this part is so freaking important that i will just emphasize it here:2:07 ""fear is actually a very good thing, because when we feel scared about doing something, it usually means we should just do the thing""","[0.9581592679023743, 0.020889367908239365, 0.0209514107555151]","['fears', 'frustrations', 'aspirations']",positive
14359,Why You’re Always Distracted - 5 Mistakes Ruining Your Focus,100% on avoiding those feelings. especially the anxiety and fear and imposter syndrome.,"[0.9568814635276794, 0.03187772259116173, 0.011240832507610321]","['fears', 'frustrations', 'aspirations']",negative
4413,How I Manage My Time - 10 Time Management Tips,life is short. live it. fear is natural. face it. memory is powerful. use it,"[0.9544166922569275, 0.017457935959100723, 0.0281253419816494]","['fears', 'frustrations', 'aspirations']",positive
1368,5 Essential Tips for Long-Lasting Productivity,"one day i asked myself, why am i so obsessed with productivity? when really considered, it came down to a base level fear of death. the more productive i was, the more ""life"" i could supposedly live. that's when i realized it's impossible to live more life than what you have. all you can do is enjoy the life you do have, and that's why true productivity is about living each moment to its fullest.","[0.9542709589004517, 0.020566316321492195, 0.025162799283862114]","['fears', 'frustrations', 'aspirations']",negative


# Analyse Frustrations 

In [52]:
frustrations = df.sort_values(by='reordered_scores', key=lambda x: x.apply(lambda y: y[1]), ascending=False)

In [57]:
frustrations.head(10)

Unnamed: 0,title,split_comments,reordered_scores,reordered_labels,sentiment,processed_comments
4713,How I Manage My Time - 10 Time Management Tips,this is so helpful. my biggest problem is being dissatisfied when i cant accomplish everything in my schedule (i bite more than i can chew). so i'd probably think of setting priorities:),"[0.008109974674880505, 0.9773792028427124, 0.014510894194245338]","['fears', 'frustrations', 'aspirations']",positive,this is so helpful my biggest problem is being dissatisfied when cant accomplish everything in my schedule bite more than can chew so d probably think of setting priorities
3002,"How I Organize My Life - The ""LAZI"" Productivity System",bro u are from pakistan can u not speak urdu :unamused_face::unamused_face::unamused_face::thinking_face::thinking_face::thinking_face::thinking_face::expressionless_face::face_with_rolling_eyes:,"[0.01956305094063282, 0.9771984219551086, 0.003238518023863435]","['fears', 'frustrations', 'aspirations']",negative,bro are from pakistan can not speak urdu unamused_face unamused_face unamused_face thinking_face thinking_face thinking_face thinking_face expressionless_face face_with_rolling_eyes
5618,How I Manage My Time - 10 Time Management Tips,music is irritating,"[0.006598987150937319, 0.9766308665275574, 0.01677016355097294]","['fears', 'frustrations', 'aspirations']",negative,music is irritating
3124,My Complete Productivity System,i stopped using fantastical - having to out-edit the ai is so annoying,"[0.009665977209806442, 0.9747980833053589, 0.01553589478135109]","['fears', 'frustrations', 'aspirations']",negative,i stopped using fantastical having to out edit the ai is so annoying
3948,How I Manage My Time - 10 Time Management Tips,02:40 “drowning in my to do list” is an accurate description of my biggest problem for the past 10 years. :frowning_face:time to change things up.,"[0.016225185245275497, 0.972160816192627, 0.011613949202001095]","['fears', 'frustrations', 'aspirations']",negative,02 40 drowning in my to do list is an accurate description of my biggest problem for the past 10 years frowning_face time to change things up
6894,My Toxic Relationship With Productivity,"""chronic feeling of dissatisfaction at the end of the day that i didn't do enough today""","[0.018292831256985664, 0.9719666242599487, 0.009740522131323814]","['fears', 'frustrations', 'aspirations']",negative,chronic feeling of dissatisfaction at the end of the day that didn do enough today
5086,How I Manage My Time - 10 Time Management Tips,i’m always tired or either miss something i’m supposed to do :loudly_crying_face:,"[0.01852129027247429, 0.9711097478866577, 0.010368946939706802]","['fears', 'frustrations', 'aspirations']",negative,i always tired or either miss something m supposed to do loudly_crying_face
7455,My Toxic Relationship With Productivity,my problem is revenge bedtime procustenation is what i got :face_with_tears_of_joy: thank dr,"[0.01258024014532566, 0.9701489210128784, 0.017270857468247414]","['fears', 'frustrations', 'aspirations']",negative,my problem is revenge bedtime procustenation is what got face_with_tears_of_joy thank dr
7298,My Toxic Relationship With Productivity,":four_leaf_clover: good stuff.....btw, it is burnout.","[0.019384639337658882, 0.9698436856269836, 0.01077162753790617]","['fears', 'frustrations', 'aspirations']",positive,four_leaf_clover good stuff btw it is burnout
3530,My Complete Productivity System,this is exhausting,"[0.022802716121077538, 0.9687666296958923, 0.008430668152868748]","['fears', 'frustrations', 'aspirations']",negative,this is exhausting


# Analyse Aspirations 

In [54]:
aspirations = df.sort_values(by='reordered_scores', key=lambda x: x.apply(lambda y: y[2]), ascending=False)