In [147]:
# Note: ChatGPT was used for assistance during data cleaning
import pandas as pd
import re
import string
from collections import Counter

In [3]:
# Read the "Comments" worksheet of Data.xlsx into a DataFrame.
# The path is relative, so it resolves against the current working directory.
comments_df = pd.read_excel(io="Data.xlsx", sheet_name="Comments")

In [5]:
# Prepare the comment text in one chained expression:
#   1. discard rows whose "Comment" cell is missing,
#   2. cast the remaining "Comment" values to str so the .str accessor and
#      the text-cleaning function used later always receive strings.
comments_df = (
    comments_df
    .dropna(subset=["Comment"])
    .assign(Comment=lambda frame: frame['Comment'].astype(str))
)

In [7]:
# Keep only the comments that contain a literal question mark.
# - regex=False performs a plain substring search, so "?" needs no escaping.
# - .copy() materialises the selection as an independent DataFrame; adding or
#   modifying columns on `question_comments` afterwards then never writes
#   into a slice of `comments_df` (the cause of SettingWithCopyWarning).
question_comments = comments_df[
    comments_df['Comment'].str.contains('?', regex=False)
].copy()

In [149]:
# Number of comments that matched the question filter (one row per comment).
total_questions = len(question_comments.index)

In [13]:
# Clean function for question keywords
def clean_question(text):
    """Return ``text`` lower-cased with punctuation stripped.

    Every character that is neither a regex word character (letters, digits,
    underscore) nor whitespace is deleted. Nothing is inserted in its place,
    so "don't" becomes "dont" and existing whitespace is left untouched.

    Parameters
    ----------
    text : str
        The raw comment text.

    Returns
    -------
    str
        The normalised text.
    """
    lowered = text.lower()
    return re.sub(r"[^\w\s]", "", lowered)

# Add the normalised text as a 'Cleaned' column.
# assign() returns a new DataFrame instead of writing into the existing one,
# so nothing is set on a filtered slice of the source frame (no
# SettingWithCopyWarning) and re-running this cell gives the same result.
question_comments = question_comments.assign(
    Cleaned=question_comments['Comment'].apply(clean_question)
)

In [15]:
# Flatten every cleaned comment into one list of whitespace-separated tokens,
# preserving the order in which the comments (and their words) appear.
all_question_words = [
    token
    for cleaned_text in question_comments['Cleaned']
    for token in cleaned_text.split()
]

In [133]:
# Manually curated stopwords: tokens that are skipped during keyword analysis.
custom_stopwords = [
    'i', 'you', 'the', 'and', 'but', 'what', 'how', 'can', 'is', 'it',
    'this', 'a', 'to', 'do', 'my', 'in', 'for', 'have', 'that', 'your',
    'are', 'with', 'they', 'video', 'would', 'not', 'from', 'just', 'all', 'any',
    'one', 'was', 'know', 'its', 'their', 'also', 'please', 'get',
    'because', 'these', 'much', 'there', 'which', 'when', 'really', 'does',
    'thank', 'them', 'using', 'some', 'has', 'dont', 'iâm', 'been',
    'videos', 'out',
]

# Keep a token only if it is longer than two characters and is not a stopword.
filtered_words = list(
    filter(
        lambda token: len(token) > 2 and token not in custom_stopwords,
        all_question_words,
    )
)

# Tally how often each surviving token occurs.
filtered_freq = Counter()
filtered_freq.update(filtered_words)


In [135]:
# The 20 most frequent keywords as (word, count) pairs, highest count first.
top_keywords = filtered_freq.most_common(n=20)

In [137]:
# First ten question comments (original, uncleaned text) as a plain list.
sample_questions = question_comments['Comment'].iloc[:10].tolist()

In [143]:
# Assemble the report line by line, then emit it with a single print call.
# Joining with "\n" and letting print add the final newline produces exactly
# the same text as printing each line separately.
report_lines = [
    "\n || K-Beauty YouTube Comment Question Analysis ||",
    f"Total Questions Detected: {total_questions}\n",
    "Top 20 Keywords in Questions:",
]

# One "<word>: <count>" line per keyword.
report_lines.extend(f"{word}: {count}" for word, count in top_keywords)

# Numbered list of the sample questions, counting from 1.
report_lines.append("\nSample Questions:")
report_lines.extend(
    f"{i}. {question}" for i, question in enumerate(sample_questions, start=1)
)

print("\n".join(report_lines))


 || K-Beauty YouTube Comment Question Analysis ||
Total Questions Detected: 762

Top 20 Keywords in Questions:
skin: 226
sunscreen: 191
products: 121
about: 121
use: 113
love: 110
korean: 110
like: 85
sunscreens: 67
serum: 64
tried: 62
good: 62
face: 61
skincare: 61
sun: 53
makeup: 43
think: 42
beauty: 42
care: 40
cream: 39

Sample Questions:
1. someone said Volufiline need a more "fat" moisturizer to help skin accumulate fats better to plump up the skin, is that true? if it is, anyone have any rec for moisturizer that can fill up that role?
2. I havenâ€™t seen one of his videos in prob at least a year. His face looks great and much slimmer. What changes has he made or is it weight loss? Botox? Spill the tea
3. Can I use the Sungboon lifting ampoule with retinal? On the days I dont use it?
4. Your NOT greasy! I was admiring how glowing your skin looked â¤ï¸ anything recommendations for dark spots?
5. Can I use several serum and how do I know which serum to apply first?
6. Love this,

In [145]:
# Thank you! Hope you liked it <3