In [6]:
import pandas as pd
import json
import re
import os
import time
import requests
import matplotlib.pyplot as plt
import seaborn as sns
from langdetect import detect
from pythainlp.tokenize import word_tokenize, sent_tokenize
from pythainlp.util import normalize
from pythainlp.util import Trie
from pythainlp.corpus.common import thai_stopwords
from pythainlp.corpus.common import thai_words

from datetime import datetime
import googleapiclient.discovery

In [100]:
with open('model_smartphone.json', 'r', encoding="utf-8") as file:
    model_ip = json.load(file)

In [116]:
with open('Question_lexicon.json', 'r', encoding="utf-8") as file:
    question_list = json.load(file)

In [102]:
model_word = []
for i in model_ip:
    model_word.append(i)
    for j in model_ip[i]:
        model_word.append(j)

model = {i for i in model_word}

In [103]:
def changes_word_of_model(text, dict_word):
    correct_text = []
    for word in text:
        for i in dict_word:
            if word in dict_word[i]:
                correct_word = i
                break
        else:
            correct_word = word
        correct_text.append(correct_word)
    return " ".join(correct_text)

In [117]:
def tokenize_and_split_sentences(dataframe, text_column_name):
    # Create a new DataFrame to store the tokenized sentences
    new_dataframe = pd.DataFrame(columns=dataframe.columns)
    rows_to_concat = []
    
    # Iterate over each row in the original DataFrame
    for _, row in dataframe.iterrows():
        text = row[text_column_name]
        text = re.sub(r"\n", '', text)
        text = re.sub(r"\r", '', text)
        sentences = sent_tokenize(text)
        
        # Create a new row for each tokenized sentence
        for sentence in sentences:
            new_row = row.copy()
            new_row[text_column_name] = sentence
            rows_to_concat.append(new_row)
    
    # Convert the list of rows into a DataFrame
    new_dataframe = pd.DataFrame(rows_to_concat)
    new_dataframe.reset_index(drop=True, inplace=True)
    
    return new_dataframe

In [118]:
keywords_option = ["‡∏´‡∏ô‡πâ‡∏≤‡∏à‡∏≠","‡∏à‡∏≠", "‡πÅ‡∏ö‡∏ï", "‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà", "‡∏Å‡∏•‡πâ‡∏≠‡∏á", "‡∏ñ‡πà‡∏≤‡∏¢","‡∏ñ‡πà‡∏≤‡∏¢‡∏†‡∏≤‡∏û", "‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠", "‡∏£‡∏π‡∏õ‡∏ñ‡πà‡∏≤‡∏¢","hz","‡∏ñ‡πà‡∏≤‡∏¢‡∏Ñ‡∏•‡∏¥‡∏õ","dynamic island","fps","‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏´‡∏ô‡πâ‡∏≤","‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏´‡∏•‡∏±‡∏á","focus"]
keywords = keywords_option
print(keywords)

def keyword_filter(data):
    
    relevant_words = [word for word in data if word in keywords]
    if not relevant_words:
        return None
    
    data = "".join(word for word in data)
    return data

['‡∏´‡∏ô‡πâ‡∏≤‡∏à‡∏≠', '‡∏à‡∏≠', '‡πÅ‡∏ö‡∏ï', '‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà', '‡∏Å‡∏•‡πâ‡∏≠‡∏á', '‡∏ñ‡πà‡∏≤‡∏¢', '‡∏ñ‡πà‡∏≤‡∏¢‡∏†‡∏≤‡∏û', '‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠', '‡∏£‡∏π‡∏õ‡∏ñ‡πà‡∏≤‡∏¢', 'hz', '‡∏ñ‡πà‡∏≤‡∏¢‡∏Ñ‡∏•‡∏¥‡∏õ', 'dynamic island', 'fps', '‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏´‡∏ô‡πâ‡∏≤', '‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏´‡∏•‡∏±‡∏á', 'focus']


In [119]:
def question_filter(dataframe, text_column):
    # Create a boolean mask to identify rows with question words
    mask = dataframe[text_column].apply(lambda text: any(word in text for word in question_list))
    # Invert the mask to keep only rows without question words
    dataframe = dataframe.loc[~mask]

    return dataframe

In [107]:
custom_word = model.union(question_list)
# print(custom_word)
words = custom_word.union(thai_words())
custom_dictionary_trie = Trie(words)

def word_token(data):
    data = "".join(u.lower() for u in data if u not in ("?", ".", ";", ":", "!", '"', "‡πÜ", "‡∏Ø",
                    "/", "<", ">", "^", "-", "*", "(", ")", "%", "_", ",", "#", "'", "{", "}", "‚Ä¶","&"," "))
    data = normalize(data)
    data = word_tokenize(data, custom_dict=custom_dictionary_trie, keep_whitespace=False)

    return data

# ********‡∏î‡∏πcode‡∏ô‡∏µ‡πâ‡πÉ‡∏´‡∏°‡πà*********

In [37]:
# def analyze_sentiment_AIforthai(df, column_name):
#     api_key = "FOFZpO9UFwSMquHMwVmZ7cjebmv0Yv8a"
#     results_df = pd.DataFrame(columns=["id","sentence", "sentiment_data"])
#     for index, row in df.iterrows():
#         comment = row[column_name]
#         response = requests.post(
#             "https://api.aiforthai.in.th/ssense",
#             data={"text": comment},
#             headers={"Apikey": api_key}
#         )
#         sentiment_data = dict(response.json())
#         if sentiment_data["sentiment"]["score"] == "0":
#             sentiment_data = "neu"
#         else:
#             sentiment_data = sentiment_data["sentiment"]['polarity'][:3]
#         results_df.loc[index] = [comment, sentiment_data]
#     return results_df

In [37]:
youtube = googleapiclient.discovery.build(
    "youtube", "v3", developerKey="*******************")
allcomment = []

def is_thai(text):
    try:
        return detect(text) == 'th'
    except:
        return False

def get_comments(video_id, keyword, brand):
    request = youtube.commentThreads().list(
        part="snippet,replies",
        videoId=video_id,
        textFormat="plainText",
        maxResults=100
    )

    comments = []
    thai_regex = '[‡∏Å-‡πô]'
    def remove_thai(s): return re.sub(thai_regex, '', s)
    keyword_search = remove_thai(keyword)

    while request is not None:
        response = request.execute()
        for item in response["items"]:
            if is_thai(item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]) == True:
                if item["snippet"]["totalReplyCount"] > 0:
                    # print(item)
                    # print(item["replies"]['comments'])
                    replies = item["replies"]["comments"]
                    for reply in replies:
                        # if detect(reply['snippet']["textDisplay"]=='th'):
                        # print("****Reply*****", reply)
                        reply_published_at = reply['snippet']["publishedAt"]
                        reply_datetime_obj = datetime.strptime(
                            reply_published_at, "%Y-%m-%dT%H:%M:%SZ")
                        reply_published_date = reply_datetime_obj.strftime(
                            "%Y-%m-%d")
                        reply_published_time = reply_datetime_obj.strftime(
                            "%H:%M:%S")

                        comments.append({
                            "textDisplay": reply['snippet']["textDisplay"],
                            "authorDisplayName": reply['snippet']["authorDisplayName"],
                            "author_id": reply['snippet']["authorChannelId"]["value"],
                            "likeCount": reply['snippet']["likeCount"],
                            "publishedDate": reply_published_date,
                            "publishedTime": reply_published_time,
                            "videoId": "https://www.youtube.com/watch?v=" + reply['snippet']["videoId"],
                            "keyword_search": keyword_search,
                            "Brand": brand,
                            "type_data": "Reply"
                        })
                    # if detect(item["snippet"]["topLevelComment"]["snippet"]['textDisplay']=='th'):
                    comment = item["snippet"]["topLevelComment"]["snippet"]
                    published_at = comment["publishedAt"]
                    datetime_obj = datetime.strptime(
                        published_at, "%Y-%m-%dT%H:%M:%SZ")
                    published_date = datetime_obj.strftime("%Y-%m-%d")
                    published_time = datetime_obj.strftime("%H:%M:%S")
                    

                    comments.append({
                        "textDisplay": comment["textDisplay"],
                        "authorDisplayName": comment["authorDisplayName"],
                        "author_id": comment["authorChannelId"]["value"],
                        "likeCount": comment["likeCount"],
                        "publishedDate": published_date,
                        "publishedTime": published_time,
                        "videoId": "https://www.youtube.com/watch?v=" + comment["videoId"],
                        "keyword_search": keyword_search,
                        "Brand": brand,
                        "type_data":"Comment"
                    })
                else:
                    # if detect(item["snippet"]["topLevelComment"]["snippet"]['textDisplay']=='th'):
                    comment = item["snippet"]["topLevelComment"]["snippet"]
                    published_at = comment["publishedAt"]
                    datetime_obj = datetime.strptime(
                        published_at, "%Y-%m-%dT%H:%M:%SZ")
                    published_date = datetime_obj.strftime("%Y-%m-%d")
                    published_time = datetime_obj.strftime("%H:%M:%S")
                    

                    comments.append({
                        "textDisplay": comment["textDisplay"],
                        "authorDisplayName": comment["authorDisplayName"],
                        "author_id": comment["authorChannelId"]["value"],
                        "likeCount": comment["likeCount"],
                        "publishedDate": published_date,
                        "publishedTime": published_time,
                        "videoId": "https://www.youtube.com/watch?v=" + comment["videoId"],
                        "keyword_search": keyword_search,
                        "Brand": brand,
                        "type_data":"Comment"
                    })

        request = youtube.commentThreads().list_next(request, response)

    return comments


def search_videos(query, brand):
    request = youtube.search().list(
        part="id,snippet",
        type='video',
        q=query,
        maxResults=1,
        regionCode='TH'
    )
    response = request.execute()
    for item in response["items"]:
        video_id = item["id"]["videoId"]
        url = f"https://www.youtube.com/watch?v={video_id}"
        # print(url)
        comments = get_comments(video_id, query, brand)
        allcomment.extend(comments)
        # print(allcomment)
        time.sleep(5)


def search_videos_from_csv(csv_file, select_brand):
    df = pd.read_excel(csv_file)
    df = df[df["Brand"] == select_brand]
    keywords = df["Keyword_Search"].tolist()
    brands = df["Brand"].tolist()

    print("‡∏¢‡∏µ‡πà‡∏´‡πâ‡∏≠‡∏™‡∏°‡∏≤‡∏£‡πå‡∏ó‡πÇ‡∏ü‡∏ô‡∏ó‡∏µ‡πà‡∏î‡∏∂‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏°‡∏≤")
    print("---------------------")
    print(keywords)
    print(brands)

    for keyword, brand in zip(keywords, brands):
        keyword_search = "‡∏£‡∏µ‡∏ß‡∏¥‡∏ß"+keyword
        print("Brand: "+brand, "Search: "+keyword_search)
        search_videos(keyword_search, brand)


if __name__ == "__main__":
    csv_file = 'smartphone.xlsx'
    select_brand = input("Please Enter to Collect data of Brand: ")
    # ‡∏Ñ‡πâ‡∏ô‡∏´‡∏≤‡∏ó‡∏µ‡πà‡∏•‡∏∞‡πÅ‡∏ö‡∏£‡∏ô‡∏î‡πå
    search_videos_from_csv(csv_file, select_brand)

    json_filename = f"comments.json"
    with open(json_filename, "w", encoding="utf-8") as file:
        json.dump(allcomment, file, ensure_ascii=False, indent=4)

‡∏¢‡∏µ‡πà‡∏´‡πâ‡∏≠‡∏™‡∏°‡∏≤‡∏£‡πå‡∏ó‡πÇ‡∏ü‡∏ô‡∏ó‡∏µ‡πà‡∏î‡∏∂‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏°‡∏≤
---------------------
['Samsung Galaxy M04', 'Samsung Galaxy A04']
['Samsung', 'Samsung']
Brand: Samsung Search: ‡∏£‡∏µ‡∏ß‡∏¥‡∏ßSamsung Galaxy M04
Brand: Samsung Search: ‡∏£‡∏µ‡∏ß‡∏¥‡∏ßSamsung Galaxy A04


# MongoDB Part: Put and Get Data

In [20]:
from pymongo import MongoClient

In [7]:
client = MongoClient('mongodb+srv://dararatta:021044noinoi@projectcongrat.z95prpk.mongodb.net/')  
database = client.Projectsentiment  
collection = database.SmartphoneReview

json_filename = 'comments.json'  # Replace with your JSON file name
with open(json_filename, 'r', encoding='utf-8') as file:
    data = json.load(file)

collection.insert_many(data)
print(f"Inserted {len(data)} documents into the collection.")

Inserted 2386 documents into the collection.


# Get Data Part

In [24]:
client = MongoClient('mongodb+srv://dararatta:021044noinoi@projectcongrat.z95prpk.mongodb.net/')  
database = client.Projectsentiment  
collection = database.SmartphoneReview
data = collection.find()

df_comment = pd.DataFrame(data)
# df_comment = df_comment.drop_duplicates(subset="textDisplay")
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_comment.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")
df_comment.head()

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 9,426 ‡πÅ‡∏ñ‡∏ß


Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data
0,64b6c01aa4428eeea753106d,‡∏à‡∏≤‡∏Å‡πÉ‡∏à‡πÉ‡∏ä‡πâ 7+ ‡∏°‡∏≤‡∏ô‡∏≤‡∏ô‡∏°‡∏≤‡∏Å‡πÅ‡∏•‡πâ‡∏ß ‡πÅ‡∏ü‡∏ô‡∏ã‡∏∑‡πâ‡∏≠14 ‡πÉ‡∏´‡πâ ‡∏Å‡πá‡∏î‡∏µ‡πÉ‡∏à‡∏™...,Warisara Bee,UCaALObPQL634Ztu7swSTRmw,0,2023-07-17,22:59:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
1,64b6c01aa4428eeea753106e,‡∏Æ‡πà‡∏≤‡πÜ ‡πÄ‡∏û‡∏¥‡πà‡∏á‡∏ã‡∏∑‡πâ‡∏≠‡πÑ‡∏î‡πâ ‡∏™‡∏≤‡∏°‡∏ß‡∏±‡∏ô‡∏Å‡πâ‡∏°‡∏≤‡πÄ‡∏à‡∏≠‡∏Ñ‡∏•‡∏¥‡∏õ‡∏ô‡∏µ‡πâ 5555,‡∏ß‡∏¥‡∏ë‡∏π‡∏£‡∏¢‡πå ‡∏™‡∏µ‡πÄ‡∏™‡∏°‡∏≠,UCtj26p2jfeGN__L3XT0XJHQ,0,2023-07-16,15:27:06,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
2,64b6c01aa4428eeea753106f,‡πÄ‡πÄ‡∏•‡πâ‡∏ß‡πÑ‡∏á Apple ‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏Ç‡∏≤‡∏¢‡∏™‡πÄ‡∏õ‡∏Å‡πÄ‡∏Å‡πà‡∏≤‡πÅ‡∏Ñ‡πà‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡∏ä‡∏∑‡πà‡∏≠‡πÉ‡∏´...,‡∏à‡∏¥‡πâ‡∏á‡∏à‡∏≠‡∏Å‡∏™‡∏¢‡∏≤‡∏°,UCMpqPwsZS95-qPbJi_pB2SA,0,2023-07-08,04:23:55,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
3,64b6c01aa4428eeea7531070,‡πÉ‡∏ä‡πâ 8+ ‡∏≠‡∏¢‡∏π‡πà ‡∏Å‡∏≥‡∏•‡∏±‡∏á ‡∏°‡∏≠‡∏á 14 ‡∏≠‡∏¢‡∏π‡πà,‡∏ß‡∏∏‡∏í‡∏¥‡∏ä‡∏±‡∏¢ ‡∏®‡∏≤‡∏™‡∏ï‡∏£‡πå‡∏ó‡∏≠‡∏á,UC5mb-wxGvvfH8biaFVc5KxQ,0,2023-06-26,01:38:49,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
4,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,‡∏û‡∏µ‡πà‡πÄ‡∏•‡∏¥‡∏Å‡∏ö‡∏≤‡∏£‡∏∞‡πÅ‡∏ô‡∏∞,UCMJ8iDZpoo0d8fGXDWVZEew,0,2023-06-24,20:09:33,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment


In [None]:
df_apple = df_comment[df_comment["Brand"]=="Apple"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_apple.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

In [12]:
df_apple = df_comment[df_comment["Brand"]=="Apple"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_apple.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 9,534 ‡πÅ‡∏ñ‡∏ß


In [13]:
df_samsung = df_comment[df_comment["Brand"]=="Samsung"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_samsung.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 0 ‡πÅ‡∏ñ‡∏ß


In [14]:
df_oppo = df_comment[df_comment["Brand"]=="Oppo"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_oppo.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 0 ‡πÅ‡∏ñ‡∏ß


In [18]:
# def count_brand():
count_brand_list = []
csv_file = 'smartphone.xlsx' 
df = pd.read_excel(csv_file) 
keywords = (df["Keyword_Search"].drop_duplicates()).tolist()
brands = df["Brand"].tolist()

for keyword,brand in zip(keywords,brands):
    df_brand_aa = df_comment[df_comment['keyword_search']==keyword]
    count = df_brand_aa.shape[0]

    list_brand = [keyword, brand, str(count)]
    count_brand_list.append(list_brand)

df_count_brand = pd.DataFrame(count_brand_list, columns=['Model','Brand','Count'])
df_count_brand = df_count_brand[df_count_brand['Brand']=="Apple"]
df_count_brand

Unnamed: 0,Model,Brand,Count
0,iPhone 14,Apple,676
1,iPhone 14 Plus,Apple,326
2,iPhone 14 Pro,Apple,276
3,iPhone 14 Pro Max,Apple,991
4,iPhone SE (2022),Apple,246
5,iPhone 13,Apple,1149
6,iPhone 13 Pro,Apple,280
7,iPhone 13 Pro Max,Apple,428
8,iPhone 13 mini,Apple,469
9,iPhone 12 mini,Apple,597


# Part of Filter Features & Questionable Data

Part of Filter Features

In [121]:
df_comment["Filter_keyword"] = df_comment["textDisplay"].apply(word_token)
df_comment.head()

Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data,Filter_keyword
0,64b6c01aa4428eeea753107b,‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏ß‡∏π‡∏ö‡∏ß‡∏≤‡∏ö‡πÄ‡∏õ‡πá‡∏ô‡πÄ‡∏û‡∏£‡∏≤‡∏∞‡∏≠‡∏∞‡πÑ‡∏£‡∏Ñ‡∏∞,Julaluck Pradujchon,UCkvFpXTpoqxCUqTa57pjy1A,0,2023-05-29,07:31:10,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡∏Å‡∏•‡πâ‡∏≠‡∏á, ‡∏ß‡∏π‡∏ö‡∏ß‡∏≤‡∏ö, ‡πÄ‡∏õ‡πá‡∏ô, ‡πÄ‡∏û‡∏£‡∏≤‡∏∞‡∏≠‡∏∞‡πÑ‡∏£‡∏Ñ‡∏∞]"
1,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,‡∏û‡∏µ‡πà‡πÄ‡∏•‡∏¥‡∏Å‡∏ö‡∏≤‡∏£‡∏∞‡πÅ‡∏ô‡∏∞,UCMJ8iDZpoo0d8fGXDWVZEew,0,2023-06-24,20:09:33,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡∏ä‡∏≠‡∏ö, ‡∏Å‡∏•‡πâ‡∏≠‡∏á, ‡∏Å‡∏±‡∏ö, ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠, ‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠, ‡πÅ‡∏Ñ‡πà, ‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•..."
2,64b6c01aa4428eeea753106e,‡∏Æ‡πà‡∏≤‡πÜ ‡πÄ‡∏û‡∏¥‡πà‡∏á‡∏ã‡∏∑‡πâ‡∏≠‡πÑ‡∏î‡πâ ‡∏™‡∏≤‡∏°‡∏ß‡∏±‡∏ô‡∏Å‡πâ‡∏°‡∏≤‡πÄ‡∏à‡∏≠‡∏Ñ‡∏•‡∏¥‡∏õ‡∏ô‡∏µ‡πâ 5555,‡∏ß‡∏¥‡∏ë‡∏π‡∏£‡∏¢‡πå ‡∏™‡∏µ‡πÄ‡∏™‡∏°‡∏≠,UCtj26p2jfeGN__L3XT0XJHQ,0,2023-07-16,15:27:06,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡∏Æ‡πà‡∏≤, ‡πÄ‡∏û‡∏¥‡πà‡∏á, ‡∏ã‡∏∑‡πâ‡∏≠, ‡πÑ‡∏î‡πâ, ‡∏™‡∏≤‡∏°, ‡∏ß‡∏±‡∏ô, ‡∏Å‡πâ, ‡∏°‡∏≤, ‡πÄ‡∏à‡∏≠,..."
3,64b6c01aa4428eeea7531075,‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠ip13‡∏Ñ‡∏¥‡∏î‡∏ß‡πà‡∏≤‡∏î‡∏µ‡πÑ‡∏´‡∏°‡∏Ñ‡∏∞üòÖ‡∏•‡∏±‡∏á‡πÄ‡∏•‡∏à‡∏±‡∏á,Mimee makeup Beauty blogger,UCuZ9k0t09O-JxUBvYqn7OgQ,0,2023-06-07,11:31:12,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞, ‡∏ã‡∏∑‡πâ‡∏≠, ip13, ‡∏Ñ‡∏¥‡∏î, ‡∏ß‡πà‡∏≤, ‡∏î‡∏µ, ‡πÑ‡∏´‡∏°, ‡∏Ñ‡∏∞, üòÖ..."
4,64b6c01aa4428eeea753106f,‡πÄ‡πÄ‡∏•‡πâ‡∏ß‡πÑ‡∏á Apple ‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏Ç‡∏≤‡∏¢‡∏™‡πÄ‡∏õ‡∏Å‡πÄ‡∏Å‡πà‡∏≤‡πÅ‡∏Ñ‡πà‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡∏ä‡∏∑‡πà‡∏≠‡πÉ‡∏´...,‡∏à‡∏¥‡πâ‡∏á‡∏à‡∏≠‡∏Å‡∏™‡∏¢‡∏≤‡∏°,UCMpqPwsZS95-qPbJi_pB2SA,0,2023-07-08,04:23:55,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡πÅ‡∏•‡πâ‡∏ß, ‡πÑ‡∏á, apple, ‡∏ñ‡πâ‡∏≤, ‡∏à‡∏∞, ‡∏Ç‡∏≤‡∏¢, ‡∏™‡πÄ‡∏õ‡∏Å, ‡πÄ‡∏Å‡πà‡∏≤, ‡πÅ‡∏Ñ..."


In [122]:
df_comment['Filter_keyword'] = df_comment['Filter_keyword'].apply(keyword_filter)
df_comment = df_comment.dropna()
df_comment.reset_index(drop=True, inplace=True)
df_comment = df_comment.drop(columns="Filter_keyword")
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_comment.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")
df_comment.head()

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 1,629 ‡πÅ‡∏ñ‡∏ß


Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data
0,64b6c01aa4428eeea753107b,‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏ß‡∏π‡∏ö‡∏ß‡∏≤‡∏ö‡πÄ‡∏õ‡πá‡∏ô‡πÄ‡∏û‡∏£‡∏≤‡∏∞‡∏≠‡∏∞‡πÑ‡∏£‡∏Ñ‡∏∞,Julaluck Pradujchon,UCkvFpXTpoqxCUqTa57pjy1A,0,2023-05-29,07:31:10,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
1,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,‡∏û‡∏µ‡πà‡πÄ‡∏•‡∏¥‡∏Å‡∏ö‡∏≤‡∏£‡∏∞‡πÅ‡∏ô‡∏∞,UCMJ8iDZpoo0d8fGXDWVZEew,0,2023-06-24,20:09:33,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
2,64b6c01aa4428eeea7531078,‡∏ô‡∏µ‡πà‡∏ã‡∏∑‡πâ‡∏≠‡∏°‡∏≤‡πÉ‡∏ä‡πâ‡πÑ‡∏î‡πâ 1 ‡πÄ‡∏î‡∏∑‡∏≠‡∏ô ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠...,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
3,64b6c01aa4428eeea75310a0,‡∏à‡∏£‡∏¥‡∏á‡∏°‡∏≤‡∏Å‡πÜ ‡∏°‡∏±‡∏ô‡∏Ñ‡∏ß‡∏£‡∏õ‡∏•‡πà‡∏≠‡∏¢ 120hz ‡πÑ‡∏î‡πâ‡πÅ‡∏•‡πâ‡∏ß ‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ï‡∏±‡∏î‡∏™...,iaskerman,UC1CdBdpE_JyYq7LhiQpHieg,0,2023-02-08,10:29:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
4,64b6c01aa4428eeea75310bb,‡∏ú‡∏°‡∏ã‡∏∑‡πâ‡∏≠ 13 ‡∏°‡∏¥‡∏ô‡∏¥ ‡πÉ‡∏ä‡πâ‡∏™‡∏ö‡∏≤‡∏¢‡∏°‡∏∑‡∏≠‡∏°‡∏≤‡∏Å‡πÜ ‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà‡πÇ‡∏≠‡πÄ‡∏Ñ ‡∏Å‡∏•...,Speed22,UCuF3XxhDkdBl_8RdNMe7nKw,0,2022-12-19,07:53:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment


In [25]:
df_apple = df_comment[df_comment["Brand"]=="Apple"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_apple.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 1,629 ‡πÅ‡∏ñ‡∏ß


In [26]:
df_samsung = df_comment[df_comment["Brand"]=="Samsung"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_samsung.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 0 ‡πÅ‡∏ñ‡∏ß


In [27]:
df_oppo = df_comment[df_comment["Brand"]=="Oppo"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_oppo.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 0 ‡πÅ‡∏ñ‡∏ß


In [29]:
# def count_brand():
count_brand_list = []
csv_file = 'test.xlsx' 
df = pd.read_excel(csv_file) 
keywords = (df["Keyword_Search"].drop_duplicates()).tolist()
brands = df["Brand"].tolist()

for keyword,brand in zip(keywords,brands):
    df_brand_aa = df_comment[df_comment['keyword_search']==keyword]
    count = df_brand_aa.shape[0]

    list_brand = [keyword, brand, str(count)]
    count_brand_list.append(list_brand)

df_count_brand = pd.DataFrame(count_brand_list, columns=['Model','Brand','Count'])
df_count_brand = df_count_brand[df_count_brand['Brand']=="Apple"]
df_count_brand

Unnamed: 0,Model,Brand,Count
18,iPhone 14,Apple,119
19,iPhone 14 Plus,Apple,66
20,iPhone 14 Pro,Apple,51
21,iPhone 14 Pro Max,Apple,258
22,iPhone SE (2022),Apple,78
23,iPhone 13,Apple,210
24,iPhone 13 Pro,Apple,44
25,iPhone 13 Pro Max,Apple,95
26,iPhone 13 mini,Apple,66
27,iPhone 12 mini,Apple,136


In [None]:
# x = input("Please select Brand: ")
def showChart():
    x = input("Please select Brand: ")
    df_chart = df_count_brand[df_count_brand["Brand"]==x].copy()
    df_chart['Count'] = pd.to_numeric(df_chart['Count'], errors='coerce')
    fig, ax = plt.subplots()
    barchart = sns.barplot(x=df_chart['Count'], y=df_chart['Model'],palette="flare")
    barchart.bar_label(ax.containers[0], label_type='edge', padding=15)
    ax.margins(y=0.05)
    plt.show()

    return 

showChart()


Part of Filter Out Questionable data

In [123]:
df_questionable_filter = question_filter(df_comment, "textDisplay")
df_questionable_filter.reset_index(drop=True, inplace=True)
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_questionable_filter.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")
df_questionable_filter.head()

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 1,364 ‡πÅ‡∏ñ‡∏ß


Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,‡∏û‡∏µ‡πà‡πÄ‡∏•‡∏¥‡∏Å‡∏ö‡∏≤‡∏£‡∏∞‡πÅ‡∏ô‡∏∞,UCMJ8iDZpoo0d8fGXDWVZEew,0,2023-06-24,20:09:33,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
1,64b6c01aa4428eeea7531078,‡∏ô‡∏µ‡πà‡∏ã‡∏∑‡πâ‡∏≠‡∏°‡∏≤‡πÉ‡∏ä‡πâ‡πÑ‡∏î‡πâ 1 ‡πÄ‡∏î‡∏∑‡∏≠‡∏ô ‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠...,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
2,64b6c01aa4428eeea75310a0,‡∏à‡∏£‡∏¥‡∏á‡∏°‡∏≤‡∏Å‡πÜ ‡∏°‡∏±‡∏ô‡∏Ñ‡∏ß‡∏£‡∏õ‡∏•‡πà‡∏≠‡∏¢ 120hz ‡πÑ‡∏î‡πâ‡πÅ‡∏•‡πâ‡∏ß ‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ï‡∏±‡∏î‡∏™...,iaskerman,UC1CdBdpE_JyYq7LhiQpHieg,0,2023-02-08,10:29:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
3,64b6c01aa4428eeea75310bb,‡∏ú‡∏°‡∏ã‡∏∑‡πâ‡∏≠ 13 ‡∏°‡∏¥‡∏ô‡∏¥ ‡πÉ‡∏ä‡πâ‡∏™‡∏ö‡∏≤‡∏¢‡∏°‡∏∑‡∏≠‡∏°‡∏≤‡∏Å‡πÜ ‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà‡πÇ‡∏≠‡πÄ‡∏Ñ ‡∏Å‡∏•...,Speed22,UCuF3XxhDkdBl_8RdNMe7nKw,0,2022-12-19,07:53:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
4,64b6c01aa4428eeea7531168,60 hz ‡∏õ‡∏±‡∏à‡∏à‡∏∏‡∏ö‡∏±‡∏ô‡πÉ‡∏ä‡πâ 120 ‡∏û‡∏≠‡πÄ‡∏•‡πà‡∏ô‡∏´‡∏ô‡πâ‡∏≤‡∏à‡∏≠‡∏£‡∏µ‡πÄ‡∏ü‡∏£‡∏ä‡∏ô‡πâ‡∏≠‡∏¢ ‡πÄ...,Natthaphon Yasamut,UCHvk6417ehNEfLDEQSX47TQ,0,2022-10-03,12:56:19,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment


In [124]:
df_apple = df_questionable_filter[df_questionable_filter["Brand"]=="Apple"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_apple.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 1,364 ‡πÅ‡∏ñ‡∏ß


In [32]:
df_apple = df_questionable_filter[df_questionable_filter["Brand"]=="Samsung"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_apple.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 0 ‡πÅ‡∏ñ‡∏ß


In [33]:
df_apple = df_questionable_filter[df_questionable_filter["Brand"]=="Oppo"]
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_apple.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 0 ‡πÅ‡∏ñ‡∏ß


# Part of Sentence Tokenize

In [125]:
df_comment_sentence = tokenize_and_split_sentences(df_questionable_filter, 'textDisplay')
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_comment_sentence.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")
df_comment_sentence.head()

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 4,168 ‡πÅ‡∏ñ‡∏ß


Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,‡∏û‡∏µ‡πà‡πÄ‡∏•‡∏¥‡∏Å‡∏ö‡∏≤‡∏£‡∏∞‡πÅ‡∏ô‡∏∞,UCMJ8iDZpoo0d8fGXDWVZEew,0,2023-06-24,20:09:33,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
1,64b6c01aa4428eeea7531078,‡∏ô‡∏µ‡πà‡∏ã‡∏∑‡πâ‡∏≠‡∏°‡∏≤‡πÉ‡∏ä‡πâ‡πÑ‡∏î‡πâ 1 ‡πÄ‡∏î‡∏∑‡∏≠‡∏ô,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
2,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
3,64b6c01aa4428eeea7531078,‡∏°‡∏±‡∏ô‡∏à‡∏∞‡πÄ‡∏´‡∏•‡∏∑‡∏≠‡∏ñ‡∏∂‡∏á 70%‡πÑ‡∏´‡∏°‡∏ô‡πâ‡∏≠‡∏ú‡∏°‡πÄ‡∏õ‡πá‡∏ô‡∏™‡∏≤‡∏ß‡∏Å‡∏ù‡∏±‡πà‡∏á‡πÅ‡∏≠‡∏ô‡∏î‡∏£‡∏≠‡∏¢‡∏î‡πå...,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
4,64b6c01aa4428eeea7531078,‡πÅ‡∏•‡∏∞‡πÄ‡∏û‡∏¥‡πà‡∏á‡∏¢‡πâ‡∏≤‡∏¢‡∏°‡∏≤‡∏ù‡∏±‡πà‡∏á‡πÑ‡∏≠‡πÇ‡∏ü‡∏ô,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment


Filter Features Word

In [126]:
df_comment_sentence["Filter_keyword"] = df_comment_sentence["textDisplay"].apply(word_token)
df_comment_sentence.head()

Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data,Filter_keyword
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,‡∏û‡∏µ‡πà‡πÄ‡∏•‡∏¥‡∏Å‡∏ö‡∏≤‡∏£‡∏∞‡πÅ‡∏ô‡∏∞,UCMJ8iDZpoo0d8fGXDWVZEew,0,2023-06-24,20:09:33,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡∏ä‡∏≠‡∏ö, ‡∏Å‡∏•‡πâ‡∏≠‡∏á, ‡∏Å‡∏±‡∏ö, ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠, ‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠, ‡πÅ‡∏Ñ‡πà, ‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•..."
1,64b6c01aa4428eeea7531078,‡∏ô‡∏µ‡πà‡∏ã‡∏∑‡πâ‡∏≠‡∏°‡∏≤‡πÉ‡∏ä‡πâ‡πÑ‡∏î‡πâ 1 ‡πÄ‡∏î‡∏∑‡∏≠‡∏ô,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡∏ô‡∏µ‡πà, ‡∏ã‡∏∑‡πâ‡∏≠, ‡∏°‡∏≤, ‡πÉ‡∏ä‡πâ‡πÑ‡∏î‡πâ, 1, ‡πÄ‡∏î‡∏∑‡∏≠‡∏ô]"
2,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û, ‡πÅ‡∏ö‡∏ï, ‡∏•‡∏î, ‡πÑ‡∏õ, 3, ‡πÄ‡∏´‡∏•‡∏∑‡∏≠, 97, ‡∏ñ‡πâ‡∏≤, ‡∏´‡∏°‡∏î, ..."
3,64b6c01aa4428eeea7531078,‡∏°‡∏±‡∏ô‡∏à‡∏∞‡πÄ‡∏´‡∏•‡∏∑‡∏≠‡∏ñ‡∏∂‡∏á 70%‡πÑ‡∏´‡∏°‡∏ô‡πâ‡∏≠‡∏ú‡∏°‡πÄ‡∏õ‡πá‡∏ô‡∏™‡∏≤‡∏ß‡∏Å‡∏ù‡∏±‡πà‡∏á‡πÅ‡∏≠‡∏ô‡∏î‡∏£‡∏≠‡∏¢‡∏î‡πå...,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡∏°‡∏±‡∏ô, ‡∏à‡∏∞, ‡πÄ‡∏´‡∏•‡∏∑‡∏≠, ‡∏ñ‡∏∂‡∏á, 70, ‡πÑ‡∏´‡∏°, ‡∏ô‡πâ‡∏≠‡∏ú‡∏°, ‡πÄ‡∏õ‡πá‡∏ô, ‡∏™‡∏≤..."
4,64b6c01aa4428eeea7531078,‡πÅ‡∏•‡∏∞‡πÄ‡∏û‡∏¥‡πà‡∏á‡∏¢‡πâ‡∏≤‡∏¢‡∏°‡∏≤‡∏ù‡∏±‡πà‡∏á‡πÑ‡∏≠‡πÇ‡∏ü‡∏ô,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment,"[‡πÅ‡∏•‡∏∞, ‡πÄ‡∏û‡∏¥‡πà‡∏á, ‡∏¢‡πâ‡∏≤‡∏¢, ‡∏°‡∏≤, ‡∏ù‡∏±‡πà‡∏á, ‡πÑ‡∏≠‡πÇ‡∏ü‡∏ô]"


In [127]:
df_comment_sentence['Filter_keyword'] = df_comment_sentence['Filter_keyword'].apply(keyword_filter)
df_comment_sentence = df_comment_sentence.dropna()
df_comment_sentence.reset_index(drop=True, inplace=True)
df_comment_sentence = df_comment_sentence.drop(columns="Filter_keyword")
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_comment_sentence.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")
df_comment_sentence.head()

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡πÅ‡∏ñ‡∏ß‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 1,767 ‡πÅ‡∏ñ‡∏ß


Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,‡∏û‡∏µ‡πà‡πÄ‡∏•‡∏¥‡∏Å‡∏ö‡∏≤‡∏£‡∏∞‡πÅ‡∏ô‡∏∞,UCMJ8iDZpoo0d8fGXDWVZEew,0,2023-06-24,20:09:33,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
1,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
2,64b6c01aa4428eeea75310a0,‡∏°‡∏±‡∏ô‡∏Ñ‡∏ß‡∏£‡∏õ‡∏•‡πà‡∏≠‡∏¢ 120hz ‡πÑ‡∏î‡πâ‡πÅ‡∏•‡πâ‡∏ß,iaskerman,UC1CdBdpE_JyYq7LhiQpHieg,0,2023-02-08,10:29:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
3,64b6c01aa4428eeea75310a0,‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à‡∏ã‡∏∑‡πâ‡∏≠ 14 ‡πÅ‡∏ï‡πà‡∏û‡∏≠‡∏à‡∏≠ 60 ‡∏ö‡∏≤‡∏¢‡πÄ‡∏•‡∏¢‡∏Ñ‡πà‡∏∞,iaskerman,UC1CdBdpE_JyYq7LhiQpHieg,0,2023-02-08,10:29:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
4,64b6c01aa4428eeea75310bb,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà‡πÇ‡∏≠‡πÄ‡∏Ñ,Speed22,UCuF3XxhDkdBl_8RdNMe7nKw,0,2022-12-19,07:53:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment


Filter Questionable data

In [128]:
df_questionable_filter = question_filter(df_comment_sentence, "textDisplay")
df_questionable_filter.reset_index(drop=True, inplace=True)
print("‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î",f'{df_questionable_filter.shape[0]:,}',"‡πÅ‡∏ñ‡∏ß")
df_questionable_filter.head()

‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 1,766 ‡πÅ‡∏ñ‡∏ß


Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,‡∏û‡∏µ‡πà‡πÄ‡∏•‡∏¥‡∏Å‡∏ö‡∏≤‡∏£‡∏∞‡πÅ‡∏ô‡∏∞,UCMJ8iDZpoo0d8fGXDWVZEew,0,2023-06-24,20:09:33,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
1,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ,Over Liu,UCcjKfHTRGfJSIJIoQe_EYIw,0,2023-06-04,15:36:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
2,64b6c01aa4428eeea75310a0,‡∏°‡∏±‡∏ô‡∏Ñ‡∏ß‡∏£‡∏õ‡∏•‡πà‡∏≠‡∏¢ 120hz ‡πÑ‡∏î‡πâ‡πÅ‡∏•‡πâ‡∏ß,iaskerman,UC1CdBdpE_JyYq7LhiQpHieg,0,2023-02-08,10:29:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
3,64b6c01aa4428eeea75310a0,‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à‡∏ã‡∏∑‡πâ‡∏≠ 14 ‡πÅ‡∏ï‡πà‡∏û‡∏≠‡∏à‡∏≠ 60 ‡∏ö‡∏≤‡∏¢‡πÄ‡∏•‡∏¢‡∏Ñ‡πà‡∏∞,iaskerman,UC1CdBdpE_JyYq7LhiQpHieg,0,2023-02-08,10:29:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
4,64b6c01aa4428eeea75310bb,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà‡πÇ‡∏≠‡πÄ‡∏Ñ,Speed22,UCuF3XxhDkdBl_8RdNMe7nKw,0,2022-12-19,07:53:43,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment


In [129]:
df_corpus = df_questionable_filter[["_id", "textDisplay"]]
df_corpus

Unnamed: 0,_id,textDisplay
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...
1,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ
2,64b6c01aa4428eeea75310a0,‡∏°‡∏±‡∏ô‡∏Ñ‡∏ß‡∏£‡∏õ‡∏•‡πà‡∏≠‡∏¢ 120hz ‡πÑ‡∏î‡πâ‡πÅ‡∏•‡πâ‡∏ß
3,64b6c01aa4428eeea75310a0,‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à‡∏ã‡∏∑‡πâ‡∏≠ 14 ‡πÅ‡∏ï‡πà‡∏û‡∏≠‡∏à‡∏≠ 60 ‡∏ö‡∏≤‡∏¢‡πÄ‡∏•‡∏¢‡∏Ñ‡πà‡∏∞
4,64b6c01aa4428eeea75310bb,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà‡πÇ‡∏≠‡πÄ‡∏Ñ
...,...,...
1761,64b965335b57b20bb1eb55c3,‡∏û‡∏µ‡πà‡∏ï‡∏±‡πâ‡∏° ‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡∏ä‡∏±‡∏î‡∏°‡∏≤‡∏Å555
1762,64b965335b57b20bb1eb55ea,‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠360p
1763,64b965335b57b20bb1eb555b,‡πÄ‡∏ß‡∏•‡∏≤‡∏î‡∏π‡∏Ñ‡∏•‡∏¥‡∏õ‡∏Å‡πá‡πÑ‡∏°‡πà‡πÄ‡∏ï‡πá‡∏°‡∏à‡∏≠‚Äã ‡πÅ‡∏ñ‡∏°‡∏° ‡πÑ‡∏î‡πâ‡∏à‡∏≠hd ‡∏≠‡∏∞‡∏£‡∏≤‡∏Ñ‡∏≤‡∏ô‡∏µ‡πâ‚Äã...
1764,64b965335b57b20bb1eb555b,‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏ô‡πâ‡∏≠‡∏¢‚Äã‡πÄ‡∏≠‡∏≤‡∏à‡∏≠18‡∏ï‡πà‡∏≠9‡∏°‡∏≤‡∏Å‡πá‡∏¢‡∏±‡∏á‡πÑ‡∏°‡πà‡∏´‡∏ô‡πâ‡∏≤‡πÄ‡∏Å‡∏•‡∏µ‡∏¢‡∏î‡∏î‚Äã #2020


In [130]:
df_corpus.to_excel("Corpus_after_remove_question_clean.xlsx")

In [73]:
# def analyze_sentiment_AIforthai(df):
#     api_key = "FOFZpO9UFwSMquHMwVmZ7cjebmv0Yv8a"
#     results_df = pd.DataFrame(columns=["_id","comment", "sentiment_data"])
#     for index, row in df.iterrows():
#         sentence_id = row['_id']
#         comment = row['textDisplay']
#         response = requests.post(
#             "https://api.aiforthai.in.th/ssense",
#             data={"text": comment},
#             headers={"Apikey": api_key}
#         )
#         sentiment_data = dict(response.json())
#         if sentiment_data["sentiment"]["score"] == "0":
#             sentiment_data = "neu"
#         else:
#             sentiment_data = sentiment_data["sentiment"]['polarity'][:3]
#         results_df.loc[index] = [sentence_id,comment, sentiment_data]
#     return results_df

In [50]:
import json

def analyze_sentiment_AIforthai(df):
    api_key = "FOFZpO9UFwSMquHMwVmZ7cjebmv0Yv8a"
    new_df = df.copy()

    for index, row in new_df.iterrows():
        comment = row["textDisplay"]

        response = requests.post(
            "https://api.aiforthai.in.th/ssense",
            data={"text": comment},
            headers={"Apikey": api_key}
        )

        try:
            response_json = response.json()
            
            # Check if response contains valid JSON data
            if response_json:
                sentiment_score = response_json["sentiment"]["score"]

                if sentiment_score == 0:
                    sentiment_data = "neu"
                else:
                    sentiment_polarity = response_json["sentiment"]["polarity"]
                    sentiment_data = sentiment_polarity[:3]

            else:
                sentiment_data = None

        except json.JSONDecodeError as e:
            # print(f"Error decoding JSON response for row {index}: {e}")
            sentiment_data = None

        new_df.at[index, "sentiment"] = sentiment_data

    return new_df


---------------------------------------------------------------------------------------------------------

In [109]:
import spacy_thai
nlp = spacy_thai.load()
sentence = "14‡∏û‡∏•‡∏±‡∏™‡∏î‡∏µ‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏ú‡∏° ‡∏à‡∏≠‡πÉ‡∏´‡∏ç‡πà ‡πÅ‡∏ö‡∏ï‡∏≠‡∏∂‡∏î ‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏Å‡πá‡∏™‡∏ß‡∏¢‡∏°‡∏≤‡∏Å‡πÜ‡∏≠‡∏µ‡∏Å‡∏î‡πâ‡∏ß‡∏¢‡∏Ñ‡∏£‡∏±‡∏ö"
doc = nlp(sentence)
for token in doc:
    print(f"Head: {token.head.text}(POS: {token.head.pos_}), Relationship:{token.dep_}, Dependent:{token.text}, Result: {token.text+token.head.text}")

Head: ‡∏û‡∏•‡∏±‡∏™(POS: NOUN), Relationship:nummod, Dependent:14, Result: 14‡∏û‡∏•‡∏±‡∏™
Head: ‡∏î‡∏µ(POS: ADJ), Relationship:nsubj, Dependent:‡∏û‡∏•‡∏±‡∏™, Result: ‡∏û‡∏•‡∏±‡∏™‡∏î‡∏µ
Head: ‡∏î‡∏µ(POS: ADJ), Relationship:ROOT, Dependent:‡∏î‡∏µ, Result: ‡∏î‡∏µ‡∏î‡∏µ
Head: ‡∏î‡∏µ(POS: ADJ), Relationship:advmod, Dependent:‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î, Result: ‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î‡∏î‡∏µ
Head: ‡∏ú‡∏°(POS: PRON), Relationship:case, Dependent:‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö, Result: ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏ú‡∏°
Head: ‡∏î‡∏µ(POS: ADJ), Relationship:obl, Dependent:‡∏ú‡∏°, Result: ‡∏ú‡∏°‡∏î‡∏µ
Head: ‡∏ú‡∏°(POS: PRON), Relationship:compound, Dependent:‡∏à‡∏≠, Result: ‡∏à‡∏≠‡∏ú‡∏°
Head: ‡∏™‡∏ß‡∏¢(POS: ADJ), Relationship:advmod, Dependent:‡πÉ‡∏´‡∏ç‡πà, Result: ‡πÉ‡∏´‡∏ç‡πà‡∏™‡∏ß‡∏¢
Head: ‡∏™‡∏ß‡∏¢(POS: ADJ), Relationship:nsubj, Dependent:‡πÅ‡∏ö‡∏ï, Result: ‡πÅ‡∏ö‡∏ï‡∏™‡∏ß‡∏¢
Head: ‡πÅ‡∏ö‡∏ï(POS: NOUN), Relationship:compound, Dependent:‡∏≠‡∏∂‡∏î, Result: ‡∏≠‡∏∂‡∏î‡πÅ‡∏ö‡∏ï
Head: ‡πÅ‡∏ö‡∏ï(POS: NOUN), Relationship:compound, Depe

In [55]:
import pythainlp
import pandas as pd

def tokenize_and_split_sentences(dataframe, text_column_name):
    # Create a new DataFrame to store the tokenized sentences
    new_dataframe = pd.DataFrame(columns=dataframe.columns)
    rows_to_concat = []
    
    # Iterate over each row in the original DataFrame
    for _, row in dataframe.iterrows():
        text = row[text_column_name]
        sentences = pythainlp.sent_tokenize(text)
        
        # Create a new row for each tokenized sentence
        for sentence in sentences:
            new_row = row.copy()
            new_row[text_column_name] = sentence
            rows_to_concat.append(new_row)
    
    # Convert the list of rows into a DataFrame
    new_dataframe = pd.DataFrame(rows_to_concat)
    
    return new_dataframe

# Example usage
# Assuming you have a DataFrame named 'df' with a column named 'textDisplay'
df = pd.DataFrame({
    '_id': ['64b6c01aa4428eeea753106d', '64b6c01aa4428eeea7531070'],
    'textDisplay': ['‡∏à‡∏≤‡∏Å‡πÉ‡∏à‡πÉ‡∏ä‡πâ 7+ ‡∏°‡∏≤‡∏ô‡∏≤‡∏ô‡∏°‡∏≤‡∏Å‡πÅ‡∏•‡πâ‡∏ß ‡πÅ‡∏ü‡∏ô‡∏ã‡∏∑‡πâ‡∏≠14 ‡πÉ‡∏´‡πâ ‡∏Å‡πá‡∏î‡∏µ‡πÉ‡∏à‡∏™...', '‡πÉ‡∏ä‡πâ 8+ ‡∏≠‡∏¢‡∏π‡πà ‡∏Å‡∏≥‡∏•‡∏±‡∏á ‡∏°‡∏≠‡∏á 14 ‡∏≠‡∏¢‡∏π‡πà'],
    'authorDisplayName': ['Warisara Bee', '‡∏ß‡∏∏‡∏í‡∏¥‡∏ä‡∏±‡∏¢ ‡∏®‡∏≤‡∏™‡∏ï‡∏£‡πå‡∏ó‡∏≠‡∏á'],
    'author_id': ['UCaALObPQL634Ztu7swSTRmw', 'UC5mb-wxGvvfH8biaFVc5KxQ'],
    'likeCount': [0, 0],
    'publishedDate': ['2023-07-17', '2023-06-26'],
    'publishedTime': ['22:59:09', '01:38:49'],
    'videoId': ['https://www.youtube.com/watch?v=97bV_hvnUng', 'https://www.youtube.com/watch?v=97bV_hvnUng'],
    'keyword_search': ['iPhone 14', 'iPhone 14'],
    'Brand': ['Apple', 'Apple'],
    'type_data': ['Comment', 'Comment']
})

new_df = tokenize_and_split_sentences(df, 'textDisplay')
new_df


Unnamed: 0,_id,textDisplay,authorDisplayName,author_id,likeCount,publishedDate,publishedTime,videoId,keyword_search,Brand,type_data
0,64b6c01aa4428eeea753106d,‡∏à‡∏≤‡∏Å‡πÉ‡∏à‡πÉ‡∏ä‡πâ 7+ ‡∏°‡∏≤‡∏ô‡∏≤‡∏ô‡∏°‡∏≤‡∏Å‡πÅ‡∏•‡πâ‡∏ß,Warisara Bee,UCaALObPQL634Ztu7swSTRmw,0,2023-07-17,22:59:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
0,64b6c01aa4428eeea753106d,‡πÅ‡∏ü‡∏ô‡∏ã‡∏∑‡πâ‡∏≠14 ‡πÉ‡∏´‡πâ ‡∏Å‡πá‡∏î‡∏µ‡πÉ‡∏à‡∏™...,Warisara Bee,UCaALObPQL634Ztu7swSTRmw,0,2023-07-17,22:59:09,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment
1,64b6c01aa4428eeea7531070,‡πÉ‡∏ä‡πâ 8+ ‡∏≠‡∏¢‡∏π‡πà ‡∏Å‡∏≥‡∏•‡∏±‡∏á ‡∏°‡∏≠‡∏á 14 ‡∏≠‡∏¢‡∏π‡πà,‡∏ß‡∏∏‡∏í‡∏¥‡∏ä‡∏±‡∏¢ ‡∏®‡∏≤‡∏™‡∏ï‡∏£‡πå‡∏ó‡∏≠‡∏á,UC5mb-wxGvvfH8biaFVc5KxQ,0,2023-06-26,01:38:49,https://www.youtube.com/watch?v=97bV_hvnUng,iPhone 14,Apple,Comment


In [131]:
csv_file1 = 'Corpus_after_remove_question_clean.xlsx' 
df1 = pd.read_excel(csv_file1)
df1 = df1[['_id','textDisplay']]
df1.head()

Unnamed: 0,_id,textDisplay
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...
1,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ
2,64b6c01aa4428eeea75310a0,‡∏°‡∏±‡∏ô‡∏Ñ‡∏ß‡∏£‡∏õ‡∏•‡πà‡∏≠‡∏¢ 120hz ‡πÑ‡∏î‡πâ‡πÅ‡∏•‡πâ‡∏ß
3,64b6c01aa4428eeea75310a0,‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à‡∏ã‡∏∑‡πâ‡∏≠ 14 ‡πÅ‡∏ï‡πà‡∏û‡∏≠‡∏à‡∏≠ 60 ‡∏ö‡∏≤‡∏¢‡πÄ‡∏•‡∏¢‡∏Ñ‡πà‡∏∞
4,64b6c01aa4428eeea75310bb,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà‡πÇ‡∏≠‡πÄ‡∏Ñ


In [132]:
df1.shape[0]

1766

In [133]:
csv_file2 = 'Corpus_data.xlsx' 
df2 = pd.read_excel(csv_file2)
df2 = df2[['_id','textDisplay','Overview sentiment','Features']]
df2.head()

Unnamed: 0,_id,textDisplay,Overview sentiment,Features
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,pos,‡∏Å‡∏•‡πâ‡∏≠‡∏á:pos
1,64b6c01aa4428eeea7531076,‡∏≠‡∏¢‡∏≤‡∏Å‡∏ó‡∏£‡∏≤‡∏ö‡∏ß‡∏¥‡∏ò‡∏µ‡∏£‡∏±‡∏Å‡∏©‡∏≤‡πÅ‡∏ö‡∏ï 14 ‡∏Å‡∏±‡∏ö‡∏Ñ‡∏ß‡∏≤‡∏°‡∏£‡πâ‡∏≠‡∏ô‡∏Ç‡∏≠‡∏á‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡∏Ñ...,neu,
2,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ,neu,‡πÅ‡∏ö‡∏ï:neu
3,64b6c01aa4428eeea753107e,‡∏ú‡∏°‡πÉ‡∏ä‡πâ 14 ‡∏û‡∏≠‡∏õ‡∏¥‡∏î‡∏´‡∏ô‡πâ‡∏≤‡∏à‡∏≠,neu,
4,64b6c01aa4428eeea7531080,‡∏™‡∏á‡∏™‡∏±‡∏¢‡∏ï‡πâ‡∏≠‡∏á 15 ‡πÄ‡∏≠‡∏≤ ‡πÅ‡∏Ñ‡πà c ‡∏Å‡πá‡∏û‡∏≠‡∏Å‡πà‡∏≠‡∏ô‡∏•‡∏∞‡∏°‡πâ‡∏≤‡∏á ‡πÑ‡∏≠ 18 ‡∏Ñ‡πà...,neu,‡∏à‡∏≠:neu


In [134]:
df2.shape[0]

1000

In [139]:
merged_df = df1.merge(df2, on='textDisplay',how='left',suffixes=('','df2'))
merged_df

Unnamed: 0,_id,textDisplay,_iddf2,Overview sentiment,Features
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,64b6c01aa4428eeea7531071,pos,‡∏Å‡∏•‡πâ‡∏≠‡∏á:pos
1,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ,64b6c01aa4428eeea7531078,neu,‡πÅ‡∏ö‡∏ï:neu
2,64b6c01aa4428eeea75310a0,‡∏°‡∏±‡∏ô‡∏Ñ‡∏ß‡∏£‡∏õ‡∏•‡πà‡∏≠‡∏¢ 120hz ‡πÑ‡∏î‡πâ‡πÅ‡∏•‡πâ‡∏ß,,,
3,64b6c01aa4428eeea75310a0,‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à‡∏ã‡∏∑‡πâ‡∏≠ 14 ‡πÅ‡∏ï‡πà‡∏û‡∏≠‡∏à‡∏≠ 60 ‡∏ö‡∏≤‡∏¢‡πÄ‡∏•‡∏¢‡∏Ñ‡πà‡∏∞,,,
4,64b6c01aa4428eeea75310bb,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà‡πÇ‡∏≠‡πÄ‡∏Ñ,64b6c01aa4428eeea75310bb,pos,
...,...,...,...,...,...
1785,64b965335b57b20bb1eb55c3,‡∏û‡∏µ‡πà‡∏ï‡∏±‡πâ‡∏° ‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡∏ä‡∏±‡∏î‡∏°‡∏≤‡∏Å555,,,
1786,64b965335b57b20bb1eb55ea,‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠360p,,,
1787,64b965335b57b20bb1eb555b,‡πÄ‡∏ß‡∏•‡∏≤‡∏î‡∏π‡∏Ñ‡∏•‡∏¥‡∏õ‡∏Å‡πá‡πÑ‡∏°‡πà‡πÄ‡∏ï‡πá‡∏°‡∏à‡∏≠‚Äã ‡πÅ‡∏ñ‡∏°‡∏° ‡πÑ‡∏î‡πâ‡∏à‡∏≠hd ‡∏≠‡∏∞‡∏£‡∏≤‡∏Ñ‡∏≤‡∏ô‡∏µ‡πâ‚Äã...,,,
1788,64b965335b57b20bb1eb555b,‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏ô‡πâ‡∏≠‡∏¢‚Äã‡πÄ‡∏≠‡∏≤‡∏à‡∏≠18‡∏ï‡πà‡∏≠9‡∏°‡∏≤‡∏Å‡πá‡∏¢‡∏±‡∏á‡πÑ‡∏°‡πà‡∏´‡∏ô‡πâ‡∏≤‡πÄ‡∏Å‡∏•‡∏µ‡∏¢‡∏î‡∏î‚Äã #2020,,,


In [140]:
result_df = merged_df[["_id","textDisplay", "Overview sentiment", "Features"]]
result_df = result_df.drop_duplicates(subset='textDisplay')
result_df

Unnamed: 0,_id,textDisplay,Overview sentiment,Features
0,64b6c01aa4428eeea7531071,‡∏ä‡∏≠‡∏ö‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏Å‡∏±‡∏ö ‡∏ï‡∏±‡∏î‡∏ï‡πà‡∏≠‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡πÅ‡∏Ñ‡πà‡∏ô‡∏±‡πâ‡∏ô‡πÅ‡∏´‡∏•‡∏∞‡∏ñ‡πâ‡∏≤‡∏à‡∏∞‡∏ã‡∏∑‡πâ‡∏≠‡πÑ...,pos,‡∏Å‡∏•‡πâ‡∏≠‡∏á:pos
1,64b6c01aa4428eeea7531078,‡∏Ñ‡∏∏‡∏ì‡∏†‡∏≤‡∏û‡πÅ‡∏ö‡∏ï‡∏•‡∏î‡πÑ‡∏õ 3% ‡πÄ‡∏´‡∏•‡∏∑‡∏≠ 97%‡∏ñ‡πâ‡∏≤‡∏´‡∏°‡∏î‡πÑ‡∏õ1‡∏õ‡∏µ,neu,‡πÅ‡∏ö‡∏ï:neu
2,64b6c01aa4428eeea75310a0,‡∏°‡∏±‡∏ô‡∏Ñ‡∏ß‡∏£‡∏õ‡∏•‡πà‡∏≠‡∏¢ 120hz ‡πÑ‡∏î‡πâ‡πÅ‡∏•‡πâ‡∏ß,,
3,64b6c01aa4428eeea75310a0,‡∏Å‡∏≥‡∏•‡∏±‡∏á‡∏à‡∏∞‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à‡∏ã‡∏∑‡πâ‡∏≠ 14 ‡πÅ‡∏ï‡πà‡∏û‡∏≠‡∏à‡∏≠ 60 ‡∏ö‡∏≤‡∏¢‡πÄ‡∏•‡∏¢‡∏Ñ‡πà‡∏∞,,
4,64b6c01aa4428eeea75310bb,‡πÅ‡∏ö‡∏ï‡πÄ‡∏ï‡∏≠‡∏£‡∏µ‡πà‡πÇ‡∏≠‡πÄ‡∏Ñ,pos,
...,...,...,...,...
1785,64b965335b57b20bb1eb55c3,‡∏û‡∏µ‡πà‡∏ï‡∏±‡πâ‡∏° ‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠‡∏ä‡∏±‡∏î‡∏°‡∏≤‡∏Å555,,
1786,64b965335b57b20bb1eb55ea,‡∏ß‡∏µ‡∏î‡∏µ‡πÇ‡∏≠360p,,
1787,64b965335b57b20bb1eb555b,‡πÄ‡∏ß‡∏•‡∏≤‡∏î‡∏π‡∏Ñ‡∏•‡∏¥‡∏õ‡∏Å‡πá‡πÑ‡∏°‡πà‡πÄ‡∏ï‡πá‡∏°‡∏à‡∏≠‚Äã ‡πÅ‡∏ñ‡∏°‡∏° ‡πÑ‡∏î‡πâ‡∏à‡∏≠hd ‡∏≠‡∏∞‡∏£‡∏≤‡∏Ñ‡∏≤‡∏ô‡∏µ‡πâ‚Äã...,,
1788,64b965335b57b20bb1eb555b,‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏ô‡πâ‡∏≠‡∏¢‚Äã‡πÄ‡∏≠‡∏≤‡∏à‡∏≠18‡∏ï‡πà‡∏≠9‡∏°‡∏≤‡∏Å‡πá‡∏¢‡∏±‡∏á‡πÑ‡∏°‡πà‡∏´‡∏ô‡πâ‡∏≤‡πÄ‡∏Å‡∏•‡∏µ‡∏¢‡∏î‡∏î‚Äã #2020,,


In [141]:
result_df.to_excel("test_df.xlsx")

--------------------------------------------------------------

# Test

In [30]:
import spacy
import spacy_pythainlp.core


nlp = spacy.blank("th")
# Segment the Doc into sentences
nlp.add_pipe(
   "pythainlp", 
   config={
        "pos_engine": "perceptron",
        "pos": True,
        "pos_corpus": "orchid_ud",
        "sent_engine": "crfcut",
        "sent": True,
        "ner_engine": "thainer",
        "ner": True,
        "tokenize_engine": "newmm",
        "tokenize": False,
        "dependency_parsing": False,
        "dependency_parsing_engine": "esupar",
        "dependency_parsing_model": None,
        "word_vector": True,
        "word_vector_model": "thai2fit_wv"
    }
)

text = "13pm ‡∏Ç‡∏≠‡∏ú‡∏° ‡πÄ‡∏ß‡∏•‡∏≤‡πÉ‡∏ä‡πâ‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏ö‡∏≤‡∏á‡∏ó‡∏µ‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏°‡∏±‡∏ô‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡∏à‡∏∞‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡πÄ‡∏•‡∏ô‡πÄ‡∏≠‡∏á‡∏´‡∏£‡∏∑‡∏≠‡∏≠‡∏∞‡πÑ‡∏£‡∏™‡∏±‡∏Å‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏°‡∏±‡∏ô‡πÄ‡∏ö‡∏•‡∏≠‡∏≠‡∏∞‡∏Ñ‡∏±‡∏ö ‡∏ö‡∏≤‡∏á‡∏ó‡∏µ‡∏Å‡πá‡πÑ‡∏°‡πà‡∏Ñ‡πà‡∏≠‡∏¢‡πÇ‡∏ü‡∏Å‡∏±‡∏™ ‡πÄ‡∏õ‡πá‡∏ô‡πÄ‡∏û‡∏£‡∏≤‡∏∞ ‡∏ï‡∏¥‡∏î‡∏ü‡∏¥‡∏•‡∏°‡πå‡∏Ñ‡∏£‡∏≠‡∏ö‡πÄ‡∏•‡∏ô‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏´‡∏£‡∏∑‡∏≠‡∏õ‡πà‡∏≤‡∏ß‡∏Ñ‡∏±‡∏ö"
data=nlp(text)
print(list(data.sents))
print('------------------')
print(sent_tokenize(text))
# output: [‡∏ú‡∏°‡πÄ‡∏õ‡πá‡∏ô‡∏Ñ‡∏ô‡πÑ‡∏ó‡∏¢   ‡πÅ‡∏ï‡πà‡∏°‡∏∞‡∏•‡∏¥‡∏≠‡∏¢‡∏≤‡∏Å‡πÑ‡∏õ‡πÇ‡∏£‡∏á‡πÄ‡∏£‡∏µ‡∏¢‡∏ô‡∏™‡πà‡∏ß‡∏ô‡∏ú‡∏°‡∏à‡∏∞‡πÑ‡∏õ‡πÑ‡∏´‡∏ô  , ‡∏ú‡∏°‡∏≠‡∏¢‡∏≤‡∏Å‡πÑ‡∏õ‡πÄ‡∏ó‡∏µ‡πà‡∏¢‡∏ß]

[13pm , ‡∏Ç‡∏≠‡∏ú‡∏° ‡πÄ‡∏ß‡∏•‡∏≤, ‡πÉ‡∏ä‡πâ‡∏Å‡∏•‡πâ‡∏≠‡∏á ‡∏ö‡∏≤‡∏á‡∏ó‡∏µ‡∏Å‡∏•‡πâ‡∏≠‡∏á, ‡∏°‡∏±‡∏ô‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡∏à‡∏∞‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡πÄ‡∏•‡∏ô‡πÄ‡∏≠‡∏á‡∏´‡∏£‡∏∑‡∏≠‡∏≠‡∏∞‡πÑ‡∏£‡∏™‡∏±‡∏Å‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏°‡∏±‡∏ô‡πÄ‡∏ö‡∏•‡∏≠‡∏≠‡∏∞‡∏Ñ‡∏±‡∏ö ‡∏ö‡∏≤‡∏á‡∏ó‡∏µ‡∏Å‡πá‡πÑ‡∏°‡πà‡∏Ñ‡πà‡∏≠‡∏¢, ‡πÇ‡∏ü‡∏Å‡∏±‡∏™ ‡πÄ‡∏õ‡πá‡∏ô‡πÄ‡∏û‡∏£‡∏≤‡∏∞ ‡∏ï‡∏¥‡∏î‡∏ü‡∏¥‡∏•‡∏°‡πå‡∏Ñ‡∏£‡∏≠‡∏ö‡πÄ‡∏•‡∏ô‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏´‡∏£‡∏∑‡∏≠‡∏õ‡πà‡∏≤‡∏ß‡∏Ñ‡∏±‡∏ö]
------------------
['13pm ', '‡∏Ç‡∏≠‡∏ú‡∏° ', '‡πÄ‡∏ß‡∏•‡∏≤‡πÉ‡∏ä‡πâ‡∏Å‡∏•‡πâ‡∏≠‡∏á ', '‡∏ö‡∏≤‡∏á‡∏ó‡∏µ‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏°‡∏±‡∏ô‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡∏à‡∏∞‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡πÄ‡∏•‡∏ô‡πÄ‡∏≠‡∏á‡∏´‡∏£‡∏∑‡∏≠‡∏≠‡∏∞‡πÑ‡∏£‡∏™‡∏±‡∏Å‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏°‡∏±‡∏ô‡πÄ‡∏ö‡∏•‡∏≠‡∏≠‡∏∞‡∏Ñ‡∏±‡∏ö ', '‡∏ö‡∏≤‡∏á‡∏ó‡∏µ‡∏Å‡πá‡πÑ‡∏°‡πà‡∏Ñ‡πà‡∏≠‡∏¢‡πÇ‡∏ü‡∏Å‡∏±‡∏™ ‡πÄ‡∏õ‡πá‡∏ô‡πÄ‡∏û‡∏£‡∏≤‡∏∞ ‡∏ï‡∏¥‡∏î‡∏ü‡∏¥‡∏•‡∏°‡πå‡∏Ñ‡∏£‡∏≠‡∏ö‡πÄ‡∏•‡∏ô‡∏Å‡∏•‡πâ‡∏≠‡∏á‡∏´‡∏£‡∏∑‡∏≠‡∏õ‡πà‡∏≤‡∏ß‡∏Ñ‡∏±‡∏ö']
