In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from pythainlp import word_tokenize
from pythainlp.ulmfit import process_thai


###### Sentiment Analysis & Create Model ######

In [2]:
with open("wisesight-sentiment-master/kaggle-competition/train.txt", encoding="utf8") as f:
    texts = [line.strip() for line in f.readlines()]

with open("wisesight-sentiment-master/kaggle-competition/train_label.txt", encoding="utf8") as f:
    categories = [line.strip() for line in f.readlines()]

all_df = pd.DataFrame({"category":categories, "texts":texts})
all_df.to_csv('all_df.csv',index=False)
all_df.shape

(24063, 2)

In [3]:
all_df["processed"] = all_df.texts.map(lambda x: "|".join(process_thai(x)))
all_df["wc"] = all_df.processed.map(lambda x: len(x.split("|")))
all_df["uwc"] = all_df.processed.map(lambda x: len(set(x.split("|"))))

In [4]:
#when finding hyperparameters
from sklearn.model_selection import train_test_split
train_df, valid_df = train_test_split(all_df, test_size=0.15, random_state=1412)
train_df = train_df.reset_index(drop=True)
valid_df = valid_df.reset_index(drop=True)

In [5]:
#dependent variables
y_train = train_df["category"]
y_valid = valid_df["category"]

In [6]:
#text faetures
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

tfidf = TfidfVectorizer(tokenizer=process_thai, ngram_range=(1,2), min_df=20, sublinear_tf=True)
tfidf_fit = tfidf.fit(all_df["texts"])
text_train = tfidf_fit.transform(train_df["texts"])
text_valid = tfidf_fit.transform(valid_df["texts"])
text_train.shape, text_valid.shape



((20453, 4623), (3610, 4623))

In [7]:
#word count and unique word counts; actually might not be so useful
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler_fit = scaler.fit(all_df[["wc","uwc"]].astype(float))
print(scaler_fit.mean_, scaler_fit.var_)
num_train = scaler_fit.transform(train_df[["wc","uwc"]].astype(float))
num_valid = scaler_fit.transform(valid_df[["wc","uwc"]].astype(float))
num_train.shape, num_valid.shape

[21.94053111 18.19224536] [1152.55429082  510.60884649]


((20453, 2), (3610, 2))

In [8]:
#concatenate text and word count features
X_train = np.concatenate([num_train,text_train.toarray()],axis=1)
X_valid = np.concatenate([num_valid,text_valid.toarray()],axis=1)
X_train.shape, X_valid.shape

((20453, 4625), (3610, 4625))

In [9]:
#fit logistic regression models
model = LogisticRegression(C=2., penalty="l2", solver="liblinear", dual=False, multi_class="ovr")
model.fit(X_train,y_train)
model.score(X_valid,y_valid)

0.7318559556786703

In [10]:
probs = model.predict_proba(X_valid)
probs_df = pd.DataFrame(probs)
probs_df.columns = model.classes_
probs_df["preds"] = model.predict(X_valid)
probs_df["category"] = valid_df.category
probs_df["texts"] = valid_df.texts
probs_df["processed"] = valid_df.processed
probs_df["wc"] = valid_df.wc
probs_df["uwc"] = valid_df.uwc
probs_df["hit"] = (probs_df.preds==probs_df.category)
probs_df.head()

Unnamed: 0,neg,neu,pos,q,preds,category,texts,processed,wc,uwc,hit
0,0.50504,0.273455,0.219629,0.001876,neg,neu,‡πÄ‡∏´‡πá‡∏ô‡∏Ñ‡∏ô‡∏•‡∏ö‡πÅ‡∏≠‡∏û viu ‡∏Å‡πá‡πÄ‡∏´‡πá‡∏ô‡πÉ‡∏à‡πÅ‡∏•‡∏∞‡πÄ‡∏Ç‡πâ‡∏≤‡πÉ‡∏à‡πÄ‡∏Ç‡∏≤‡∏ô‡∏∞‡∏Ñ‡∏∞ ‡πÅ‡∏ú‡∏•‡∏°‡∏±...,‡πÄ‡∏´‡πá‡∏ô|‡∏Ñ‡∏ô|‡∏•‡∏ö|‡πÅ‡∏≠|‡∏û|viu|‡∏Å‡πá|‡πÄ‡∏´‡πá‡∏ô‡πÉ‡∏à|‡πÅ‡∏•‡∏∞|‡πÄ‡∏Ç‡πâ‡∏≤‡πÉ‡∏à|‡πÄ‡∏Ç‡∏≤|‡∏ô...,47,42,False
1,0.101227,0.844443,0.048554,0.005776,neu,neu,‡πÑ‡∏õ‡∏ä‡∏°‡πÑ‡∏°‡πâ‡∏Ñ‡∏¥‡∏ß‡∏Ç‡∏≠‡∏á‡πÅ‡∏ä‡∏°‡∏õ‡πå ‡πÅ‡∏•‡∏∞‡∏£‡∏≠‡∏á‡πÅ‡∏ä‡∏°‡∏õ‡πå ‡∏Å‡∏±‡∏ô‡∏à‡πâ‡∏≤! ..........,‡πÑ‡∏õ|‡∏ä‡∏°|‡πÑ‡∏°‡πâ|‡∏Ñ‡∏¥‡∏ß|‡∏Ç‡∏≠‡∏á|‡πÅ‡∏ä‡∏°‡∏õ‡πå|‡πÅ‡∏•‡∏∞|‡∏£‡∏≠‡∏á|‡πÅ‡∏ä‡∏°‡∏õ‡πå|‡∏Å‡∏±‡∏ô|‡∏à‡πâ‡∏≤|...,42,40,True
2,0.558792,0.349736,0.089218,0.002254,neg,neg,‡∏Å‡∏•‡∏∏‡πà‡∏°‡∏£‡∏ñ‡∏ã‡∏µ‡∏ß‡∏¥‡∏Ñ‡πÄ‡∏õ‡πá‡∏ô‡∏Å‡∏•‡∏∏‡πà‡∏°‡∏ó‡∏µ‡πà‡∏ô‡πà‡∏≤‡∏£‡∏≥‡∏Ñ‡∏≤‡∏ô‡∏°‡∏≤‡∏Å‡∏Å‡∏Å‡∏Å‡∏Å‡∏Å‡∏Å‡∏Å‡∏Å ‡∏≠‡∏ß...,‡∏Å‡∏•‡∏∏‡πà‡∏°|‡∏£‡∏ñ|‡∏ã‡∏µ‡∏ß‡∏¥‡∏Ñ|‡πÄ‡∏õ‡πá‡∏ô|‡∏Å‡∏•‡∏∏‡πà‡∏°|‡∏ó‡∏µ‡πà|‡∏ô‡πà‡∏≤|‡∏£‡∏≥|‡∏Ñ‡∏≤‡∏ô|‡∏°‡∏≤‡∏Å|x...,46,35,True
3,0.075085,0.563439,0.356436,0.00504,neu,neu,‡∏≠‡∏¢‡∏≤‡∏Å‡∏™‡∏ß‡∏¢‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡πÄ‡∏à‡πâ‡∏≤‡∏Ç‡∏≠‡∏á‡πÅ‡∏ö‡∏£‡∏ô‡∏î‡πå‡∏™‡∏¥‡∏Ñ‡∏∞ ‡πÄ‡∏ô‡∏¢ ‡πÇ‡∏ä‡∏ï‡∏¥‡∏Å‡∏≤ ‡πÉ‡∏ö‡∏´‡∏ô...,‡∏≠‡∏¢‡∏≤‡∏Å|‡∏™‡∏ß‡∏¢|‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô|‡πÄ‡∏à‡πâ‡∏≤‡∏Ç‡∏≠‡∏á|‡πÅ‡∏ö‡∏£‡∏ô‡∏î‡πå|‡∏™‡∏¥|‡∏Ñ‡∏∞|‡πÄ‡∏ô‡∏¢|‡πÇ‡∏ä‡∏ï‡∏¥|...,72,56,True
4,0.727432,0.265063,0.007502,3e-06,neg,neg,‡∏Ç‡πâ‡∏≤‡∏ß‡πÇ‡∏ñ‡∏•‡∏∞‡∏£‡πâ‡∏≠‡∏¢ ‡πÅ‡∏û‡∏á ‡πÄ‡∏û‡∏£‡∏≤‡∏∞‡∏ï‡∏±‡∏Å‡πÄ‡∏õ‡πá‡∏ô‡∏à‡∏≤‡∏ô‡πÜ‡∏•‡∏∞15 ‡πÄ‡∏ï‡πá‡∏°‡∏ó‡∏µ‡πà‡∏Å...,‡∏Ç‡πâ‡∏≤‡∏ß|‡πÇ‡∏ñ|‡∏•‡∏∞|‡∏£‡πâ‡∏≠‡∏¢|‡πÅ‡∏û‡∏á|‡πÄ‡∏û‡∏£‡∏≤‡∏∞|‡∏ï‡∏±‡∏Å|‡πÄ‡∏õ‡πá‡∏ô|‡∏à‡∏≤‡∏ô|‡πÜ|‡∏•‡∏∞|15...,381,218,True


###### Create function ######

In [11]:
def assign_sentiment(pneg, pneu, ppos):
    if pneu >= 0.7:
        return 'neu'
    else:
        if max(pneg, ppos) == pneg:
            return 'neg'
        else:
            return 'pos'

In [12]:
def apply_datetime(start_time, end_time):
    x = start_time.split()[1]
    y = end_time.split()[1]
    
    
    t1 = datetime.strptime(x, "%H:%M:%S")

    t2 = datetime.strptime(y, "%H:%M:%S")

    # get difference
    delta = t2 - t1
    
    # time difference in milliseconds
    ms = delta.total_seconds() / 60
    
    return int(ms)

In [13]:
def assign_gender(x):
    male_word = ['‡∏Ñ‡∏£‡∏±‡∏ö', '‡∏Ñ‡∏±‡∏ö', '‡∏ú‡∏°']
    female_word = ['‡∏Ñ‡πà‡∏∞', '‡∏Ñ‡∏∞', '‡∏´‡∏ô‡∏π']
    for i in male_word:
        if i in x:
            return 'Male'
    for j in female_word:
        if j in x:
            return 'Female'
    return 'Undefine'

##### Prepare Data ##### 

In [14]:
import pytchat
import pandas as pd 
chat = pytchat.create(video_id="6iXz5bpLn54")

dList = []
aList = []
mList = []

i = 0
while chat.is_alive():
  for c in chat.get().sync_items():
    print(f"{c.datetime} [{c.author.name}]- {c.message}")
    dList.append(c.datetime)
    aList.append(c.author.name)
    mList.append(c.message)
    i = i+1
    if i == 1000:
      chat.terminate()
      break

# dictionary of lists 
dict = {'dateTime': dList, 'authorName': aList, 'Message': mList}

df = pd.DataFrame(dict)
df.head()

#from google.colab import files
#df.to_csv("data.csv", encoding="utf_8_sig", index=False) #cp874, #TIS-620
#files.download("data.csv")

2023-01-10 05:59:54 [Pornchai S.]- hi
2023-01-10 06:00:03 [BOY BANK]- 2
2023-01-10 06:00:04 [‡∏≠‡∏†‡∏¥‡∏ä‡∏≤‡∏ï ‡∏™‡∏±‡∏á‡∏Ç‡πå‡∏•‡∏≤‡∏¢]- :person-turqouise-waving::person-turqouise-waving::person-turqouise-waving::person-turqouise-waving::person-turqouise-waving:
2023-01-10 06:00:05 [Pornchai S.]- hi
2023-01-10 06:00:20 [Dec.]- ‡∏™‡∏ß‡∏±‡∏™‡∏î‡∏µ‡∏Ñ‡πà‡∏∞‡∏∞‡∏∞‡∏∞‡∏∞
2023-01-10 06:00:23 [leo]- :face-blue-smiling:
2023-01-10 06:00:34 [Prath C]- ‡∏≠‡∏£‡∏∏‡∏ì‡∏™‡∏ß‡∏±‡∏™‡∏î‡∏¥‡πå‡∏Ñ‡∏£‡∏±‡∏ö‡∏ó‡∏∏‡∏Å‡∏Ñ‡∏ô
2023-01-10 06:00:37 [Wisanuwan Puengtumjit]- ‡∏°‡∏≤‡∏ï‡πà‡∏≠‡∏Å‡∏±‡∏ô‡∏Ñ‡∏£‡∏±‡∏ö
2023-01-10 06:00:50 [leo]- :folded_hands:‡∏•‡∏∏‡∏á‡∏¢‡∏∏‡∏ó‡∏ò
2023-01-10 06:01:01 [leo]- :folded_hands:‡∏ó‡∏∏‡∏Å‡∏Ñ‡∏ô
2023-01-10 06:01:09 [Windy blue Mc]- Good morning ‡∏Ñ‡πà‡∏∞
2023-01-10 06:01:40 [alongkot khongphan]- ‡∏™‡∏á‡∏™‡∏≤‡∏£‡∏à‡∏±‡∏á
2023-01-10 06:01:57 [alongkot khongphan]- ‡πÄ‡∏Å‡πà‡∏á‡∏£‡∏µ‡∏ö‡πÑ‡∏õ‡πÑ‡∏´‡∏ô
2023-01-10 06:02:35 [‡∏ä‡∏¥‡∏ô‡∏à‡∏±‡∏á 1903]- ‡∏Ç‡∏±‡∏ö‡∏Ç‡∏µ‡πà‡∏ï‡πâ‡∏≠‡∏á‡∏£‡∏

2023-01-10 06:07:50 [Í™≥H]- ‡πÄ‡∏´‡∏•‡∏∑‡∏≠‡πÅ‡∏ï‡πà‡∏£‡∏π‡∏õ‡∏ó‡∏µ‡πà‡∏°‡∏µ‡∏ó‡∏∏‡∏Å‡∏ö‡πâ‡∏≤‡∏ô
2023-01-10 06:07:53 [mdfbp pd1]- ‡∏î‡∏π‡∏´‡∏ô‡πâ‡∏≤‡∏Ñ‡∏ô‡∏Ç‡∏∂‡∏ö-‡πÑ‡∏°‡πà‡∏™‡∏•‡∏î‡∏à‡∏£‡∏¥‡∏á
2023-01-10 06:07:56 [Mangos Repent]- :smiling_face_with_smiling_eyes::tulip:
2023-01-10 06:07:58 [Devil's Gold]- ‡∏Ç‡∏≠‡πÉ‡∏´‡πâ‡∏≠‡∏µ‡∏õ‡πâ‡∏≤‡∏ó‡∏µ‡πà‡∏ä‡∏ô‡∏•‡∏π‡∏Å‡∏°‡∏£‡∏∂‡∏á‡πÇ‡∏î‡∏ô‡∏Ñ‡∏ô‡∏ä‡∏ô‡∏ï‡∏≤‡∏¢‡πÑ‡∏ß‡πÜ:pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face::pouting_face:
2023-01-10 06:07:59 [‡∏≠‡∏ß‡∏ï‡∏≤‡∏£ ‡∏≠‡∏ß‡∏ï‡∏≤‡∏£]- ‡∏™‡∏°‡∏±‡∏¢‡∏õ‡∏£‡∏∞‡∏¢‡∏∏‡∏ó‡∏ò ‡∏°‡∏µ‡∏Ñ‡∏ß‡∏≤‡∏°‡∏™‡∏∏‡∏Ç‡∏´‡∏£‡∏∑‡∏≠ ‡πÄ‡∏°‡∏∑‡πà‡∏≠‡πÑ‡∏£‡∏à‡∏∞‡πÑ‡∏õ‡∏™‡∏±‡∏Å‡∏ó‡∏µ
2023-01-10 06:0

2023-01-10 06:15:01 [‡∏ö‡∏±‡∏•‡∏•‡∏±‡∏á‡∏Å‡πå ‡∏™‡∏∏‡∏Ç‡πÄ‡∏à‡∏£‡∏¥‡∏ç]- ‡∏´‡∏°‡∏≠ ‡∏´‡∏°‡∏≠ ‡∏´‡∏°‡∏≠ ‡πÄ‡∏Æ‡πà‡∏≠
2023-01-10 06:15:01 [MOO Koingc]- ‡∏¢‡∏Å‡πÄ‡∏ß‡πâ‡∏ô‡∏°‡∏µ‡πÄ‡∏á‡∏¥‡∏ô‡∏´‡∏°‡∏≠‡∏Å‡πá‡∏à‡∏∞‡∏î‡∏π‡πÅ‡∏•‡∏î‡∏µ
2023-01-10 06:15:01 [‡∏ò‡∏ô‡∏≤‡∏†‡∏±‡∏ó‡∏£ü•∞]- @‡∏Ñ‡∏∏‡∏ì‡∏®‡∏¥‡∏£‡∏¥‡∏û‡∏±‡∏í‡∏ô‡πå‡∏à‡∏±‡∏ô‡∏ó‡∏£‡πå:hand-pink-waving::face-blue-smiling:‡∏™‡∏ß‡∏±‡∏™‡∏î‡∏µ‡∏Ñ‡∏£‡∏±‡∏ö‡∏Ñ‡∏∏‡∏ì‡∏´‡∏á‡∏©‡πå:flamingo:
2023-01-10 06:15:09 [chit chiwchiw]- ?‡∏ç‡πî
2023-01-10 06:15:11 [Nineonewan cannabis plunt psycoll]- ‡πÄ‡∏£‡∏≤‡∏ä‡∏≠‡∏ö‡∏î‡∏≤‡∏ß‡πÄ‡∏ó‡∏µ‡∏¢‡∏°‡∏°‡∏≤‡∏Å‡∏Å‡∏ß‡πà‡∏≤‡∏î‡∏≤‡∏ß‡πÅ‡∏ó‡πâ
2023-01-10 06:15:21 [‡∏ò‡∏ô‡∏≤‡∏†‡∏±‡∏ó‡∏£ü•∞]- ‡∏≠‡∏≤‡∏£‡πå‡πÄ‡∏ã‡∏ô‡πà‡∏≠‡∏•:tongue::deaf_person::honey_pot::honey_pot::honey_pot::honey_pot::honey_pot::face_savoring_food:
2023-01-10 06:15:25 [mdfbp pd1]- ‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡∏õ‡∏Å‡∏™‡∏∑‡∏à‡πà‡∏≤‡∏¢‡πÅ‡∏ï‡πà‡∏û‡∏≤‡∏£‡∏≤‡πÑ‡∏•‡πà‡∏Å‡∏•‡∏±‡∏ö
2023-01-10 06:15:30 [‡∏™‡∏∏‡∏£‡∏û‡∏£ ‡∏Å‡∏±‡∏ç‡∏ç‡∏û‡∏±‡∏ô‡∏ò‡πå]- ‡πÄ‡∏ä‡∏•‡∏ä‡∏µ‡πÉ‡∏ï‡πâ:f

2023-01-10 06:21:15 [Aor Manatsa]- 55555
2023-01-10 06:21:18 [‡∏ò‡∏ô‡∏≤‡∏†‡∏±‡∏ó‡∏£ü•∞]- ‡∏¢‡∏∏‡πÑ‡∏Å‡∏•‡πâ‡πÜ‡∏à‡∏∞‡πÑ‡∏•‡πà‡∏ï‡∏ö‡πÉ‡∏´‡πâ‡∏´‡∏°‡∏î
2023-01-10 06:21:18 [thavesak claysuwan]- ‡πÑ‡∏õ‡∏´‡∏≤‡∏ó‡∏µ‡πà‡∏®‡∏≤‡∏•‡∏≤‡∏Å‡∏•‡∏≤‡∏á‡∏à‡∏±‡∏á‡∏´‡∏ß‡∏±‡∏î‡πÄ‡∏•‡∏¢‡∏°‡∏µ‡πÇ‡∏£‡∏á‡∏´‡∏°‡∏≠‡∏û‡∏£‡πâ‡∏≠‡∏°‡∏ó‡∏∏‡∏Å‡∏´‡∏ô‡πà‡∏ß‡∏¢‡∏á‡∏≤‡∏ô‡∏£‡∏≤‡∏ä‡∏Å‡∏≤‡∏£‡∏à‡∏±‡∏á‡∏´‡∏ß‡∏±‡∏î‡∏Ñ‡∏£‡∏ö
2023-01-10 06:21:19 [‡∏™‡∏≤‡∏Å-‡∏≠‡πä‡∏≠‡∏ô-‡∏≠‡πà‡∏≠‡∏ô ‡∏à‡∏£‡∏¥‡∏á'‡∏á ‡∏ô‡∏∞]- ‡∏ï‡∏¥‡πà‡∏á
2023-01-10 06:21:22 [‡∏ô‡∏Å ‡∏ô‡∏Å]- :face-blue-smiling::face-blue-smiling::face-blue-smiling:
2023-01-10 06:21:22 [üçÅJen Jen.hk.üçÅ]- :rolling_on_the_floor_laughing::rolling_on_the_floor_laughing::rolling_on_the_floor_laughing:
2023-01-10 06:21:22 [puppet]- :face-orange-raised-eyebrow::face-orange-raised-eyebrow::face-orange-raised-eyebrow::face-orange-biting-nails:
2023-01-10 06:21:28 [‡∏ô‡πâ‡∏≠‡∏á‡πÑ‡∏≠‡∏≠‡∏∏‡πà‡∏ô ‡∏Å‡∏±‡∏ö‡∏Ñ‡∏∏‡∏ì‡∏û‡πà‡∏≠‡∏≠‡πâ‡∏ß‡∏ô]- ‡∏≠‡∏¢‡πà‡∏≤‡∏ä‡∏á‡∏°‡∏≤‡∏Å‡πÄ‡∏•‡∏¢
2023-01-10 

2023-01-10 06:25:33 [‡∏™‡∏≤‡∏ò‡∏¥‡∏ï ‡∏û‡∏á‡∏©‡πå‡πÄ‡∏™‡∏ô‡∏≤]- ‡πÅ‡∏°‡πà‡πÄ‡∏ï‡∏∑‡∏≠‡∏ô‡πÅ‡∏•‡πâ‡∏ß...‡∏•‡∏π‡∏Å‡∏ä‡∏≤‡∏¢ ‡πÑ‡∏°‡πà‡∏ü‡∏±‡∏á‡πÄ‡∏≠‡∏á
2023-01-10 06:25:33 [Kkk Pj]- ‡∏™‡∏ß‡∏±‡∏™‡∏î‡∏µ‡∏Ñ‡∏£‡∏±‡∏ö
2023-01-10 06:25:33 [‡∏ì‡∏û‡∏á‡∏©‡∏Å‡∏£ ‡πÅ‡∏Å‡πâ‡∏ß‡∏à‡∏±‡∏ô‡∏ó‡∏£‡πå]- ‡∏™‡∏ß‡πâ‡∏™‡∏î‡∏µ‡∏ï‡∏≠‡∏ô‡πÄ‡∏ä‡πâ‡∏≤‡∏Ñ‡∏£‡∏±‡∏ö‡∏û‡∏µ‡πà‡∏¢‡∏∏‡∏ó‡∏ò ‡∏ô‡πâ‡∏≠‡∏á‡πÑ‡∏ö‡∏£‡πå‡∏ó
2023-01-10 06:25:34 [‡πÑ‡∏≠‡πâ‡∏´‡∏ô‡∏∏‡πà‡∏°‡∏£‡∏ñ‡πÑ‡∏ñüöúüöú]- ‡∏™‡∏µ‡∏≠‡∏∞‡πÑ‡∏£
2023-01-10 06:25:37 [Maria]- ‡∏Å‡∏∞‡∏•‡∏≤ ‡∏ï‡πâ‡∏°‡∏ï‡∏∏‡πã‡∏ô‡πÅ‡∏°‡πâ‡πÅ‡∏ï‡πà‡πÄ‡∏î‡πá‡∏Å
2023-01-10 06:25:42 [puppet]- ‡πÄ‡∏ä‡∏∑‡πà‡∏≠‡πÇ‡∏à‡∏£ ... ‡∏•‡∏á‡∏ó‡∏∏‡∏ô‡∏Å‡∏±‡∏ö‡πÇ‡∏à‡∏£ ....:face-orange-biting-nails:
2023-01-10 06:25:45 [go inno]- :face-green-smiling:‡∏°‡∏µ ‡∏£‡∏±‡∏ê‡∏ö‡∏≤‡∏• ‡πÑ‡∏ß‡πâ‡∏ó‡∏≥‡πÑ‡∏° :face-green-smiling:
2023-01-10 06:25:45 [‡∏•‡∏π‡∏Å‡∏ä‡∏≤‡∏ß‡∏ô‡∏≤ ‡∏ä‡∏¥‡∏ì‡∏£‡∏¥‡∏ô‡∏ó‡∏£‡πå]- ‡∏¢‡∏∏‡∏Ñ‡∏Å‡∏≤‡∏£‡∏ó‡∏∏‡∏à‡∏£‡∏¥‡∏ï‡πÄ‡∏ü‡∏∑‡πà‡∏≠‡∏á‡∏ü‡∏π‡∏™‡∏π‡∏á‡∏™‡∏∏‡∏î//‡∏Å‡πÄ‡∏£‡∏∑‡∏≠‡∏£‡∏ö‡∏¢‡∏±‡∏á‡∏•‡

2023-01-10 06:30:02 [‡∏Ñ‡∏ô‡∏î‡∏µ ‡∏ó‡∏µ‡πà ‡πÅ‡∏™‡∏ô‡πÄ‡∏•‡∏ß]- ‡∏´‡∏•‡∏±‡∏Å‡∏ê‡∏≤‡∏ô‡∏ä‡∏±‡∏î‡πÅ‡∏ï‡πà‡∏à‡∏±‡∏ö‡πÑ‡∏°‡πà‡πÄ‡∏Ñ‡∏¢‡πÑ‡∏î‡πâ
2023-01-10 06:30:03 [Hikikomori Gaming]- ‡∏Å‡∏≥ ‡∏á‡∏≤‡∏ô‡πÇ‡∏Ñ‡∏ï‡∏£‡∏á‡πà‡∏≤‡∏¢ ‡πÅ‡∏Ñ‡πà‡∏î‡∏π‡∏¢‡∏π‡∏ó‡∏π‡∏õ
2023-01-10 06:30:06 [‡∏ä‡∏¥‡∏ô‡∏à‡∏±‡∏á 1903]- ‡πÑ‡∏≠‡πâ‡∏ó‡∏µ‡πà‡∏°‡∏±‡∏ô‡∏ö‡∏≠‡∏Å‡πÉ‡∏´‡πâ‡∏î‡∏π‡πÄ‡∏ß‡πá‡∏ö‡πÇ‡∏Ü‡∏©‡∏ì‡∏≤‡∏õ‡πà‡∏∞
2023-01-10 06:30:06 [TharthipRunny]- ‡πÄ‡∏à‡∏≠‡∏ö‡πà‡∏≠‡∏¢
2023-01-10 06:30:06 [‡πÄ‡∏î‡πà‡∏ô ‡∏õ‡∏∑‡∏ô‡πÉ‡∏´‡∏ç‡πà]- ‡∏ö‡∏≤‡∏á‡∏Ñ‡∏ô‡πÇ‡∏î‡∏ô‡∏´‡∏•‡∏≠‡∏Å‡πÄ‡∏õ‡πá‡∏ô‡∏•‡πâ‡∏≤‡∏ô ‡∏¢‡∏±‡∏á‡πÑ‡∏°‡πà‡∏Ñ‡∏¥‡∏î‡∏™‡∏±‡πâ‡∏ô:disappointed_face::disappointed_face:
2023-01-10 06:30:07 [‡∏≠‡∏£‡∏¥‡∏™‡∏£‡∏≤ ‡∏û‡∏±‡∏ô‡∏ò‡∏∞‡πÑ‡∏ä‡∏¢]- ‡πÄ‡∏™‡∏µ‡∏¢‡∏î‡∏≤‡∏¢‡∏≠‡∏ô‡∏≤‡∏Ñ‡∏ï‡πÄ‡∏î‡πá‡∏Å‡πÉ‡∏Ñ‡∏£‡∏£‡∏±‡∏ö‡∏ú‡∏¥‡∏î‡∏ä‡∏≠‡∏ö
2023-01-10 06:30:09 [‡∏ó‡∏¥‡∏û‡∏¢‡πå‡∏ß‡∏£‡∏¥‡∏ô‡∏ó‡∏£ ‡∏ä‡∏¢‡∏ó‡∏±‡∏ï‡∏†‡∏π‡∏°‡∏¥‡∏£‡∏±‡∏ï‡∏ô‡πå]- ‡πÄ‡∏°‡∏∑‡πà‡∏≠‡πÑ‡∏´‡∏£‡πà‡∏ï‡∏≥‡∏£‡∏ß‡∏à‡∏à‡∏∞‡∏à‡∏±‡∏ö‡∏Ñ‡∏ô‡πÄ‡∏•‡∏ß‡∏û‡∏ß‡∏Å‡∏ô‡∏µ‡πâ‡∏°‡∏≤‡∏ã‡∏∞‡

2023-01-10 06:33:24 [OoBbMm]- ‡πÄ‡∏î‡πá‡∏Å‡∏ö‡∏≤‡∏á‡∏Ñ‡∏ô‡∏ï‡∏≠‡∏ô‡πÄ‡∏î‡πá‡∏Å‡∏Ç‡∏ß‡∏ö‡∏™‡∏≠‡∏á‡∏Ç‡∏ß‡∏ö‡∏ï‡∏¥‡∏î‡∏û‡πà‡∏≠‡∏´‡∏ô‡∏±‡∏Å‡πÄ‡∏•‡∏¢‡∏ô‡∏∞‡πÄ‡∏´‡πá‡∏ô‡∏û‡πà‡∏≠‡πÄ‡∏•‡∏¥‡∏Å‡∏á‡∏≤‡∏ô‡∏Å‡∏•‡∏±‡∏ö‡∏ö‡πâ‡∏≤‡∏ô‡∏ß‡∏¥‡πà‡∏á‡∏´‡∏≤‡∏û‡πà‡∏≠‡∏ï‡∏•‡∏≠‡∏î
2023-01-10 06:33:25 [Amp mie]- ‡∏ï‡∏≥‡∏£‡∏ß‡∏à‡∏à‡∏±‡∏ö‡∏°‡∏¥‡∏à‡∏â‡∏≤‡∏ä‡∏µ‡∏û‡πÑ‡∏î‡πâ‡∏£‡∏∂‡∏¢‡∏±‡∏á?
2023-01-10 06:33:30 [Prem PIANKAN]- ‡∏ó‡∏≥‡πÑ‡∏°‡πÇ‡∏î‡∏ô‡∏´‡∏•‡∏≠‡∏Å‡∏á‡πà‡∏≤‡∏¢‡∏Ç‡∏ô‡∏≤‡∏î‡∏ô‡∏µ‡πâ‡∏ô‡πâ‡∏≠‡∏á‡πÄ‡∏≠‡πâ‡∏¢
2023-01-10 06:33:31 [ùôäùô´ùôöùôß` ùòøùô§ùô®ùôö]- ‡∏°‡∏±‡∏ô‡∏Å‡πá‡πÄ‡∏õ‡πá‡∏ô‡πÅ‡∏Ñ‡πà‡∏Å‡∏•‡∏≠‡∏∏‡∏ö‡∏≤‡∏¢ (‡πÉ‡∏ä‡πâ‡∏Å‡∏≤‡∏£‡∏î‡∏π‡∏Ñ‡∏•‡∏¥‡∏õ ‡∏´‡∏£‡∏∑‡∏≠ ‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡πÄ‡∏õ‡πá‡∏ô‡∏£‡∏π‡∏õ‡πÅ‡∏ö‡∏ö‡∏≠‡∏∑‡πà‡∏ô) ‡∏ó‡∏µ‡πà‡∏Ñ‡∏ô‡πÄ‡∏™‡∏µ‡∏¢‡∏Å‡∏±‡∏ô‡πÄ‡∏¢‡∏≠‡∏∞ ‡πÄ‡∏û‡∏£‡∏≤‡∏∞ ‡∏•‡∏á‡∏ó‡∏∏‡∏ô‡∏Ñ‡∏£‡∏±‡πâ‡∏á‡πÅ‡∏£‡∏Å‡πÅ‡∏•‡πâ‡∏ß‡πÑ‡∏î‡πâ‡πÄ‡∏á‡∏¥‡∏ô‡∏à‡∏£‡∏¥‡∏á ‡πÄ‡∏•‡∏¢‡∏°‡∏µ‡∏Ñ‡∏ß‡∏≤‡∏°‡πÇ‡∏•‡∏† ‡πÉ‡∏™‡πà‡πÄ‡∏á‡∏¥‡∏ô‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏ï‡πà‡∏≠
2023-01-10 06:33:33 [‚öíÔ∏èheadcore war‚öíÔ∏è]-

2023-01-10 06:36:26 [CnkL]- ‡πÇ‡∏õ‡∏£‡πÑ‡∏ü‡∏•‡πå‡∏õ‡∏•‡∏≠‡∏°‡∏ñ‡∏∂‡∏á‡πÄ‡∏õ‡∏¥‡∏î‡∏Å‡πá‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà‡∏´‡∏ô‡πâ‡∏≤‡∏à‡∏£‡∏¥‡∏á‡∏Ñ‡∏±‡∏ö‡πÅ‡∏ö‡∏ö‡∏ô‡∏µ‡πâ
2023-01-10 06:36:27 [HengHeng 2022]- ‡πÄ‡∏û‡∏£‡∏≤‡∏∞‡πÑ‡∏£‡πâ‡∏Å‡∏≤‡∏£‡∏Ñ‡∏ß‡∏ö‡∏Ñ‡∏∏‡∏° ‡πÄ‡∏´‡∏¢‡∏∑‡∏µ‡∏≠‡πÇ‡∏ã‡πÄ‡∏ä‡∏µ‡∏¢‡∏•‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡∏∂‡πâ‡∏ô 24‡∏ä‡∏±‡πà‡∏ß‡πÇ‡∏°‡∏á‡∏ï‡∏¥‡∏î‡πÅ‡∏ï‡πà‡∏°‡∏∑‡∏≠‡∏ñ‡∏∑‡∏≠
2023-01-10 06:36:27 [Ketsanee W]- ‡πÑ‡∏°‡πà‡∏ï‡πâ‡∏≠‡∏á‡∏õ‡∏¥‡∏î‡∏Ñ‡πà‡∏∞ ‡πÄ‡∏õ‡∏¥‡∏î‡πÄ‡∏•‡∏¢ ‡∏≠‡∏¥‡∏û‡∏ß‡∏Å‡∏™‡∏¥‡∏ó‡∏ò‡∏¥‡∏°‡∏ô‡∏∏‡∏©‡∏¢‡πå ‡∏≠‡∏¢‡πà‡∏≤‡∏°‡∏≤‡πÇ‡∏•‡∏Å‡∏™‡∏ß‡∏¢‡∏ô‡∏∞
2023-01-10 06:36:28 [KR-150]- ‡πÇ‡∏à‡∏£‡∏ô‡∏≠‡∏Å ‡πÇ‡∏à‡∏£‡πÉ‡∏ô
2023-01-10 06:36:28 [thavesak claysuwan]- ‡∏£‡∏±‡∏ê. ‡∏•‡πâ‡∏°‡πÄ‡∏´‡∏•‡∏ß ‡∏Ñ‡∏ô‡πÑ‡∏°‡πà‡∏Å‡∏•‡∏±‡∏ß‡∏Å‡∏è‡∏´‡∏°‡∏≤‡∏¢
2023-01-10 06:36:31 [Ldüåµ]- ‡∏ú‡∏ô‡∏á.:megaphone::gorilla:
2023-01-10 06:36:35 [toptop top]- ‡∏ï‡∏£. ‡∏ï‡∏≤‡∏°‡∏´‡∏•‡∏±‡∏á‡∏°‡∏¥‡∏à‡∏â‡∏≤‡∏ä‡∏µ‡∏û ‡∏ï‡∏•‡∏≠‡∏î((‡πÅ‡∏•‡∏∞‡πÑ‡∏°‡πà‡πÄ‡∏Ñ‡∏¢‡∏à‡∏±‡∏ö‡πÑ‡∏î‡πâ)) ((‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏£‡∏±‡πà‡∏ß‡πÑ‡∏´‡∏•

2023-01-10 06:39:44 [papey]- ‡∏™‡∏á‡∏™‡∏≤‡∏£‡∏ô‡πâ‡∏≠‡∏á‡∏Ñ‡∏£‡∏±‡∏ö
2023-01-10 06:39:44 [IloYos]- ‡∏Ñ‡∏ô‡∏ó‡∏µ‡πà‡∏Ñ‡∏∏‡∏¢‡∏ó‡∏≤‡∏á ‡∏ó‡∏£‡∏®. ‡∏°‡∏±‡∏ô‡πÑ‡∏°‡πà‡πÉ‡∏Ç‡πà‡∏Ñ‡∏ô‡πÑ‡∏ó‡∏¢‡πÄ‡∏û‡∏£‡∏≤‡∏∞‡∏°‡∏±‡∏ô‡∏û‡∏π‡∏î‡πÑ‡∏°‡πà‡∏ä‡∏±‡∏î
2023-01-10 06:39:45 [Raccoon Snitch]- ‡πÄ‡∏Ñ‡∏£‡∏Ñ‡πà‡∏∞ ‡∏û‡πà‡∏≠‡∏á
2023-01-10 06:39:46 [‡πÄ‡∏û‡∏ä‡∏£‡∏£‡∏±‡∏ï‡∏ô‡πå ‡∏õ‡∏±‡∏ç‡∏ç‡∏≤‡∏¢‡∏á]- ‡∏¢‡∏±‡∏á‡∏°‡∏µ‡∏û‡∏ß‡∏Å‡πÅ‡∏û‡∏Ñ‡∏™‡∏ö‡∏∏‡πà‡∏≠‡∏µ‡∏Å ‡πÅ‡∏•‡∏∞‡∏û‡∏ß‡∏Å‡∏£‡∏±‡∏ö‡∏á‡∏≤‡∏ô‡∏°‡∏≤‡∏ó‡∏≥‡∏ó‡∏µ‡πà‡∏ö‡πâ‡∏≤‡∏ô
2023-01-10 06:39:47 [‡∏Ñ‡∏ë‡∏≤‡∏ß‡∏∏‡∏í ‡∏ß‡∏¥‡∏£‡∏¥‡∏¢‡∏∞‡∏Å‡∏∏‡∏•]- ‡∏Ñ‡∏ô‡∏ó‡∏µ‡πà‡∏¢‡∏∏‡∏î‡∏≠‡∏≥‡∏ô‡∏≤‡∏à‡∏°‡∏≤‡πÄ‡∏•‡πà‡∏ô‡∏Å‡∏≤‡∏£‡πÄ‡∏°‡∏∑‡∏≠‡∏á‡πÄ‡∏´‡πá‡∏ô‡∏°‡∏≤‡∏´‡∏•‡∏≤‡∏¢‡∏£‡∏≤‡∏¢‡πÑ‡∏õ‡πÑ‡∏°‡πà‡∏£‡∏≠‡∏î‡∏ä‡∏±‡∏Å‡∏£‡∏≤‡∏¢‡∏û‡∏ß‡∏Å‡∏£‡∏¥‡∏ß‡∏£‡πâ‡∏≠‡∏Å‡πá‡πÄ‡∏•‡∏µ‡∏¢‡∏à‡∏ô‡∏´‡∏≥‡∏à‡∏∞‡πÄ‡∏õ‡∏∑‡πà‡∏≠‡∏¢‡∏Å‡πá‡∏´‡∏ß‡∏±‡∏á‡∏à‡∏∞‡∏°‡∏µ‡∏¢‡∏î‡∏ñ‡∏≤‡∏ö‡∏±‡∏•‡∏î‡∏≤‡∏®‡∏±‡∏Å‡∏™‡∏∏‡∏î‡∏ó‡πâ‡∏≤‡∏Å‡πá-‡∏Ñ-‡∏Ñ‡∏≠‡∏ï‡∏Å‡∏ô‡∏¥‡∏Ñ‡∏ô‡πÑ‡∏õ‡πÄ‡∏ä‡∏µ‡∏¢‡∏£‡πÅ‡∏Ñ‡πà‡∏î‡∏π‡∏ö‡∏≠‡∏•-‡∏°‡∏ß‡∏¢-‡πÑ‡∏°

2023-01-10 06:41:33 [big]- ‡∏°‡∏±‡∏ô‡∏î‡πà‡∏≤‡∏™‡∏∏‡∏î‡∏¢‡∏≠‡∏î‡πÅ‡∏•‡πâ‡∏ß‡∏Ñ‡∏£‡∏±‡∏ö‡πÑ‡∏≠‡πâ‡∏û‡∏ß‡∏Å‡∏ô‡∏µ‡πâ
2023-01-10 06:41:33 [P. Lam]- ‡∏Ñ‡∏¥‡∏î‡∏ß‡πà‡∏≤ ‡∏ï‡∏£ ‡∏à‡∏∞‡∏à‡∏±‡∏ö‡πÑ‡∏î‡πâ‡∏°‡∏±‡πä‡∏¢
2023-01-10 06:41:33 [‡∏õ‡∏†‡∏≤‡∏ß‡∏¥‡∏ô]- ‡∏£‡∏≤‡∏¢‡πÑ‡∏î‡πâ‡πÄ‡∏™‡∏£‡∏¥‡∏°‡∏ó‡∏µ‡πÑ‡∏î‡πâ‡∏ï‡∏±‡∏á‡∏à‡∏¥‡∏á‡∏Ñ‡∏∑‡∏ô‡∏•‡πâ‡∏≤‡∏á‡∏à‡∏≤‡∏ô‡πÄ‡∏°‡∏∑‡πà‡∏≠‡∏Ñ‡∏∑‡∏ô‡πÑ‡∏î‡πâ‡∏°‡∏≤500
2023-01-10 06:41:34 [‡∏ò‡∏µ‡∏£‡∏∞‡∏û‡∏á‡∏©‡πå ‡∏ó‡∏π‡∏•‡πÄ‡∏´‡∏•‡∏∑‡∏≠]- ‡πÉ‡∏à‡∏°‡∏∂‡∏á‡∏°‡∏±‡∏ô‡πÇ‡∏´‡∏î‡∏°‡∏≤‡∏Å‡πÑ‡∏≠‡πâ‡πÅ‡∏≠‡∏î‡∏°‡∏¥‡∏ô
2023-01-10 06:41:35 [ Phoenix]- ‡∏Ñ‡∏ô‡∏ó‡∏≥‡∏ö‡∏≤‡∏õ‡∏û‡∏ß‡∏Å‡πÑ‡∏°‡πà‡πÑ‡∏î‡πâ‡∏ï‡∏≤‡∏¢‡∏î‡∏µ‡∏´‡∏•‡∏≠‡∏Å ‡∏ó‡∏≥‡πÄ‡∏•‡∏ß
2023-01-10 06:41:35 [ÍßÅ ‡πÄ‡∏à‡πâ‡∏≤‡πÄ‡∏°‡∏∑‡∏≠‡∏á‡πÄ‡∏Ç‡πâ‡∏≤‡∏õ‡πà‡∏≤ ÍßÇ]- ‡∏à‡∏±‡∏ö‡πÑ‡∏ß‡πÜ
2023-01-10 06:41:36 [Jarinya Dawan]- ‡∏Ç‡∏≠‡πÉ‡∏´‡πâ‡∏à‡∏±‡∏ö‡∏°‡∏±‡∏ô‡πÑ‡∏î‡πâ 
2023-01-10 06:41:36 [‡πÄ‡∏à‡∏£‡∏¥‡∏ç‡∏£‡∏∏‡πà‡∏á ‡∏à‡∏∏‡πâ‡∏¢‡πÄ‡∏à‡∏£‡∏¥‡∏ç]- ‡∏ä‡∏±‡πà‡∏ß‡∏°‡∏≤‡∏Å‡∏ó‡∏≥‡πÅ‡∏ö‡∏ö‡∏ô‡∏µ‡πâ
2023-01-10 06:41:37 [Bird-Bird Channel]

Unnamed: 0,dateTime,authorName,Message
0,2023-01-10 05:59:54,Pornchai S.,hi
1,2023-01-10 06:00:03,BOY BANK,2
2,2023-01-10 06:00:04,‡∏≠‡∏†‡∏¥‡∏ä‡∏≤‡∏ï ‡∏™‡∏±‡∏á‡∏Ç‡πå‡∏•‡∏≤‡∏¢,:person-turqouise-waving::person-turqouise-wav...
3,2023-01-10 06:00:05,Pornchai S.,hi
4,2023-01-10 06:00:20,Dec.,‡∏™‡∏ß‡∏±‡∏™‡∏î‡∏µ‡∏Ñ‡πà‡∏∞‡∏∞‡∏∞‡∏∞‡∏∞


##### Assign Sentiment ######

In [15]:
df["processed"] = df.Message.map(lambda x: "|".join(process_thai(x)))
df["wc"] = df.processed.map(lambda x: len(x.split("|")))
df["uwc"] = df.processed.map(lambda x: len(set(x.split("|"))))

In [16]:
#text faetures
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

tfidftext = tfidf_fit.transform(df["Message"])

In [17]:
#word count and unique word counts; actually might not be so useful
from sklearn.preprocessing import StandardScaler

num_valid = scaler_fit.transform(df[["wc","uwc"]].astype(float))

data_valid = np.concatenate([num_valid,tfidftext.toarray()],axis=1)

In [18]:
x_probs = model.predict_proba(data_valid)
x_probs_df = pd.DataFrame(x_probs)
x_probs_df.columns = model.classes_
x_probs_df.head(20)

Unnamed: 0,neg,neu,pos,q
0,0.082666,0.753584,0.153079,0.010671
1,0.072436,0.740483,0.147865,0.039216
2,0.008875,0.834388,0.154073,0.002663
3,0.082666,0.753584,0.153079,0.010671
4,0.033912,0.887352,0.054793,0.023944
5,0.012179,0.833004,0.150924,0.003893
6,0.025658,0.565131,0.38394,0.025271
7,0.032979,0.77715,0.141246,0.048625
8,0.021335,0.80111,0.173738,0.003817
9,0.043666,0.725294,0.226912,0.004127


In [19]:
df['preds'] = x_probs_df.apply(lambda x: assign_sentiment(x['neg'], x['neu'], x['pos']), axis=1)

##### Assign Time of Living #####

In [28]:
start_time = (min(df['dateTime']).split())[1]
end_time = (max(df['dateTime']).split())[1]

In [29]:
from datetime import *
# convert time string to datetime
t1 = datetime.strptime(start_time, "%H:%M:%S")
print('Start time:', t1.time())

t2 = datetime.strptime(end_time, "%H:%M:%S")
print('End time:', t2.time())

# get difference
delta = t2 - t1

# time difference in seconds
print(f"Time difference is {delta.total_seconds()} seconds")

# time difference in milliseconds
ms = delta.total_seconds() / 60
print(f"Time difference is {ms} minutes")

Start time: 05:59:54
End time: 06:42:59
Time difference is 2585.0 seconds
Time difference is 43.083333333333336 minutes


In [30]:
df["mintime"] = min(df['dateTime'])
df.head()

Unnamed: 0,dateTime,authorName,Message,processed,wc,uwc,preds,time2,NewCol,Gender,mintime
0,2023-01-10 05:59:54,Pornchai S.,hi,hi,1,1,neu,2023-01-10 05:59:54,0,Undefine,2023-01-10 05:59:54
1,2023-01-10 06:00:03,BOY BANK,2,2,1,1,neu,2023-01-10 05:59:54,0,Undefine,2023-01-10 05:59:54
2,2023-01-10 06:00:04,‡∏≠‡∏†‡∏¥‡∏ä‡∏≤‡∏ï ‡∏™‡∏±‡∏á‡∏Ç‡πå‡∏•‡∏≤‡∏¢,:person-turqouise-waving::person-turqouise-wav...,:|person-turqouise-waving|::|person-turqouise-...,11,3,neu,2023-01-10 05:59:54,0,Undefine,2023-01-10 05:59:54
3,2023-01-10 06:00:05,Pornchai S.,hi,hi,1,1,neu,2023-01-10 05:59:54,0,Undefine,2023-01-10 05:59:54
4,2023-01-10 06:00:20,Dec.,‡∏™‡∏ß‡∏±‡∏™‡∏î‡∏µ‡∏Ñ‡πà‡∏∞‡∏∞‡∏∞‡∏∞‡∏∞,‡∏™‡∏ß‡∏±‡∏™‡∏î‡∏µ|‡∏Ñ‡πà‡∏∞|xxrep,3,3,neu,2023-01-10 05:59:54,0,Female,2023-01-10 05:59:54


In [31]:
df['timeinlive'] = df.apply(lambda x: apply_datetime(x['mintime'], x['dateTime']), axis=1)

In [32]:
df['timeinlive'].unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43], dtype=int64)

##### Assign Gender #####

In [33]:
df['Gender'] = df.apply(lambda x: assign_gender(x['Message']), axis=1)

##### Initial DataFrame for graph ######

In [34]:
df_bar = pd.DataFrame({'count' : df.groupby( ["preds"] ).size()}).reset_index()
df_graphbar = pd.DataFrame({'count' : df.groupby( ['timeinlive','preds'] ).size()}).reset_index()
df_graphgender = pd.DataFrame({'count' : df.groupby(['Gender']).size()}).reset_index()
df_line = df[['timeinlive', 'Gender']].pivot_table(index="timeinlive", columns="Gender",  aggfunc=len).fillna(0).astype(int).reset_index()
df_line['Total'] = df_line['Female'] + df_line['Male'] + df_line['Undefine']
df_graphpie = pd.DataFrame({'count' : df.groupby( ['preds'] ).size()}).reset_index()
df_people = pd.DataFrame({'count' : df.groupby(['authorName']).size()})
df_graphpeople = df_people['count'].sort_values(ascending=False).head(10).reset_index()

##### Dashboard by Dash #####

In [36]:
from dash import Dash, dcc, html, Input, Output, ctx
from jupyter_dash import JupyterDash
import plotly.express as px
import pandas as pd

sentiment = ['pos', 'neg', 'neu']

app = JupyterDash(__name__)

fig = px.line(df_line, x="timeinlive", y="Total", text="Total")
fig.update_traces(textposition='top center')

fig2 = px.bar(df_graphbar, x="preds", y="count",color="preds",color_discrete_sequence=["red", "gray","green"])

fig3 = px.bar(df_graphgender, x="Gender", y="count", color="Gender")

fig4 = px.pie(df_graphpie, values="count", names="preds", hole=.3)

fig5 = px.bar(df_graphpeople, x="authorName", y="count", text="count")

app.layout = html.Div([
    
    html.Div(children=[
        html.H1(children='Total #Commend by time'),

        html.Div(children='''
               The sentiment of sentense.
        '''),
        
        dcc.Checklist(
                  id = "checklist1",
                  options=[{'label': i, 'value': i} for i in sentiment],
                  value=['pos', 'neg', 'neu'], #Initial values,
                  inline=True),

        dcc.Graph(
            id='totalbytime-graph1',
            figure=fig, clear_on_unhover=True, clickData=None
        ),
        
        html.H1(children='%Contribution of Pos-Neu-Neg sentiment'),
        
        dcc.Graph(
            id='cont-graphpie',
            figure=fig4
        ),
        
        dcc.RangeSlider(
            min(df['timeinlive']),
            max(df['timeinlive'])+1,
            1,
            value=[min(df['timeinlive']), max(df['timeinlive'])],
            id='range-slider')
    ],style={'padding': 10, 'flex': 1}),
    
    html.Div(children=[
        html.H1(children='Total #Commend by Gender'),
        html.Div(children='''
            Click to clear filter.
        '''),
        html.Button('Clear Filter', id='btn1', n_clicks=0),
        
        dcc.Graph(
            id='gender-graph',
            figure=fig3, clickData=None
        ),
        
        html.H1(children='Top #Commend by Authorname'),
        
        dcc.Graph(
            id='topcommend-graph',
            figure=fig5
        )

    ],style={'padding': 10, 'flex': 1})
    ,
],style={'display': 'flex', 'flexDirection': 'row', 'flex-wrap': 'wrap'})

@app.callback(
    Output('totalbytime-graph1', 'figure'),
    Input('checklist1', 'value'))

def update_fig1(x):
    n_df_line = df[(df['preds'].isin(x))][['timeinlive','preds']].groupby('timeinlive').size().reset_index()
    n_df_line.columns = ['Time', 'Total']
    fig = px.line(n_df_line, x="Time", y="Total",text="Total")
    fig.update_traces(textposition='top center')
    return fig

@app.callback(
    Output('gender-graph', 'figure'),
    Input('checklist1', 'value'),
    Input('totalbytime-graph1','hoverData'),
    Input('totalbytime-graph1', 'clickData'),
    Input('btn1', 'n_clicks'))

def update_fig2(x, hover_data, click_data, n_click):
    if ctx.triggered_id == "btn1":
        n_df_graphgender = df[(df['preds'].isin(x))][['Gender','preds']].groupby('Gender').size().reset_index()
        n_df_graphgender.columns = ['Gender', 'Total']
        fig3 = px.bar(n_df_graphgender, x="Gender", y="Total", color="Gender")
        return fig3
    if hover_data is None:
        if click_data is None:
            n_df_graphgender = df[(df['preds'].isin(x))][['Gender','preds']].groupby('Gender').size().reset_index()
            n_df_graphgender.columns = ['Gender', 'Total']
            fig3 = px.bar(n_df_graphgender, x="Gender", y="Total", color="Gender")
            return fig3
        else:
            Specific_time = click_data['points'][0]['x']
            n_df_graphgender = df[(df['preds'].isin(x)) & (df['timeinlive'] == Specific_time)][['Gender','preds']].groupby('Gender').size().reset_index()
            n_df_graphgender.columns = ['Gender', 'Total']
            fig3 = px.bar(n_df_graphgender, x="Gender", y="Total", color="Gender")
            return fig3
    else:
        Specific_time = hover_data['points'][0]['x']
        n_df_graphgender = df[(df['preds'].isin(x)) & (df['timeinlive'] == Specific_time)][['Gender','preds']].groupby('Gender').size().reset_index()
        n_df_graphgender.columns = ['Gender', 'Total']
        fig3 = px.bar(n_df_graphgender, x="Gender", y="Total", color="Gender")
        return fig3

@app.callback(
    Output('cont-graphpie', 'figure'),
    Output('topcommend-graph', 'figure'),
    Input('range-slider', 'value'),
    Input('checklist1', 'value'))

def updatefig3(range_data, x):
    min_time = range_data[0]
    max_time = range_data[1]
    n_df_pie = df[(df['timeinlive'] >= min_time) & (df['timeinlive'] <= max_time)][['preds']].groupby('preds').size().reset_index()
    n_df_pie.columns = ['preds', 'count']
    fig4 = px.pie(n_df_pie, values="count", names="preds", hole=.3)
    n_df_people = pd.DataFrame({'count' : df[(df['timeinlive'] >= min_time) & (df['timeinlive'] <= max_time) & df['preds'].isin(x)].groupby(['authorName']).size()})
    n_df_graphpeople = n_df_people['count'].sort_values(ascending=False).head(10).reset_index()
    fig5 = px.bar(n_df_graphpeople, x="authorName", y="count", text="count")
    return fig4, fig5

@app.callback(
    Output('totalbytime-graph1', 'clickData'),
    Input('btn1', 'n_clicks'))
def clear_clickdata(x):
    return None


if __name__ == '__main__':
    app.run_server(debug=True, port=8054)

Dash app running on http://127.0.0.1:8054/
