In [1]:
import numpy as np
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import gensim

In [2]:
df = pd.read_csv('nightoffullmoon-1.csv')

In [3]:
df.head()

Unnamed: 0,date,review,star,res
0,"October 3, 2020",Entertaining and somewhat difficult but not to...,5,"Giant NetworkOctober 9, 2020Thank you for your..."
1,"October 19, 2020",This is undoubtedly the best game in the app s...,5,"Giant NetworkOctober 19, 2020Thank you for you..."
2,"August 8, 2020",Love the game. Only phone game I ever spent mo...,5,"Giant NetworkAugust 10, 2020Hello. Thank you f..."
3,"May 15, 2019",I've lost all content that I paid for. Sdk log...,1,"Giant NetworkMay 15, 2019Hi, Sorry about the i..."
4,"October 1, 2020",Have only played for an hour or so but I reall...,5,"Giant NetworkOctober 9, 2020Good day to you! T..."


In [4]:
df = df.dropna()

In [5]:
rate5 = df[df['star'] == 5]

In [6]:
documents = df[df['star'] != 5]

In [7]:
documents.head()

Unnamed: 0,date,review,star,res
3,"May 15, 2019",I've lost all content that I paid for. Sdk log...,1,"Giant NetworkMay 15, 2019Hi, Sorry about the i..."
6,"May 4, 2020",this game was originally a game you could play...,2,"Giant NetworkMay 4, 2020Hello, dear Madi. Than..."
7,"October 30, 2020",The Nature Spirit boss fight is hella buggy. A...,3,"Giant NetworkOctober 30, 2020We are sorry to h..."
10,"September 10, 2020",Wish there was at least 2 other classes & path...,2,"Giant NetworkSeptember 11, 2020Thank you for s..."
11,"November 25, 2020",Good game. Still has glitches after beta. You ...,4,"Giant NetworkNovember 27, 2020We're so happy t..."


In [8]:
documents['review']

3      I've lost all content that I paid for. Sdk log...
6      this game was originally a game you could play...
7      The Nature Spirit boss fight is hella buggy. A...
10     Wish there was at least 2 other classes & path...
11     Good game. Still has glitches after beta. You ...
                             ...                        
391    1)1 star. WHY paid for full Dlc yet still need...
392    The lack of sufficent healing tiles and sudden...
394    Despite the many clear issues that the other r...
396    What the hell is your issue with the afterimag...
Name: review, Length: 192, dtype: object

In [9]:
stop = stopwords.words('english')

In [10]:
corpus=[]
for doc in documents.review:
    text=re.sub('[^a-zA-z]',' ', doc)
    text=text.lower()
    text=text.split()
    text=[PorterStemmer().stem(word) for word in text if not word in stop]
    text=' '.join(text)
    corpus.append(text)

In [11]:
documents['clean_content'] = corpus

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [12]:
processed_docs = documents.clean_content.str.split()

In [13]:
processed_docs

3      [lost, content, paid, sdk, login, failur, plea...
6      [game, origin, game, could, play, offlin, need...
7      [natur, spirit, boss, fight, hella, buggi, los...
10     [wish, least, class, path, play, realli, disap...
11     [good, game, still, glitch, beta, cannot, spec...
                             ...                        
391    [star, paid, full, dlc, yet, still, need, see,...
392    [lack, suffic, heal, tile, sudden, strong, unp...
394    [despit, mani, clear, issu, review, point, giv...
396    [hell, issu, afterimag, boss, total, health, p...
398    [quick, warn, guy, got, dlc, older, version, g...
Name: clean_content, Length: 192, dtype: object

In [14]:
dictionary=gensim.corpora.Dictionary(processed_docs)

In [15]:
dictionary.filter_extremes(no_below=5,no_above=0.2,keep_n=1000)

In [16]:
len(dictionary)

315

In [17]:
dictionary[29]

'order'

In [18]:
bow_corpus=[dictionary.doc2bow(doc) for doc in processed_docs]

In [19]:
len(bow_corpus)

192

In [20]:
bow_corpus[100]

[(20, 1),
 (27, 1),
 (53, 3),
 (70, 1),
 (89, 1),
 (90, 1),
 (97, 1),
 (104, 1),
 (113, 1),
 (136, 1),
 (166, 1),
 (181, 1),
 (184, 1),
 (195, 1),
 (218, 1),
 (256, 1),
 (262, 2)]

In [21]:
from gensim import corpora,models
tfidf=models.TfidfModel(bow_corpus)

In [22]:
corpus_tfidf=tfidf[bow_corpus]

In [23]:
corpus_tfidf[100]

[(20, 0.13805358415675822),
 (27, 0.1409455359954044),
 (53, 0.5972871652070633),
 (70, 0.1276219793141063),
 (89, 0.12525596569969452),
 (90, 0.17517220796489869),
 (97, 0.13805358415675822),
 (104, 0.20675847450021606),
 (113, 0.192241155811742),
 (136, 0.16586232408303878),
 (166, 0.1276219793141063),
 (181, 0.18037964277151278),
 (184, 0.17517220796489869),
 (195, 0.1409455359954044),
 (218, 0.23733606650462016),
 (256, 0.23733606650462016),
 (262, 0.45094910692878193)]

In [44]:
lda_model_tfidf = gensim.models.LdaMulticore(corpus_tfidf, 
                                             num_topics=3, 
                                             id2word = dictionary, 
                                             passes = 2, 
                                             workers=4)

In [45]:
lda_model_tfidf.log_perplexity(bow_corpus)

-6.119132324683

In [46]:
for idx, topic in lda_model_tfidf.print_topics(-1):
    print("Topic: {} Word: {}".format(idx, topic))
    print("\n")

Topic: 0 Word: 0.009*"star" + 0.008*"love" + 0.008*"version" + 0.008*"still" + 0.008*"work" + 0.008*"got" + 0.008*"gameplay" + 0.007*"log" + 0.007*"unlock" + 0.007*"nice"


Topic: 1 Word: 0.008*"need" + 0.008*"boss" + 0.008*"charact" + 0.008*"health" + 0.007*"beat" + 0.007*"deck" + 0.007*"enemi" + 0.007*"knight" + 0.007*"edit" + 0.006*"build"


Topic: 2 Word: 0.010*"content" + 0.009*"buy" + 0.008*"feel" + 0.008*"new" + 0.007*"stori" + 0.007*"paid" + 0.007*"add" + 0.007*"enjoy" + 0.007*"alreadi" + 0.007*"without"




In [32]:
#0. บอสเก่งเกิน
#1. บ่นเกมเพลย์
#2. ด่า business model ที่เป็น freemium
# ด่า business model ต้องจ่ายเพื่อปลดล็อคตัวละคร
# ด่า business model ต้องจ่ายเพื่อปลดล็อคตัวละคร และ คอนเท้นต์

In [47]:
topic=[]
for i in range(len(corpus_tfidf)):
    a = lda_model_tfidf[corpus_tfidf[i]]
    a.sort(key=lambda x: x[1],reverse=True)
    topic.append([l[0] for l in a if l[1]>0.1])

In [48]:
T = []
for t in topic:
    T.extend(t)

In [49]:
for i in set(T):
    print(f'Topic {i} have {T.count(i)} times')

Topic 0 have 61 times
Topic 1 have 98 times
Topic 2 have 102 times


In [50]:
documents['topic'] = topic

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [51]:
documents[documents['star'] == 1]['topic'].value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas\_libs\hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[2]          22
[1]          11
[0]           9
[1, 2]        5
[1, 0]        3
[0, 2]        2
[2, 1]        2
[0, 1]        1
[2, 1, 0]     1
Name: topic, dtype: int64

In [52]:
documents[documents['star'] == 2]['topic'].value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas\_libs\hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[1]          11
[2]           6
[0, 2]        3
[0]           3
[1, 2]        3
[2, 1]        2
[1, 0]        2
[0, 1]        2
[1, 2, 0]     1
Name: topic, dtype: int64

In [53]:
documents[documents['star'] == 3]['topic'].value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas\_libs\hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[2]          9
[0]          8
[1]          6
[1, 2]       5
[2, 1]       3
[1, 2, 0]    2
[0, 2]       1
[2, 0, 1]    1
[0, 1]       1
[1, 0]       1
Name: topic, dtype: int64

In [54]:
documents[documents['star'] == 4]['topic'].value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas\_libs\hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[1]          19
[2]          19
[0]           8
[1, 2]        5
[1, 0]        3
[2, 1]        3
[0, 2]        3
[0, 1]        2
[2, 0]        1
[0, 1, 2]     1
[2, 0, 1]     1
[0, 2, 1]     1
Name: topic, dtype: int64

In [55]:
documents['res']

3      Giant NetworkMay 15, 2019Hi, Sorry about the i...
6      Giant NetworkMay 4, 2020Hello, dear Madi. Than...
7      Giant NetworkOctober 30, 2020We are sorry to h...
10     Giant NetworkSeptember 11, 2020Thank you for s...
11     Giant NetworkNovember 27, 2020We're so happy t...
                             ...                        
391    Giant NetworkMay 11, 2020Hello. Team of "Night...
392    Giant NetworkNovember 5, 2018Hi, I am coming l...
394    Giant NetworkOctober 9, 2020Thank you for your...
396    Giant NetworkAugust 21, 2020This is not an inv...
398    Giant NetworkMarch 2, 2021Please send your UID...
Name: res, Length: 192, dtype: object

In [57]:
documents.to_csv('comments.csv')