# Read data

In [55]:
import pandas
# Load the sentiment lexicon CSV into a DataFrame (one column per word list).
# The "utf-8-sig" codec is used so that a leading UTF-8 byte-order mark, if the
# file has one, is not read as part of the first column name.
Sentiment_data = pandas.read_csv("SentimentDict.csv",encoding="utf-8-sig")

In [56]:
# Sanity check: show the type of the object returned by read_csv.
type(Sentiment_data)

pandas.core.frame.DataFrame

In [57]:
# List the column labels; each column is used below as one word list of the lexicon.
Sentiment_data.columns

Index(['positive', 'negative', 'not', 'degree-1', 'degree-2', 'degree-3',
       'degree-4', 'degree-5', 'degree-6'],
      dtype='object')

# Import module

In [58]:
from dcard import Dcard
import dateutil.parser
import csv
import pandas
import jieba

# Build positive & negative lexicon

In [59]:
# Positive lexicon: the distinct entries of the "positive" column, stored in a set
# so that the membership tests done while scoring are hash lookups.
# The printed value is the size of that set.
# NOTE(review): read_csv represents empty cells as NaN; if this column is shorter
# than the longest column, the set presumably also holds a NaN entry and the size
# is one larger than the real word count -- confirm, and consider .dropna().
positives_set=set(Sentiment_data["positive"])
print(len(positives_set))

5006


In [60]:
# Negative lexicon: the distinct entries of the "negative" column, stored in a set
# so that the membership tests done while scoring are hash lookups.
# The printed value is the size of that set.
# NOTE(review): this column has fewer entries than "positive", so the remaining
# cells are presumably NaN and one NaN entry likely ends up in the set -- confirm,
# and consider .dropna().
negatives_set = set(Sentiment_data['negative'])
print(len(negatives_set))

4686


# Build negation lexicon

In [61]:
# Negation lexicon: the distinct entries of the "not" column. hasOpposite() tests
# tokens against this set. The printed value is the size of the set.
# NOTE(review): the column is much shorter than the frame, so the padding cells are
# presumably NaN and one NaN entry likely ends up in the set -- confirm, and
# consider .dropna().
not_set = set(Sentiment_data['not'])
print(len(not_set))

214


# Build Customized Level Lexicon

In [62]:
# Degree (intensity) lexicon: maps each degree word to the multiplier that is
# applied to a text's score. Columns are processed in the order listed, so a word
# that appears in several columns keeps the weight of the last column containing it.
DEGREE_WEIGHTS = (
    ('degree-1', 1.8),
    ('degree-2', 1.6),
    ('degree-3', 1.4),
    ('degree-4', 1.2),
    ('degree-5', 1.1),
    ('degree-6', 0.9),
)

degree_dict = {}
for column, weight in DEGREE_WEIGHTS:
    for word in Sentiment_data[column]:
        degree_dict[word] = weight
    # Cumulative dictionary size after each column.
    # Recorded run printed: 69, 112, 146, 174, 186, 213.
    print(len(degree_dict))

69
112
146
174
186
213


# Processing Score

In [63]:
# Negation check against the module-level `not_set` lexicon
def hasOpposite(wordlist):
    """Tell whether any token of ``wordlist`` is a negation word.

    Args:
        wordlist: iterable of tokens to look up in ``not_set``.

    Returns:
        True as soon as one token is found in ``not_set``; False when no token
        matches or ``wordlist`` is empty.
    """
    return any(token in not_set for token in wordlist)

# Degree lookup: multiplier of the last degree word in the tokens, 1.0 if there is none
def getDegree(wordlist):
    """Return the score multiplier implied by the degree words in ``wordlist``.

    Args:
        wordlist: iterable of tokens to look up in ``degree_dict``.

    Returns:
        The ``degree_dict`` value of the last token that is a key of
        ``degree_dict``; 1.0 when no token is a degree word.
    """
    multiplier = 1.0
    for token in wordlist:
        # Keep the current multiplier unless this token is a degree word, so a
        # later degree word overrides an earlier one.
        multiplier = degree_dict.get(token, multiplier)
    return multiplier

# Sentiment scoring

In [64]:
import jieba

# Score a text: lexicon hits, then negation flip, then degree scaling
def analyze(text):
    """Compute the signed sentiment score of ``text``.

    The text is tokenized with ``jieba.cut``. Each token whose lower-cased form is
    in ``positives_set`` counts +1, otherwise -1 if it is in ``negatives_set``.
    If ``hasOpposite`` reports a negation word anywhere in the tokens, the sign of
    the whole total is flipped. The total is finally multiplied by the value
    returned by ``getDegree`` for the tokens.

    Args:
        text: the string to score.

    Returns:
        The score: above zero leans positive, below zero leans negative.
    """
    tokens = list(jieba.cut(text))

    # Positive lexicon is checked first, so a word present in both sets counts +1.
    polarity = sum(
        1 if lowered in positives_set else -1 if lowered in negatives_set else 0
        for lowered in (tok.lower() for tok in tokens)
    )

    signed = -polarity if hasOpposite(tokens) else polarity
    return signed * getDegree(tokens)

# Map the numeric score of a text to a sentiment label
def sentiment_analysis(text):
    """Classify ``text`` from the sign of its ``analyze`` score.

    Args:
        text: the string to classify.

    Returns:
        'positive' when the score is above zero, 'negative' when it is below
        zero, and 'normal' otherwise.
    """
    score = analyze(text)
    if score > 0:
        return 'positive'
    if score < 0.0:
        return 'negative'
    return 'normal'

In [65]:
# Smoke test: classify one short sentence ("I am very unhappy") and print the label.
# The recorded run printed "negative".
text = '我很不高興'
print('Output：',sentiment_analysis(text))

Output： negative


# Get article by Dcard-spider

In [66]:
def keyword(metas):
    """Keep only the post metas that have at least two comments.

    Args:
        metas: iterable of mappings, each with a numeric 'commentCount' entry.

    Returns:
        A new list with the metas whose 'commentCount' is >= 2, in input order.
    """
    return list(filter(lambda meta: meta['commentCount'] >= 2, metas))
def dcard(forums,text):
    """Find the post of a Dcard forum with the most responses of a given sentiment.

    For every fetched post, the post content and each non-hidden comment are
    classified with ``sentiment_analysis`` and counted per label. The post with
    the highest count for the requested label wins; on a tie the post seen
    first is kept.

    Args:
        forums: forum name passed to ``Dcard().forums``.
        text: sentiment label to rank by: 'positive', 'negative' or 'normal'.

    Returns:
        A two-line string: "Most <text> response:<title>" followed by the post
        URL. When no post has a count above zero for the label, the title and
        the post id in the URL are empty.

    Raises:
        ValueError: if ``text`` is not one of the three sentiment labels.
            Previously any value other than 'positive' was silently ranked by
            the negative count while the output still claimed "Most <text>".
    """
    valid_labels = ('positive', 'negative', 'normal')
    # Validate before any network access so a typo fails fast.
    if text not in valid_labels:
        raise ValueError(
            "text must be one of %s, got %r" % (", ".join(valid_labels), text)
        )

    # NOTE(review): the link always uses the fixed '/f/dcard/p/' path rather than
    # the requested forum -- confirm that Dcard resolves post ids under that path.
    URL = "https://www.dcard.tw/f/dcard/p/"

    # Named `client` so the local does not shadow this function's own name.
    client = Dcard()
    # Request 50 metas; `keyword` is passed as the callback that filters them.
    metas = client.forums(forums).get_metas(num=50, callback=keyword)
    posts = client.posts(metas).get(comments=True, links=False)

    best = {'count': 0, 'title': "", 'id': ""}
    for post in posts.result():
        counts = {'positive': 0, 'negative': 0, 'normal': 0}
        counts[sentiment_analysis(post['content'])] += 1
        for comment in post['comments']:
            # Hidden comments are skipped.
            if not comment['hidden']:
                counts[sentiment_analysis(comment['content'])] += 1

        # Strictly greater: the earliest post keeps the lead on ties.
        if counts[text] > best['count']:
            best = {'count': counts[text], 'title': post['title'], 'id': post['id']}

    return "Most " + text + " response:" + best['title'] + "\n" + URL + str(best['id'])

# Main

In [None]:
if __name__ == '__main__':
    # Report, in order: the post with the most positive responses in "fcu",
    # then the post with the most negative responses in "funny".
    for forum_name, label in (("fcu", "positive"), ("funny", "negative")):
        print(dcard(forum_name, label))

# Reference

In [None]:
# Sentiment dictionary source: https://github.com/mathlf2015/text_analysis/tree/master/code/sentiment/sentimentDict