# Sentiment Analysis
* https://westus2.dev.cognitive.microsoft.com/docs/services/TextAnalytics-v2-1/operations/56f30ceeeda5650db055a3c9

In [1]:
import os

# Host name of the Text Analytics resource. Do NOT include the "https://"
# scheme: the value is passed to http.client.HTTPSConnection as a bare host.
# Can be overridden with the TEXT_ANALYTICS_ENDPOINT environment variable.
textAnalyticsEndpoint = os.environ.get(
    'TEXT_ANALYTICS_ENDPOINT',
    'textanalytics0701.cognitiveservices.azure.com',
)

# Subscription key sent in the 'Ocp-Apim-Subscription-Key' header.
# Read it from the TEXT_ANALYTICS_KEY environment variable so that a real key
# never has to be saved inside the notebook; 'xxxx' is only a placeholder.
textAnalyticsKey = os.environ.get('TEXT_ANALYTICS_KEY', 'xxxx')

In [2]:
import urllib.parse, http.client, urllib.request, urllib.error, json

In [3]:
# HTTP headers shared by every Text Analytics request in this notebook:
# JSON in, JSON out, authenticated with the subscription key.
headers = dict([
    ('Content-Type', 'application/json'),
    ('Ocp-Apim-Subscription-Key', textAnalyticsKey),
    ('Accept', 'application/json'),
])

# Sample English texts to analyse; each becomes one document whose id is its
# 1-based position, rendered as a string.
sample_texts = [
    'Wow! I am loving this course!',
    'This course is not working for me right now.',
]

# Request payload: a list of documents, each with language, id and text.
body = {
    'documents': [
        {'language': 'en', 'id': str(doc_no), 'text': doc_text}
        for doc_no, doc_text in enumerate(sample_texts, start=1)
    ]
}

# No query-string parameters are used, so this evaluates to an empty string.
params = urllib.parse.urlencode(dict())

In [4]:
# POST the documents to the sentiment endpoint and label each one
# 'positive' (score > 0.5) or 'negative' (score <= 0.5).
conn = None
try:
    conn = http.client.HTTPSConnection(textAnalyticsEndpoint)
    # Serialize with json.dumps so the payload is real JSON (str(dict) yields
    # a Python repr with single quotes), and encode it explicitly as UTF-8
    # instead of relying on http.client's default encoding for str bodies.
    payload = json.dumps(body).encode('utf-8')
    conn.request('POST', '/text/analytics/v2.1/sentiment?%s' % params, payload, headers)
    response = conn.getresponse()
    jsonData = response.read().decode('UTF-8')  # raw JSON text of the response
    print(jsonData)
    data = json.loads(jsonData)  # parse the JSON text into a dict
    print(data)
    for document in data['documents']:
        sentiment = 'positive'
        if document['score'] <= 0.5:
            sentiment = 'negative'
        print('Document ' + document['id'] + ' has a ' + sentiment + ' sentiment.')
except Exception as ex:
    # Best-effort demo cell: report the problem instead of raising.
    print(ex)
finally:
    # Always release the connection, including when the request or parsing fails.
    if conn is not None:
        conn.close()

{"documents":[{"id":"1","score":0.97856038808822632},{"id":"2","score":0.004374384880065918}],"errors":[]}
{'documents': [{'id': '1', 'score': 0.9785603880882263}, {'id': '2', 'score': 0.004374384880065918}], 'errors': []}
Document 1 has a positive sentiment.
Document 2 has a negative sentiment.


# Key Phrases

In [5]:
# POST the same documents to the key-phrase endpoint and list the phrases
# returned for each document.
conn = None
try:
    conn = http.client.HTTPSConnection(textAnalyticsEndpoint)
    # Serialize with json.dumps so the payload is real JSON (str(dict) yields
    # a Python repr with single quotes), encoded explicitly as UTF-8.
    payload = json.dumps(body).encode('utf-8')
    # The '?' separates the path from the query string; without it any
    # non-empty `params` would be glued onto the path and break the URL.
    conn.request('POST', '/text/analytics/v2.1/keyPhrases?%s' % params, payload, headers)
    response = conn.getresponse()
    jsonData = response.read().decode('UTF-8')  # raw JSON text of the response
    data = json.loads(jsonData)  # parse the JSON text into a dict
    print(data)
    for document in data['documents']:
        print('Document ' + document['id'] + ' has these key phrases:')
        for phrase in document['keyPhrases']:
            print(' ' + phrase)
        print('---------------------')
except Exception as ex:
    # Best-effort demo cell: report the problem instead of raising.
    print(ex)
finally:
    # Always release the connection, including when the request or parsing fails.
    if conn is not None:
        conn.close()

{'documents': [{'id': '1', 'keyPhrases': ['Wow', 'course']}, {'id': '2', 'keyPhrases': ['course']}], 'errors': []}
Document 1 has these key phrases:
 Wow
 course
---------------------
Document 2 has these key phrases:
 course
---------------------


# Removing Stopwords

In [6]:
# curl is short for "Client for URL".
# The terminal running the command acts as the client: it sends a request to
# the URL and receives the data returned in response.
# The -o option writes that data to a file (here: speech.txt).
!curl http://www.sthda.com/sthda/RDoc/example-files/martin-luther-king-i-have-a-dream-speech.txt -o speech.txt

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
 35  3307   35  1178    0     0   1680      0  0:00:01 --:--:--  0:00:01  1680
100  3307  100  3307    0     0   4717      0 --:--:-- --:--:-- --:--:--  4710


In [7]:
# Load the downloaded speech into memory. The `with` block guarantees the
# file handle is closed once the text has been read.
with open('speech.txt', 'r') as document:
    mlk_speech = document.read()

print(mlk_speech)


And so even though we face the difficulties of today and tomorrow, I still have a dream. It is a dream deeply rooted in the American dream.
 
I have a dream that one day this nation will rise up and live out the true meaning of its creed:
 
We hold these truths to be self-evident, that all men are created equal.
 
I have a dream that one day on the red hills of Georgia, the sons of former slaves and the sons of former slave owners will be able to sit down together at the table of brotherhood.
 
I have a dream that one day even the state of Mississippi, a state sweltering with the heat of injustice, sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice.
 
I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character.
 
I have a dream today!
 
I have a dream that one day, down in Alabama, with its vicious racists, with its governor having 

## Remove numbers

In [8]:
# Drop digits: test the text one character at a time and keep only the
# characters that are not digits.
mlk_speech = ''.join(filter(lambda ch: not ch.isdigit(), mlk_speech))

## Remove punctuation and make lower case

In [9]:
# `punctuation` is a string constant from the standard `string` module;
# print it to see exactly which characters it contains.
from string import punctuation
print(punctuation)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


In [10]:
# Delete every punctuation character, then convert upper case to lower case.
# The translation table maps each character in `punctuation` to "deleted".
punctuation_eraser = str.maketrans('', '', punctuation)
mlk_speech = mlk_speech.translate(punctuation_eraser).lower()
print(mlk_speech)


and so even though we face the difficulties of today and tomorrow i still have a dream it is a dream deeply rooted in the american dream
 
i have a dream that one day this nation will rise up and live out the true meaning of its creed
 
we hold these truths to be selfevident that all men are created equal
 
i have a dream that one day on the red hills of georgia the sons of former slaves and the sons of former slave owners will be able to sit down together at the table of brotherhood
 
i have a dream that one day even the state of mississippi a state sweltering with the heat of injustice sweltering with the heat of oppression will be transformed into an oasis of freedom and justice
 
i have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character
 
i have a dream today
 
i have a dream that one day down in alabama with its vicious racists with its governor having his lips dripping 

## Remove stopwords

In [11]:
# Stop words are very common words such as "be", "a" and "the".
import nltk
from nltk.corpus import stopwords

# Make sure the stop-word corpus is available locally.
nltk.download('stopwords')

# Show the first five English stop words as the cell's output.
stopwords.words('english')[0:5]

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\nakam\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['i', 'me', 'my', 'myself', 'we']

In [12]:
# Split the text on whitespace and keep only the words that are not stop words.
# The stop-word list is fetched once and stored in a set: the previous form
# evaluated stopwords.words('english') again for every single word and then
# scanned the resulting list linearly.
english_stopwords = set(stopwords.words('english'))
mlk_speech = ' '.join(word for word in mlk_speech.split() if word not in english_stopwords)

print(mlk_speech)

even though face difficulties today tomorrow still dream dream deeply rooted american dream dream one day nation rise live true meaning creed hold truths selfevident men created equal dream one day red hills georgia sons former slaves sons former slave owners able sit together table brotherhood dream one day even state mississippi state sweltering heat injustice sweltering heat oppression transformed oasis freedom justice dream four little children one day live nation judged color skin content character dream today dream one day alabama vicious racists governor lips dripping words interposition nullification one day right alabama little black boys black girls able join hands little white boys white girls sisters brothers dream today dream one day every valley shall exalted every hill mountain shall made low rough places made plain crooked places made straight glory lord shall revealed flesh shall see together hope faith go back south faith able hew mountain despair stone hope faith abl

# Stemming Technique

In [13]:
# Stemming: collapse different forms of a word (e.g. "slaves" -> "slave")
# into one stem so that they are counted together.
import nltk
import pandas as pd
from nltk.probability import FreqDist
from nltk.stem.porter import PorterStemmer

nltk.download('punkt')

# Turn the cleaned speech into a list of word tokens.
words = nltk.tokenize.word_tokenize(mlk_speech)

# Stem every token with the Porter stemmer.
ps = PorterStemmer()
stems = list(map(ps.stem, words))
print(stems)

# Frequency distribution (dictionary-like): stem -> number of occurrences.
fd = FreqDist(stems)
# Build a one-row frame (one column per stem) and transpose it, giving one
# row per stem with its count in column 0.
fd_df = pd.DataFrame(fd, index=[0]).transpose()

print(fd_df)

['even', 'though', 'face', 'difficulti', 'today', 'tomorrow', 'still', 'dream', 'dream', 'deepli', 'root', 'american', 'dream', 'dream', 'one', 'day', 'nation', 'rise', 'live', 'true', 'mean', 'creed', 'hold', 'truth', 'selfevid', 'men', 'creat', 'equal', 'dream', 'one', 'day', 'red', 'hill', 'georgia', 'son', 'former', 'slave', 'son', 'former', 'slave', 'owner', 'abl', 'sit', 'togeth', 'tabl', 'brotherhood', 'dream', 'one', 'day', 'even', 'state', 'mississippi', 'state', 'swelter', 'heat', 'injustic', 'swelter', 'heat', 'oppress', 'transform', 'oasi', 'freedom', 'justic', 'dream', 'four', 'littl', 'children', 'one', 'day', 'live', 'nation', 'judg', 'color', 'skin', 'content', 'charact', 'dream', 'today', 'dream', 'one', 'day', 'alabama', 'viciou', 'racist', 'governor', 'lip', 'drip', 'word', 'interposit', 'nullif', 'one', 'day', 'right', 'alabama', 'littl', 'black', 'boy', 'black', 'girl', 'abl', 'join', 'hand', 'littl', 'white', 'boy', 'white', 'girl', 'sister', 'brother', 'dream', '

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nakam\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [14]:
import matplotlib.pyplot as plt

# Order the stems from most to least frequent (the counts are in column 0).
counts = fd_df.sort_values(by=0, ascending=False)

# Bar chart of the first 60 rows, i.e. the 60 most frequent stems.
ar, ax = plt.subplots(figsize=(16, 9))
counts[0].iloc[:60].plot.bar(ax=ax)
ax.set(title='Frequency Distribution', ylabel='Freq of words', xlabel='Words')
plt.show()

<Figure size 1600x900 with 1 Axes>