In [3]:
!pip install --upgrade "watson-developer-cloud>=2.2.6"

Requirement already up-to-date: watson-developer-cloud>=2.2.6 in /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages (2.2.6)


In [47]:
%run "../../../apikey.py"

from watson_developer_cloud import ToneAnalyzerV3, WatsonApiException
import csv
import pandas as pd

# version can be 2016-05-19 or 2017-09-21
# 2016 version returns between 0 and 1, 2017 returns between 0.5 and 1
tone_analyzer = ToneAnalyzerV3(
    version='2017-09-21',
    username=api['ToneAnalyzer']['username'],
    password=api['ToneAnalyzer']['password'],
    url=api['ToneAnalyzer']['URL']
)

In [33]:
text = 'Team, I know that times are tough! Product '\
    'sales have been disappointing for the past three '\
    'quarters. We have a competitive product, but we '\
    'need to do a better job of selling it!'

In [13]:
# analyze 'text': only 2500 API calls / month!
try:
    result = tone_analyzer.tone(
        {'text': text},
        'application/json'
    ).get_result()
except WatsonApiException as ex:
    print("Method failed with status code " + str(ex.code) + ": " + ex.message)

In [22]:
def to_tone_dict(tone_arr_json):
    """
    turn [dict({'score': {float}, 'tone_id': {str}, 'tone_name', {str}})] * {1 <= int <= 6}
    into [{float}] * 7 where each index is the score of the tone specified in id_to_key
    """
    tone_dict = {'Anger': 0, 'Fear': 0, 'Joy': 0, 'Sadness': 0, 'Analytical': 0, 'Confident': 0, 'Tentative': 0}
    for tone in tone_arr_json:
        tone_dict[tone['tone_name']] = tone['score']
    
    return tone_dict

In [15]:
def distance(arr1, arr2):
    """
    find distance between two n-dimensional arrays
    """
    return sum([(arr2[i] - arr1[i]) ** 2 for i in range(len(arr1))]) ** (0.5)

In [20]:
def tone_distance(tones1, tones2):
    """
    find distance between two tone dictionaries
    """
    return distance(list(to_tone_dict(tones1).values()), list(to_tone_dict(tones2).values()))

In [37]:
print('text:')
print(text)
print('\n')

print('direct output:')
print(result)
print('\n')

doc_tones = result['document_tone']['tones']
sentence_tones = result['sentences_tone']
print('document-level tones:')
print(to_tone_dict(doc_tones))
print('\n')

sentences = {}
for sentence in sentence_tones:
    sentences[sentence['text']] = sentence['tones']

# prints distance from each sentence to the document-level analysis: to figure out which sentence is 
# the most different in terms of tone from the other sentences
for text, tones in sentences.items():
    print(text)
    print(to_tone_dict(tones))
    print('distance to doc_tones: ' + str(tone_distance(doc_tones, tones)))

text:
We have a competitive product, but we need to do a better job of selling it!


direct output:
{'document_tone': {'tones': [{'score': 0.6165, 'tone_id': 'sadness', 'tone_name': 'Sadness'}, {'score': 0.829888, 'tone_id': 'analytical', 'tone_name': 'Analytical'}]}, 'sentences_tone': [{'sentence_id': 0, 'text': 'Team, I know that times are tough!', 'tones': [{'score': 0.801827, 'tone_id': 'analytical', 'tone_name': 'Analytical'}]}, {'sentence_id': 1, 'text': 'Product sales have been disappointing for the past three quarters.', 'tones': [{'score': 0.771241, 'tone_id': 'sadness', 'tone_name': 'Sadness'}, {'score': 0.687768, 'tone_id': 'analytical', 'tone_name': 'Analytical'}]}, {'sentence_id': 2, 'text': 'We have a competitive product, but we need to do a better job of selling it!', 'tones': [{'score': 0.506763, 'tone_id': 'analytical', 'tone_name': 'Analytical'}]}]}


document-level tones:
{'Anger': 0, 'Fear': 0, 'Joy': 0, 'Sadness': 0.6165, 'Analytical': 0.829888, 'Confident': 0, 'Te

### Read in CSVs
from /data/raw/posts into /data/raw/sentiments

In [40]:
def analyze_tones(text):
    try:
        result = tone_analyzer.tone(
            {'text': text},
            'application/json'
        ).get_result()
        return result
    except WatsonApiException as ex:
        print("Method failed with status code " + str(ex.code) + ": " + ex.message)
        return None

In [49]:
#read in data from data folder
input_file = 'battlingdepressionnomeds100'
input_filepath = '../../data/raw/posts/' + input_file + '.csv'

# analyze data in folder, return analysis into csv
sentiments_doclevel = {'post #': [], 'username': [], 'text': [], 'Anger': [], 'Fear': [], 'Joy': [], 'Sadness': [], 'Analytical': [], 'Confident': [], 'Tentative': []}
sentiments_sentencelevel = {'post #': [], 'username': [], 'text': [], 'Anger': [], 'Fear': [], 'Joy': [], 'Sadness': [], 'Analytical': [], 'Confident': [], 'Tentative': []}

with open(input_filepath, 'r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        result = analyze_tones(row['post content'])
        
        # document level tones        
        sentiments_doclevel['post #'].append(row[''])
        sentiments_doclevel['username'].append(row['username'])
        sentiments_doclevel['text'].append(row['post content'])
        doclevel_tones = to_tone_dict(result['document_tone']['tones'])
        for tone in doclevel_tones.keys():
            sentiments_doclevel[tone].append(doclevel_tones[tone])
        
        # if only one sentence- only doc tones, no sentence tone
        if 'sentences_tone' in result:
            # sentence level tones
            for sentence in result['sentences_tone']:
                sentiments_sentencelevel['post #'].append(row[''])
                sentiments_sentencelevel['username'].append(row['username'])
                sentiments_sentencelevel['text'].append(sentence['text'])
                sentencelevel_tones = to_tone_dict(sentence['tones'])
                for tone in sentencelevel_tones.keys():
                    sentiments_sentencelevel[tone].append(sentencelevel_tones[tone])

In [50]:
output_file_doclevel = input_file + '_doclevelsentiments'
output_filename_doclevel = '../../data/raw/sentiments/' + output_file_doclevel + '.csv'
df = pd.DataFrame(sentiments_doclevel)
df.to_csv(output_filename_doclevel)

output_file_sentencelevel = input_file + '_sentencelevelsentiments'
output_filename_sentencelevel = '../../data/raw/sentiments/' + output_file_sentencelevel + '.csv'
df = pd.DataFrame(sentiments_sentencelevel)
df.to_csv(output_filename_sentencelevel)