In [8]:
import pandas as pd
import json
import boto3
import time

comprehend = boto3.client(service_name='comprehend', region_name='us-east-1')

In [2]:
with open('./final_json_data.json') as f:
    df = pd.DataFrame(json.load(f))

In [3]:
df.head()

Unnamed: 0,sentiment,text
0,positive,"Thank you. Good afternoon, everyone. And welco..."
1,positive,"As we look past Q1, we expect the channel inve..."
2,neutral,And your last question comes from the line of ...
3,negative,"On the China gaming weakness, is it the slower..."
4,negative,"I don't know that we could tear that apart, te..."


In [4]:
text_list = df.text.tolist()

<h1> Amazon

In [5]:
amazon_sentiments_label = []
amazon_score_positive = []
amazon_score_negative = []
amazon_score_neutral = []
amazon_score_mixed = []

In [6]:
df.head()

Unnamed: 0,sentiment,text
0,positive,"Thank you. Good afternoon, everyone. And welco..."
1,positive,"As we look past Q1, we expect the channel inve..."
2,neutral,And your last question comes from the line of ...
3,negative,"On the China gaming weakness, is it the slower..."
4,negative,"I don't know that we could tear that apart, te..."


In [9]:
start = time.time()
for text in text_list:
    my_json = json.loads(json.dumps(comprehend.detect_sentiment(Text=text, LanguageCode='en'), sort_keys=True))
    amazon_sentiments_label.append(my_json['Sentiment'].lower())
    amazon_score_mixed.append(my_json['SentimentScore']['Mixed'])
    amazon_score_negative.append(my_json['SentimentScore']['Negative'])
    amazon_score_positive.append(my_json['SentimentScore']['Positive'])
    amazon_score_neutral.append(my_json['SentimentScore']['Neutral'])
end = time.time()
print("Time to execute", end-start)

Time to execute -96.88834238052368


In [10]:
df['amazon_sentiments_label'] = pd.Series(amazon_sentiments_label)
df['amazon_score_mixed'] = pd.Series(amazon_score_mixed)
df['amazon_score_negative'] = pd.Series(amazon_score_negative)
df['amazon_score_neutral'] = pd.Series(amazon_score_neutral)
df['amazon_score_positive'] = pd.Series(amazon_score_positive)

In [11]:
df.head()

Unnamed: 0,sentiment,text,amazon_sentiments_label,amazon_score_mixed,amazon_score_negative,amazon_score_neutral,amazon_score_positive
0,positive,"Thank you. Good afternoon, everyone. And welco...",neutral,0.001549,0.000533,0.750581,0.247337
1,positive,"As we look past Q1, we expect the channel inve...",neutral,0.009229,0.006072,0.880566,0.104133
2,neutral,And your last question comes from the line of ...,neutral,0.000831,0.008268,0.980631,0.01027
3,negative,"On the China gaming weakness, is it the slower...",negative,0.011638,0.953385,0.034877,9.9e-05
4,negative,"I don't know that we could tear that apart, te...",positive,0.014957,0.000221,0.007328,0.977495


<h1> Watson- IBM

In [13]:
import os
import json
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions

In [27]:
api_key = os.environ.get('IAM_ACCESS_IBM')
url = "https://gateway.watsonplatform.net/natural-language-understanding/api"
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2018-11-16',
    iam_apikey=api_key,
    url=url
)
def ibm_sentiments(data):
#     data = ("I've seen things you people wouldn't believe. Attack ships on fire off the shoulder of Orion. I watched C-beams glitter in the dark near the Tannhauser Gate. All those moments will be lost in time, like tears in rain. Time to die.")
    response = natural_language_understanding.analyze(
        text=data,
        features=Features(sentiment=SentimentOptions()),
        language="en"
    ).get_result()
    response = json.loads(json.dumps(response))
    return response


In [28]:
ibm_sentiments_label = []
ibm_score = []

In [29]:
start = time.time()
for text in text_list:
    my_json = ibm_sentiments(text)
    ibm_sentiments_label.append(my_json['sentiment']['document']['label'].lower())
    ibm_score.append(my_json['sentiment']['document']['score'])
end = time.time()
print("Time to execute", end-start)

Time to execute 626.9456360340118


In [30]:
df['ibm_sentiments_label'] = pd.Series(ibm_sentiments_label)
df['ibm_score'] = pd.Series(ibm_score)

In [54]:
# df.drop(['amazon'],axis =1, inplace=True)

In [31]:
df.head()

Unnamed: 0,sentiment,text,amazon_sentiments_label,amazon_score_mixed,amazon_score_negative,amazon_score_neutral,amazon_score_positive,ibm_sentiments_label,ibm_score
0,positive,"Thank you. Good afternoon, everyone. And welco...",neutral,0.001549,0.000533,0.750581,0.247337,positive,0.816136
1,positive,"As we look past Q1, we expect the channel inve...",neutral,0.009229,0.006072,0.880566,0.104133,positive,0.558518
2,neutral,And your last question comes from the line of ...,neutral,0.000831,0.008268,0.980631,0.01027,neutral,0.0
3,negative,"On the China gaming weakness, is it the slower...",negative,0.011638,0.953385,0.034877,9.9e-05,negative,-0.598559
4,negative,"I don't know that we could tear that apart, te...",positive,0.014957,0.000221,0.007328,0.977495,positive,0.790615


In [32]:
df.to_json(path_or_buf ="temp_label_json_data.json",orient='records') 

<h1>Google cloud language

In [54]:
import pandas as pd
import json
import time
import os

In [10]:
with open('./temp_label_json_data.json') as f:
    df = pd.DataFrame(json.load(f))

In [11]:
df.head()

Unnamed: 0,amazon_score_mixed,amazon_score_negative,amazon_score_neutral,amazon_score_positive,amazon_sentiments_label,ibm_score,ibm_sentiments_label,sentiment,text
0,0.001549,0.000533,0.750581,0.247337,neutral,0.816136,positive,positive,"Thank you. Good afternoon, everyone. And welco..."
1,0.009229,0.006072,0.880566,0.104133,neutral,0.558518,positive,positive,"As we look past Q1, we expect the channel inve..."
2,0.000831,0.008268,0.980631,0.01027,neutral,0.0,neutral,neutral,And your last question comes from the line of ...
3,0.011638,0.953385,0.034877,9.9e-05,negative,-0.598559,negative,negative,"On the China gaming weakness, is it the slower..."
4,0.014957,0.000221,0.007328,0.977495,positive,0.790615,positive,negative,"I don't know that we could tear that apart, te..."


In [14]:
text_list = df['text'].tolist()

In [15]:
text_list[0]

"Thank you. Good afternoon, everyone. And welcome to NVIDIA's conference call for the fourth quarter of fiscal 2019. With me on the call today from NVIDIA are Jen-Hsun Huang, President and Chief Executive Officer and Colette Kress, Executive Vice President and Chief Financial Officer. I'd like to remind you that our call is being webcast live on NVIDIA's Investor Relations Web site."

In [25]:
## This code runs on linux and not on Windows
# Imports the Google Cloud client library
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

# Instantiates a client
client = language.LanguageServiceClient()

def google_sentiments_api(text_in):
    # The text to analyze
    text = text_in
    document = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment
    return sentiment

# print('Text: {}'.format(text))
# print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

The score of a document's sentiment indicates the overall emotion of a document. The magnitude of a document's sentiment indicates how much emotional content is present within the document, and this value is often proportional to the length of the document.

It is important to note that the Natural Language API indicates differences between positive and negative emotion in a document, but does not identify specific positive and negative emotions. For example, "angry" and "sad" are both considered negative emotions. However, when the Natural Language API analyzes text that is considered "angry", or text that is considered "sad", the response only indicates that the sentiment in the text is negative, not "sad" or "angry".

A document with a neutral score (around 0.0) may indicate a low-emotion document, or may indicate mixed emotions, with both high positive and negative values which cancel each out. Generally, you can use magnitude values to disambiguate these cases, as truly neutral documents will have a low magnitude value, while mixed documents will have higher magnitude values.

When comparing documents to each other (especially documents of different length), make sure to use the magnitude values to calibrate your scores, as they can help you gauge the relevant amount of emotional content.

The chart below shows some sample values and how to interpret them:

Sentiment	Sample Values
<p>Clearly Positive*	"score": 0.8, "magnitude": 3.0
<p>Clearly Negative*	"score": -0.6, "magnitude": 4.0
<p>Neutral	"score": 0.1, "magnitude": 0.0
<p>Mixed	"score": 0.0, "magnitude": 4.0
<p>* “Clearly positive” and “clearly negative” sentiment varies for different use cases and customers. You might find differing results for your specific scenario. We recommend that you define a threshold that works for you, and then adjust the threshold after testing and verifying the results. For example, you may define a threshold of any score over 0.25 as clearly positive, and then modify the score threshold to 0.15 after reviewing your data and results and finding that scores from 0.15-0.25 should be considered positive as well.

In [26]:
def get_label(sentiment):
    label = ""
    if sentiment.score > 0.5 and sentiment.magnitude > 1.5:
        label = "positive"
    elif sentiment.score < -0.5 and sentiment.magnitude > 1.5:
        label = "negative"
    else:
        label = "neutral"
    return label

In [45]:
google_sentiment_socre = []
google_sentiment_magnitude = []
google_sentiment_label = []

In [46]:
start = time.time()
# i = 0
for text in text_list:
    sentiment = google_sentiments_api(text)
#     print("Index" + str(i) + " Score: " + str(sentiment.score) + " Magn: " + str(sentiment.magnitude))
    google_sentiment_socre.append(sentiment.score)
    google_sentiment_magnitude.append(sentiment.magnitude)
    google_sentiment_label.append(get_label(sentiment))
#     i+=1
end = time.time()
print("Time to execute", end-start)

Time to execute 238.47278022766113


In [47]:
df['google_sentiment_socre'] = pd.Series(google_sentiment_socre)
df['google_sentiment_magnitude'] = pd.Series(google_sentiment_magnitude)
df['google_sentiment_label'] = pd.Series(google_sentiment_label)

In [48]:
df.head()

Unnamed: 0,amazon_score_mixed,amazon_score_negative,amazon_score_neutral,amazon_score_positive,amazon_sentiments_label,ibm_score,ibm_sentiments_label,sentiment,text,google_sentiment_socre,google_sentiment_magnitude,google_sentiment_label
0,0.001549,0.000533,0.750581,0.247337,neutral,0.816136,positive,positive,"Thank you. Good afternoon, everyone. And welco...",0.2,1.1,neutral
1,0.009229,0.006072,0.880566,0.104133,neutral,0.558518,positive,positive,"As we look past Q1, we expect the channel inve...",0.2,1.4,neutral
2,0.000831,0.008268,0.980631,0.01027,neutral,0.0,neutral,neutral,And your last question comes from the line of ...,0.0,0.0,neutral
3,0.011638,0.953385,0.034877,9.9e-05,negative,-0.598559,negative,negative,"On the China gaming weakness, is it the slower...",-0.3,1.2,neutral
4,0.014957,0.000221,0.007328,0.977495,positive,0.790615,positive,negative,"I don't know that we could tear that apart, te...",0.5,7.9,neutral


Correcting the label values based on few observations

In [50]:
def get_label_corrected(sentiment):
    label = ""
    if sentiment.score > 0.2 and sentiment.magnitude > 1.0:
        label = "positive"
    elif sentiment.score < -0.3 and sentiment.magnitude > 1.0:
        label = "negative"
    else:
        label = "neutral"
    return label

In [51]:
google_sentiment_socre = []
google_sentiment_magnitude = []
google_sentiment_label = []
start = time.time()
for text in text_list:
    sentiment = google_sentiments_api(text)
    google_sentiment_socre.append(sentiment.score)
    google_sentiment_magnitude.append(sentiment.magnitude)
    google_sentiment_label.append(get_label_corrected(sentiment))
end = time.time()
print("Time to execute", end-start)
df['google_sentiment_socre'] = pd.Series(google_sentiment_socre)
df['google_sentiment_magnitude'] = pd.Series(google_sentiment_magnitude)
df['google_sentiment_label'] = pd.Series(google_sentiment_label)

Time to execute 248.48036670684814


In [52]:
df.head()

Unnamed: 0,amazon_score_mixed,amazon_score_negative,amazon_score_neutral,amazon_score_positive,amazon_sentiments_label,ibm_score,ibm_sentiments_label,sentiment,text,google_sentiment_socre,google_sentiment_magnitude,google_sentiment_label
0,0.001549,0.000533,0.750581,0.247337,neutral,0.816136,positive,positive,"Thank you. Good afternoon, everyone. And welco...",0.2,1.1,positive
1,0.009229,0.006072,0.880566,0.104133,neutral,0.558518,positive,positive,"As we look past Q1, we expect the channel inve...",0.2,1.4,positive
2,0.000831,0.008268,0.980631,0.01027,neutral,0.0,neutral,neutral,And your last question comes from the line of ...,0.0,0.0,neutral
3,0.011638,0.953385,0.034877,9.9e-05,negative,-0.598559,negative,negative,"On the China gaming weakness, is it the slower...",-0.3,1.2,negative
4,0.014957,0.000221,0.007328,0.977495,positive,0.790615,positive,negative,"I don't know that we could tear that apart, te...",0.5,7.9,positive


In [53]:
df.to_json(path_or_buf ="temp2_label_json_data.json",orient='records') 

<h1>Azure-Cognitive Services-Text Analysis

In [82]:
import pandas as pd
import json
import time
import os

In [111]:
subscription_key = "a4478e4ed0f1400a9f79298935025c63"

In [102]:
subscription_key = os.environ.get('AZURE_SUB_KEY_1')
assert subscription_key

In [85]:
with open('./temp2_label_json_data.json') as f:
    df = pd.DataFrame(json.load(f))

In [86]:
df.head()

Unnamed: 0,amazon_score_mixed,amazon_score_negative,amazon_score_neutral,amazon_score_positive,amazon_sentiments_label,google_sentiment_label,google_sentiment_magnitude,google_sentiment_socre,ibm_score,ibm_sentiments_label,sentiment,text
0,0.001549,0.000533,0.750581,0.247337,neutral,positive,1.1,0.2,0.816136,positive,positive,"Thank you. Good afternoon, everyone. And welco..."
1,0.009229,0.006072,0.880566,0.104133,neutral,positive,1.4,0.2,0.558518,positive,positive,"As we look past Q1, we expect the channel inve..."
2,0.000831,0.008268,0.980631,0.01027,neutral,neutral,0.0,0.0,0.0,neutral,neutral,And your last question comes from the line of ...
3,0.011638,0.953385,0.034877,9.9e-05,negative,negative,1.2,-0.3,-0.598559,negative,negative,"On the China gaming weakness, is it the slower..."
4,0.014957,0.000221,0.007328,0.977495,positive,positive,7.9,0.5,0.790615,positive,negative,"I don't know that we could tear that apart, te..."


In [87]:
text_list = df['text'].tolist()

In [88]:
text_analytics_base_url = "https://westcentralus.api.cognitive.microsoft.com/text/analytics/v2.0/"

In [89]:
sentiment_api_url = text_analytics_base_url + "sentiment"
print(sentiment_api_url)

https://westcentralus.api.cognitive.microsoft.com/text/analytics/v2.0/sentiment


In [90]:
document_list

[{'id': '0',
  'language': 'en',
  'text': "Thank you. Good afternoon, everyone. And welcome to NVIDIA's conference call for the fourth quarter of fiscal 2019. With me on the call today from NVIDIA are Jen-Hsun Huang, President and Chief Executive Officer and Colette Kress, Executive Vice President and Chief Financial Officer. I'd like to remind you that our call is being webcast live on NVIDIA's Investor Relations Web site."},
 {'id': '1',
  'language': 'en',
  'text': "As we look past Q1, we expect the channel inventory correction to be behind us and our business to have bottomed. On a full year basis, we expect our gaming business to be down slightly given the tough first half compares with growth in Turing and notebook gaming. At CES last month, we launched into the recovery of our gaming business. We announced the GeForce RTX 2060 at the mid range price point of [$349] [ph]. The 2060 delivers a 60% performance improvement over the GTX 1060 while also bringing Turing's real time ra

In [96]:
document_list = []
for i,text in enumerate(text_list):
    document = {"id":str(i),
                "language":"en",
                "text":text[:5119]
    }
    document_list.append(document)

In [97]:
document_list[1]

{'id': '1',
 'language': 'en',
 'text': "As we look past Q1, we expect the channel inventory correction to be behind us and our business to have bottomed. On a full year basis, we expect our gaming business to be down slightly given the tough first half compares with growth in Turing and notebook gaming. At CES last month, we launched into the recovery of our gaming business. We announced the GeForce RTX 2060 at the mid range price point of [$349] [ph]. The 2060 delivers a 60% performance improvement over the GTX 1060 while also bringing Turing's real time ray tracing and AI features to the mass market for the first time. The 2060 has received rave reviews and is off to a great start."}

In [103]:
document_part_1 = {"documents" : document_list[:1000]}

In [104]:
document_part_1

{'documents': [{'id': '0',
   'language': 'en',
   'text': "Thank you. Good afternoon, everyone. And welcome to NVIDIA's conference call for the fourth quarter of fiscal 2019. With me on the call today from NVIDIA are Jen-Hsun Huang, President and Chief Executive Officer and Colette Kress, Executive Vice President and Chief Financial Officer. I'd like to remind you that our call is being webcast live on NVIDIA's Investor Relations Web site."},
  {'id': '1',
   'language': 'en',
   'text': "As we look past Q1, we expect the channel inventory correction to be behind us and our business to have bottomed. On a full year basis, we expect our gaming business to be down slightly given the tough first half compares with growth in Turing and notebook gaming. At CES last month, we launched into the recovery of our gaming business. We announced the GeForce RTX 2060 at the mid range price point of [$349] [ph]. The 2060 delivers a 60% performance improvement over the GTX 1060 while also bringing Tu

In [112]:
start = time.time()
headers   = {"Ocp-Apim-Subscription-Key": subscription_key}
response_1  = requests.post(sentiment_api_url, headers=headers, json=document_part_1)
sentiments_part_1 = response_1.json()
pprint(sentiments_part_1)
end = time.time()
print("Time to execute", end-start)

{'documents': [{'id': '0', 'score': 0.9783278107643127},
               {'id': '1', 'score': 0.5},
               {'id': '2', 'score': 0.5},
               {'id': '3', 'score': 0.5},
               {'id': '4', 'score': 0.9059334397315979},
               {'id': '5', 'score': 0.9041327238082886},
               {'id': '6', 'score': 0.8321921229362488},
               {'id': '7', 'score': 0.5},
               {'id': '8', 'score': 0.5},
               {'id': '9', 'score': 0.7486802935600281},
               {'id': '10', 'score': 0.952201247215271},
               {'id': '11', 'score': 0.5},
               {'id': '12', 'score': 0.5},
               {'id': '13', 'score': 0.5},
               {'id': '14', 'score': 0.5},
               {'id': '15', 'score': 0.9159338474273682},
               {'id': '16', 'score': 0.5},
               {'id': '17', 'score': 0.8002594709396362},
               {'id': '18', 'score': 0.5},
               {'id': '19', 'score': 0.5},
               {'id': '20', 'sc

In [106]:
document_part_2 = {"documents" : document_list[1000:]}

In [114]:
document_part_2

{'documents': [{'id': '1000',
   'language': 'en',
   'text': 'Okay. Or do you want, Deepak, to save you?'},
  {'id': '1001', 'language': 'en', 'text': 'Deepak?'},
  {'id': '1002',
   'language': 'en',
   'text': "Sure, okay. Thank you, yes. Well, first of all, Elon, thank you very much for the opportunity for me to be here and be here again a second time. I've learned a lot from you and I've been always inspired by you, and I've been also very inspired by the team at Tesla who are incredibly brilliant, very passionate and just amazingly perseverant, the best team I could imagine. So thank you everybody for that. There is no good time to make this change. We felt strongly this was a good time. It's a new chapter, a new year. Tesla has had two great quarters of profitability, cash flow, so now a really solid foundation. And I feel really good about Zach taking over as the CFO. He's proven himself with his many years of experience and many tough challenges that he's worked on and really 

In [115]:
start = time.time()
headers   = {"Ocp-Apim-Subscription-Key": subscription_key}
response_2  = requests.post(sentiment_api_url, headers=headers, json=document_part_2)
sentiments_part_2 = response_2.json()
pprint(sentiments_part_2)
end = time.time()
print("Time to execute", end-start)

{'documents': [{'id': '1000', 'score': 0.8111072778701782},
               {'id': '1001', 'score': 0.7526739239692688},
               {'id': '1002', 'score': 0.996847927570343},
               {'id': '1003', 'score': 0.9538787603378296},
               {'id': '1004', 'score': 0.973930835723877},
               {'id': '1005', 'score': 0.9699565172195435},
               {'id': '1006', 'score': 0.8949838876724243},
               {'id': '1007', 'score': 0.9769207239151001},
               {'id': '1008', 'score': 0.9347273707389832},
               {'id': '1009', 'score': 0.721630871295929},
               {'id': '1010', 'score': 0.5},
               {'id': '1011', 'score': 0.5},
               {'id': '1012', 'score': 0.26626986265182495},
               {'id': '1013', 'score': 0.7673466801643372},
               {'id': '1014', 'score': 0.5},
               {'id': '1015', 'score': 0.7474827170372009},
               {'id': '1016', 'score': 0.5},
               {'id': '1017', 'score': 0.5

In [129]:
def get_azure_label(score):
    label = ""
    if score > 0.75:
        label = 'positive'
    elif score >= 0.5:
        label = 'neutral'
    else:
        label = 'negative'
    return label

In [118]:
sentiments_part_2['documents'][0]['score']
# print("Type",type(document_part_1['documents']))

0.8111072778701782

In [130]:
azure_api_score = []
azure_api_label = []

for score_dict in sentiments_part_1['documents']:
    azure_api_score.append(score_dict['score'])
    azure_api_label.append(get_azure_label(score_dict['score']))
for score_dict in sentiments_part_2['documents']:
    azure_api_score.append(score_dict['score'])
    azure_api_label.append(get_azure_label(score_dict['score']))

In [131]:
df['azure_api_score'] = pd.Series(azure_api_score)
df['azure_api_label'] = pd.Series(azure_api_label)

In [132]:
df.head()

Unnamed: 0,amazon_score_mixed,amazon_score_negative,amazon_score_neutral,amazon_score_positive,amazon_sentiments_label,google_sentiment_label,google_sentiment_magnitude,google_sentiment_socre,ibm_score,ibm_sentiments_label,sentiment,text,azure_api_score,azure_api_label
0,0.001549,0.000533,0.750581,0.247337,neutral,positive,1.1,0.2,0.816136,positive,positive,"Thank you. Good afternoon, everyone. And welco...",0.978328,positive
1,0.009229,0.006072,0.880566,0.104133,neutral,positive,1.4,0.2,0.558518,positive,positive,"As we look past Q1, we expect the channel inve...",0.5,neutral
2,0.000831,0.008268,0.980631,0.01027,neutral,neutral,0.0,0.0,0.0,neutral,neutral,And your last question comes from the line of ...,0.5,neutral
3,0.011638,0.953385,0.034877,9.9e-05,negative,negative,1.2,-0.3,-0.598559,negative,negative,"On the China gaming weakness, is it the slower...",0.5,neutral
4,0.014957,0.000221,0.007328,0.977495,positive,positive,7.9,0.5,0.790615,positive,negative,"I don't know that we could tear that apart, te...",0.905933,positive


In [133]:
df.to_json(path_or_buf ="final_label_json_data.json",orient='records') 