In [1]:
from google.cloud import language_v1
from tqdm import tqdm
from tqdm import trange

In [3]:
# Imports the Google Cloud client library
!pip install --upgrade google-cloud-language

# Instantiates a client
client = language_v1.LanguageServiceClient.from_service_account_json("C:/DevRoot/dataset/flowing-bazaar-334005-93614458e39e.json")

# The text to analyze
text = u"Hello, world!"
document = language_v1.Document(
    content=text, type_=language_v1.Document.Type.PLAIN_TEXT
)

# Detects the sentiment of the text
sentiment = client.analyze_sentiment(
    request={"document": document}
).document_sentiment

print("Text: {}".format(text))
print("Sentiment: {}, {}".format(sentiment.score, sentiment.magnitude))



Text: Hello, world!
Sentiment: 0.6000000238418579, 0.6000000238418579


In [4]:
"""Demonstrates how to make a simple call to the Natural Language API."""

import argparse

from google.cloud import language_v1

def print_result(annotations):
    score = annotations.document_sentiment.score
    magnitude = annotations.document_sentiment.magnitude

    for index, sentence in enumerate(annotations.sentences):
        sentence_sentiment = sentence.sentiment.score
        print(
            "Sentence {} has a sentiment score of {}".format(index, sentence_sentiment)
        )

    print(
        "Overall Sentiment: score of {} with magnitude of {}".format(score, magnitude)
    )
    
    return 0

def analyze(content):
    """Run a sentiment analysis request on text within a passed filename."""
    client = language_v1.LanguageServiceClient.from_service_account_json("C:/DevRoot/dataset/flowing-bazaar-334005-93614458e39e.json")

#     with open(movie_review_filename, "r",  encoding='UTF8') as review_file:
#         # Instantiates a plain text document.
#         content = review_file.read()

    

    document = language_v1.Document(
        content=content, type_=language_v1.Document.Type.PLAIN_TEXT
    )
    
    annotations = client.analyze_sentiment(request={"document": document})

    # Print the results
    score = annotations.document_sentiment.score
    return score
    
    
# if __name__ == "__main__":
#     parser = argparse.ArgumentParser(
#         description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
#     )
#     parser.add_argument(
#         "movie_review_filename",
#         help="The filename of the movie review you'd like to analyze.",
#     )
#     args = parser.parse_args()

#     analyze(args.movie_review_filename)

In [5]:
import os
news_list = [file for file in os.listdir('.') if 'csv' in file]
news_list = news_list[51:64]
news_list

['삼성화재_기사_크롤링_2020.01.01_2021.12.06.csv',
 '셀트리온_기사_크롤링_2020.01.01_2021.12.06.csv',
 '신한지주_기사_크롤링_2020.01.01_2021.12.06.csv',
 '쌍용C&E_기사_크롤링_2020.01.01_2021.12.06.csv',
 '아모레G_기사_크롤링_2020.01.01_2021.12.06.csv',
 '아모레퍼시픽_기사_크롤링_2020.01.01_2021.12.06.csv',
 '에스오일_기사_크롤링_2020.01.01_2021.12.06.csv',
 '엔씨소프트_기사_크롤링_2020.01.01_2021.12.06.csv',
 '오리온_기사_크롤링_2020.01.01_2021.12.06.csv',
 '우리금융지주_기사_크롤링_2020.01.01_2021.12.06.csv',
 '유한양행_기사_크롤링_2020.01.01_2021.12.06.csv',
 '이마트_기사_크롤링_2020.01.01_2021.12.06.csv',
 '일진머티리얼즈_기사_크롤링_2020.01.01_2021.12.06.csv']

In [6]:
import pandas as pd

import os

def sentiment_scoring(news):
    contents = pd.read_csv(news)
    contents_refine = contents.loc[:,'refine_content']
    contents = pd.DataFrame(contents)
    contents_list = []

    tot_sum = 0
    for content in tqdm(contents.refine_content, desc = news):
        if pd.isna(content):
            contents_list.append(0)
        else:
            contents_list.append(analyze(content))
            
        tot_sum += 1
        
    contents['Senti_Score'] = contents_list
    
    news_path = os.path.join('.', news)
    contents.to_csv(news_path, encoding='utf-8')   

In [7]:
sentiment_scoring('포스코_기사_크롤링_2020.01.01_2021.12.06.csv')

포스코_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|████████████████████████████████| 3034/3034 [39:44<00:00,  1.27it/s]


In [13]:
for news in news_list[64]:
    sentiment_scoring(news)

셀트리온_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 1142/1142 [14:44<00:00,  1.29it/s]
신한지주_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 340/340 [04:27<00:00,  1.27it/s]
쌍용C&E_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 23/23 [00:15<00:00,  1.45it/s]
아모레G_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 6/6 [00:03<00:00,  1.68it/s]
아모레퍼시픽_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 611/611 [07:53<00:00,  1.29it/s]
에스오일_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 16/16 [00:13<00:00,  1.15it/s]
엔씨소프트_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 1027/1027 [12:29<00:00,  1.37it/s]
오리온_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 284/284 [03:36<00:00,  1.31it/s]
우리금융지주_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 375/375 [04:36<00:00,  1.36it/s]
유한양행_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 190/190 [02:22<00:00,  1.34it/s]
이마트_기사_크롤링_2020.01.01_2021.12.06.csv: 100%|██████████| 2660/2660 [32:39<00:00,  1.36it/s]
일진머티리얼즈_기사_크롤링_2020

TypeError: nan has type float, but expected one of: bytes, unicode