## PROJECT-2: PART-2 - NLP Stock Sentiment Scores

---

In [2]:
# Initial imports
import os
import json
import requests
import dateutil
import pandas as pd
import numpy as np
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
%matplotlib inline

# Importing Natural Language Libraries and Dependencies
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
analyzer = SentimentIntensityAnalyzer()

# Importing warning ignore filter
import warnings
warnings.simplefilter(action="ignore")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\VSNU\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [143]:
# Sourcing and Preprocessing Input Data
# Load the algorithm's trade signals. `Date` is read as plain text so that each
# value can be parsed on its own below. The former `parse_dates=True` /
# `infer_datetime_format=True` arguments were dropped: with no index column
# requested, `parse_dates=True` has nothing to parse, and
# `infer_datetime_format` is deprecated in recent pandas releases.
filepath = Path("NLP_Resource/Resource/_COMPOSITE2.csv")
algo_results_df = pd.read_csv(filepath)


def _to_mmddyyyy(raw_date):
    """Return `raw_date` (a date string) re-formatted as zero-padded MMDDYYYY."""
    dt = dateutil.parser.parse(raw_date)
    # Month and day are padded to two digits; the year is written as-is.
    return "{:02d}{:02d}{}".format(dt.month, dt.day, dt.year)


# Replace the whole column in a single assignment. Writing cell by cell through
# chained indexing (`df['Date'][i] = ...`) is not guaranteed by pandas to update
# the original frame and only "worked" silently because warnings are suppressed.
algo_results_df['Date'] = algo_results_df['Date'].map(_to_mmddyyyy)
algo_results_df.head()

Unnamed: 0,Stock,Date,Buy/Sell
0,JPM,2052019,1
1,JPM,2212019,1
2,JPM,3192019,1
3,JPM,4092019,1
4,JPM,6132019,1


In [144]:
# JSON Source File Name Generation
# The file stem is the ticker followed by the re-formatted date string, e.g.
# "JPM" + "02052019". Both columns hold strings, so one vectorized
# concatenation builds every name without a row-by-row loop or scratch list.
algo_results_df['JSON File Name'] = algo_results_df['Stock'] + algo_results_df['Date']
algo_results_df.head()

Unnamed: 0,Stock,Date,Buy/Sell,JSON File Name
0,JPM,2052019,1,JPM02052019
1,JPM,2212019,1,JPM02212019
2,JPM,3192019,1,JPM03192019
3,JPM,4092019,1,JPM04092019
4,JPM,6132019,1,JPM06132019


In [145]:
# Function to Collate Text in a JSON file into a Single Source for Sentiment Score Calculation
def String(s):
    """Collate an iterable of strings into one space-separated string.

    Parameters
    ----------
    s : iterable of str
        The text fragments to combine, in order.

    Returns
    -------
    str
        A single space followed by every element of `s`, each preceded by one
        space. An empty iterable therefore yields ``" "`` and ``["a", "b"]``
        yields ``"  a b"``.
    """
    # `str.join` builds the result in one pass and avoids rebinding the
    # builtin name `str`; the leading " " keeps the original output format.
    return " " + "".join(" " + ele for ele in s)

In [146]:
# Function to Calculate Sentiment Scores
def stock_score(json_path):
    """Return the VADER compound sentiment score for one news JSON file.

    Parameters
    ----------
    json_path : str or path-like
        Path to a JSON file whose top-level ``'data'`` entry is a sequence of
        articles, each providing ``'title'`` and ``'text'`` strings.

    Returns
    -------
    float or None
        The ``"compound"`` entry of ``analyzer.polarity_scores`` evaluated on
        all titles and texts collated into a single string, or ``None`` when
        ``'data'`` is empty.
    """
    # Reading JSON file for calculating NLP Sentiment Scores.
    # Decode explicitly as UTF-8 (the standard JSON encoding) instead of the
    # platform's locale code page, which differs on Windows and can garble or
    # reject non-ASCII characters in the news text.
    with open(json_path, 'r', encoding='utf-8') as f:
        new_data = json.load(f)

    articles = new_data['data']

    # No articles for this stock/date: there is nothing to score.
    if len(articles) == 0:
        return None

    # Calculating Compound Sentiment Scores over title + body of every article.
    temp_data = [article['title'] + ' ' + article['text'] for article in articles]
    sentiment = analyzer.polarity_scores(String(temp_data))
    return sentiment["compound"]

In [147]:
# Script to Calculate Compound Scores
# Score the news file behind each row, in row order; `stock_score` yields None
# for files that contain no articles.
sentiment_score = [
    stock_score("NLP_Resource/" + file_stem + '.json')
    for file_stem in algo_results_df['JSON File Name']
]
algo_results_df['Sentiment Score'] = sentiment_score
algo_results_df.head()

Unnamed: 0,Stock,Date,Buy/Sell,JSON File Name,Sentiment Score
0,JPM,2052019,1,JPM02052019,
1,JPM,2212019,1,JPM02212019,-0.4767
2,JPM,3192019,1,JPM03192019,0.9075
3,JPM,4092019,1,JPM04092019,0.5236
4,JPM,6132019,1,JPM06132019,


#### *The following commented-out code snippet was used to write the sentiment scores to the 'Sentiment_Scores_1.csv' file. For convenience, this file was already created during project development and deployment and has been placed under the 'Resources' folder. It was used as input data for the machine learning/artificial neural network model in 'ALGO_ML_DEV_3_FINAL.ipynb'.*

In [None]:
# algo_results_df.to_csv("Sentiment_Scores_1.csv")