# 10-K Sentiment Analysis Project:

In [6]:
import os
import re
import time
import pandas as pd
from pandas import json_normalize
from datetime import date
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv
load_dotenv()
import json
import urllib.request
import xml.etree.ElementTree as ET
from collections import Counter

# Import ML libraries:
import numpy as np
from nltk import ngrams
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
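# import nltk; nltk.download('vader_lexicon')   # First run only: uncomment to download the VADER lexicon.
# Note: `nltk` itself is not imported above, and the download must run before SentimentIntensityAnalyzer() is created.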

# Import plotting libraries:
import panel as pn
import holoviews as hv
import hvplot.pandas
from holoviews import opts

In [None]:
# Map each SEC CIK identifier to the ticker symbol held for it:
positions_dict = {'50863': 'INTC'}

# Tabulate the mapping, one row per position:
positions_df = pd.DataFrame(
    list(positions_dict.items()),
    columns=['CIK', 'Ticker'],
)

# Configure AlphaVantage API

In [None]:
import alpha_vantage as av
from alpha_vantage.alphavantage import AlphaVantage
from alpha_vantage.timeseries import TimeSeries as ts

In [None]:
# AlphaVantage API key (read from the environment) and base query URL:
av_api_key = os.getenv("av_api_key")
av_api = 'https://www.alphavantage.co/query?'

# Time-series client configured to return pandas output:
ts_query = ts(output_format='pandas', key=av_api_key)

# Query input variables shared by the cells below:
interval, period = '60min', 60
outputsize, series_type = 'full', 'close'

In [None]:
# Pause, in seconds, between tickers (adjust when testing with more or fewer tickers):
sleep = 13

In [None]:
# Initiate tabs (one tab is appended per ticker):
tabs = pn.Tabs()

# Iterate the ticker symbols themselves. Iterating the DataFrame directly
# only yields its column labels ('CIK', 'Ticker'), not the row values.
tickers = list(positions_df['Ticker'])

# Loop through the tickers to pull stock price data, export it and plot it:
for position, ticker in enumerate(tickers):

    # Get Time-Series
    print(f"Getting time-series for {ticker} ticker")
    data_ts, meta_data_ts = ts_query.get_intraday(
        symbol=ticker,
        interval=interval,
        outputsize=outputsize,
    )

    # Create time-series DataFrame, keeping only the closing price column
    print(f"Creating time-series DataFrame for {ticker} ticker")
    ts_df = pd.DataFrame.from_dict(data_ts)
    ts_df = ts_df.drop(['5. volume', '3. low', '2. high', '1. open'], axis=1)
    ts_df = ts_df.rename(columns={'4. close': 'Closing Price'})

    # Output data for ticker to .csv file:
    print(f'Output data for {ticker} to a .csv file')
    ts_df.to_csv(f'../Data/{ticker}_ts_data.csv', index=True)

    # Create Time-Series plot. This is built once per ticker; the previous
    # inner loop over the characters of the string only repeated this work.
    print(f'Creating Time-Series plot for {ticker}')
    ts_plot = ts_df.hvplot(
        kind='line',
        ylabel='Values',
        legend='top_left',
        title=f'Time-Series Plot for {ticker} Symbol',
        width=725,
        height=350,
    )

    # Create dashboard panel
    ts_col = pn.Column(ts_plot)
    tab_row = pn.Row(ts_col)

    # Save chart as .html file
    tab_row.save(f'../{ticker}_TS_Plot.html')

    # Put the panel in a tab
    tabs.append((f'{ticker}', tab_row))

    # Sleep only when another request follows (presumably the pause exists to
    # stay under the API's request-rate limit; nothing is gained after the
    # last ticker).
    if position < len(tickers) - 1:
        print(f"Sleeping for {sleep} seconds before moving to next ticker...")
        time.sleep(sleep)

# Serve the dashboard
print('Complete... Displaying dashboard in browser now')
pn.serve(title='Portfolio', panels=tabs)

# Configure SEC API

In [3]:
# Retrieve API Key:
sec_api_key = os.getenv("sec_api_key")

# Fail with an explicit message when the key is not configured; otherwise the
# string concatenation below raises an opaque TypeError on None.
if not sec_api_key:
    raise RuntimeError(
        "Environment variable 'sec_api_key' is not set; "
        "add it to the environment or the .env file before running this cell."
    )

# Create API object (endpoint URL carrying the token as a query parameter):
sec_api = "https://api.sec-api.io?token=" + sec_api_key

In [None]:
# Upper bound of the filing window: the current calendar date.
today = date.today()

# Lower bound of the filing window: the same day five years earlier.
start_date = today + relativedelta(years=-5)

In [None]:
# Define API query filters: 10-K filings for CIK 50863 whose filedAt value
# falls in the start_date..today window, sorted newest first, up to 10 results.
query_string = (
    "cik:50863"
    " AND filedAt:{" + f"{start_date} TO {today}" + "}"
    ' AND formType:"10-K"'
)
payload = {
    "query": {"query_string": {"query": query_string}},
    "from": "0",
    "size": "10",
    "sort": [{"filedAt": {"order": "desc"}}],
}

# Format payload to JSON:
jsondata = json.dumps(payload)
jsondataasbytes = jsondata.encode('utf-8')

# Instantiate the request (supplying a body makes this a POST):
req = urllib.request.Request(sec_api, data=jsondataasbytes, method='POST')

# Set the request header:
req.add_header('Content-Type', 'application/json; charset=utf-8')

# Set the request length (header values are text, so convert the byte count):
req.add_header('Content-Length', str(len(jsondataasbytes)))

# Send the request and read the response; the context manager closes the
# connection even if reading fails.
with urllib.request.urlopen(req) as response:
    res_body = response.read()

# Transform the response to JSON:
filings = json.loads(res_body.decode("utf-8"))

# Print JSON:
print(filings)

# Begin NLP Analysis on Retrieved Documents:

In [None]:
# Flatten the decoded JSON response into a DataFrame for modeling,
# then display it as the cell output:
filings = pd.json_normalize(filings)
filings

In [None]:
# Field of each filing record whose text is scored.
# NOTE(review): the query response appears to carry filing metadata rather
# than the report body, so 'description' is an assumption. Swap in the full
# document text (e.g. fetched from the record's 'linkToTxt') if whole-report
# sentiment is wanted. TODO confirm against the API response.
text_field = 'description'

# Create the sentiment scores list:
sentiments = []
for cell in filings['filings']:
    # json_normalize() leaves the response's "filings" array as a list stored
    # in a single cell, so unwrap it rather than indexing the list by string
    # (which raised an uncaught TypeError).
    records = cell if isinstance(cell, list) else [cell]
    for filing in records:
        if not isinstance(filing, dict):
            continue

        text = filing.get(text_field)
        if not isinstance(text, str) or not text.strip():
            # Nothing to score for this filing.
            continue

        # Keep the first 10 characters of the filing timestamp (presumably an
        # ISO date prefix; verify). A separate name is used so the imported
        # `date` class is neither stored in the rows nor shadowed.
        filed_on = str(filing.get('filedAt', ''))[:10]

        sentiment = analyzer.polarity_scores(text)
        sentiments.append({
            'text': text,
            'date': filed_on,
            'compound': sentiment['compound'],
            'positive': sentiment['pos'],
            'negative': sentiment['neg'],
            'neutral': sentiment['neu'],
        })

# Create a DataFrame from the sentiment scores list with the columns in
# display order. Passing `columns` also yields an empty, correctly labelled
# frame when no filing was scored, where reindexing an empty frame would fail.
columns = ['date','text','compound','positive','negative','neutral']
sentiments_df = pd.DataFrame(sentiments, columns=columns)

# Create Visuals To Display Sentiment Analysis Results: