# 10-K Sentiment Analysis Project

In [6]:
import os
import re
import time
import pandas as pd
from pandas import json_normalize
from datetime import date
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv
load_dotenv()
import json
import urllib.request
import xml.etree.ElementTree as ET
from collections import Counter
import numpy as np
from nltk import ngrams
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
# First run only: execute `import nltk; nltk.download('vader_lexicon')` BEFORE SentimentIntensityAnalyzer() is created above (`nltk` itself is not imported in this cell).
import panel as pn
import holoviews as hv
import hvplot.pandas
from holoviews import opts

In [None]:
# Anchor the query window on the current calendar day:
today = date.today()

# Lower bound of the window: the same calendar day five years earlier.
# Adding a negative relativedelta is the same shift as subtracting a positive one.
start_date = today + relativedelta(years=-5)

# Retrieve documents from SEC API

In [3]:
# Retrieve the API key from the environment variable "sec_api_key":
TOKEN = os.getenv("sec_api_key")

# Fail early with a readable message: os.getenv returns None for an unset
# variable, and the concatenation below would otherwise raise an opaque TypeError.
if not TOKEN:
    raise RuntimeError(
        "Environment variable 'sec_api_key' is not set; "
        "define it (e.g. in your .env file) before running this notebook."
    )

# Build the query endpoint URL; the key is passed as the `token` query parameter:
API = "https://api.sec-api.io?token=" + TOKEN

In [4]:
# Define API query filters:
#   - cik:50863                     -> restrict to a single filer
#   - filedAt:{start_date TO today} -> restrict to the date window computed earlier
#   - formType:"10-K"               -> annual reports only
query_string = f'cik:50863 AND filedAt:{{{start_date} TO {today}}} AND formType:"10-K"'
payload = {
    "query": {"query_string": {"query": query_string}},
    "size": "10",
    "sort": [{"filedAt": {"order": "desc"}}],
}

# Serialize the payload to UTF-8 encoded JSON bytes for the request body:
jsondata = json.dumps(payload)
jsondataasbytes = jsondata.encode('utf-8')

# Build the POST request. Content-Length is not set by hand: urllib derives it
# from the bytes body when the request is sent.
req = urllib.request.Request(
    API,
    data=jsondataasbytes,
    headers={'Content-Type': 'application/json; charset=utf-8'},
    method='POST',
)

# Send the request. The context manager closes the connection even if reading
# fails, and the timeout keeps the cell from hanging on a stalled connection.
with urllib.request.urlopen(req, timeout=30) as response:
    res_body = response.read()

# Decode the response body and parse it as JSON:
filings = json.loads(res_body.decode("utf-8"))

# Print the parsed response:
print(filings)

{'total': {'value': 6, 'relation': 'eq'}, 'query': {'from': 0, 'size': 10}, 'filings': [{'id': '12d917fd6c3f2fd397a0bfc73a0fe5a7', 'accessionNo': '0000050863-21-000010', 'cik': '50863', 'ticker': 'INTC', 'companyName': 'INTEL CORP', 'companyNameLong': 'INTEL CORP (Filer)', 'formType': '10-K', 'description': 'Form 10-K - Annual report [Section 13 and 15(d), not S-K Item 405]', 'filedAt': '2021-01-21T19:06:04-05:00', 'linkToTxt': 'https://www.sec.gov/Archives/edgar/data/50863/000005086321000010/0000050863-21-000010.txt', 'linkToHtml': 'https://www.sec.gov/Archives/edgar/data/50863/000005086321000010/0000050863-21-000010-index.htm', 'linkToXbrl': '', 'linkToFilingDetails': 'https://www.sec.gov/Archives/edgar/data/50863/000005086321000010/intc-20201226.htm', 'entities': [{'companyName': 'INTEL CORP (Filer)', 'cik': '50863', 'irsNo': '941672743', 'stateOfIncorporation': 'DE', 'fiscalYearEnd': '1228', 'type': '10-K', 'act': '34', 'fileNo': '000-06217', 'filmNo': '21543276', 'sic': '3674 Semi

# Begin NLP Analysis on Retrieved Documents:

In [None]:
# Convert the JSON response to a DataFrame for modeling.
# The isinstance guard keeps this cell re-runnable: `filings` is rebound from the
# raw response dict to a DataFrame here, and a second run must not pass that
# DataFrame back into json_normalize.
if isinstance(filings, dict):
    filings = json_normalize(filings)
filings

In [None]:
# Create the sentiment scores list, one entry per retrieved filing.
# After normalization the 'filings' column holds, per row, the list of filing
# records (dicts) returned by the API, so each cell is unpacked before scoring.
#
# NOTE: the API response carries filing metadata only. The text scored here is the
# record's 'description' field; scoring the 10-K body itself would require
# downloading the document referenced by 'linkToTxt' / 'linkToFilingDetails'.
sentiments = []
for filing_batch in filings['filings']:
    # Tolerate a column that already holds a single record per row.
    records = filing_batch if isinstance(filing_batch, list) else [filing_batch]
    for filing in records:
        # Skip anything that is not a filing record or has no text to score,
        # instead of hiding the problem behind a blanket except/pass.
        if not isinstance(filing, dict):
            continue
        text = filing.get('description')
        if not text:
            continue

        # 'filedAt' is an ISO timestamp; the first 10 characters are YYYY-MM-DD.
        # A separate name is used so the imported `date` class is not shadowed.
        filed_date = (filing.get('filedAt') or '')[:10]

        sentiment = analyzer.polarity_scores(text)
        sentiments.append({
            'text': text,
            'date': filed_date,
            'compound': sentiment['compound'],
            'positive': sentiment['pos'],
            'negative': sentiment['neg'],
            'neutral': sentiment['neu'],
        })

# Create a DataFrame from the sentiment scores list. Passing `columns` fixes the
# column order and keeps the frame well-formed even when no filing was scored.
columns = ['date','text','compound','positive','negative','neutral']
sentiments_df = pd.DataFrame(sentiments, columns=columns)

# Create Visuals To Display Sentiment Analysis Results: