# Labeling Sentiment of Annual Filings - Lexicon Method

## Import Libraries

In [1]:
# import necessary libraries
import pandas as pd
import time
import re
import os
import nltk
from nltk.corpus import stopwords
import string

# note: the nltk library will be used for textual preprocessing tasks
# note: the string library will be used to access a punctuation database

## Read Dataset to Pandas Dataframe

In [2]:
# read master_filing_df.csv file into a pandas dataframe
master_filing_df = pd.read_csv('master_filing_df.csv')
# display master_filing_df
display(master_filing_df.head())

Unnamed: 0,CIK,Ticker_Symbol,Company_Name,Data_Date,Filing_Date,Filing_Type,MDA_Placement,File_Name
0,1750,AIR,AAR CORP,2012-05-31,2012-07-19,10-K,10-K,AIR_2012-07-19.txt
1,1750,AIR,AAR CORP,2013-05-31,2013-07-26,10-K,10-K,AIR_2013-07-26.txt
2,1750,AIR,AAR CORP,2014-05-31,2014-07-17,10-K,10-K,AIR_2014-07-17.txt
3,1750,AIR,AAR CORP,2016-05-31,2016-07-13,10-K,10-K,AIR_2016-07-13.txt
4,1750,AIR,AAR CORP,2019-05-31,2019-07-18,10-K,10-K,AIR_2019-07-18.txt


## Define Function for Textual Preprocessing 

In [3]:
# note: in order to conduct lexicon-based sentiment analysis with the loughran and mcdonald dictionary
#       the following preprocessing steps are required
#       - tokenize given text into words
#       - removal of stop words
#       - removal of punctuation
#       - convert tokens to lower case
#       - remove empty strings that are classed as tokens
#       - drop leftover tokenizer fragments: ' followed by a single letter (e.g. 's), '' and ``

# define preprocessing function
def text_preprocessing(text):
    """Tokenize raw text and reduce it to lower-case content tokens.

    Steps, in order: nltk word tokenization, lower-casing, removal of english
    stop words, removal of punctuation tokens, removal of empty tokens, and
    removal of tokenizer leftovers (an apostrophe followed by one letter such
    as 's, a doubled apostrophe, or a doubled backtick).

    Args:
        text: the document text as a single string.

    Returns:
        A list of lower-case token strings.
    """
    # lower-case before filtering stop words: the stop word comparison is
    # case-sensitive, so capitalised forms such as 'The' or 'We' would
    # otherwise slip through (assumes the nltk list is lower case - it is
    # at the time of writing)
    tokens = [token.lower() for token in nltk.word_tokenize(text)]
    # remove stop words
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    # remove any punctuation
    # note: this is a substring test against string.punctuation, so it drops
    #       single punctuation characters but keeps tokens such as '...'
    tokens = [token for token in tokens if token not in string.punctuation]
    # filter empty tokens
    tokens = [token for token in tokens if token]
    # drop tokens that consist solely of ' plus one letter, '' or ``
    leftover_pattern = re.compile(r"'[a-z']|``")
    tokens = [token for token in tokens if not leftover_pattern.fullmatch(token)]
    return tokens

## Lexicon Model Implementation - Sentiment Analysis

In [4]:
# note: the loughran and mcdonald lexicon can be used to conduct sentiment analysis on the filing text files
# note: this will be done by counting positive and negative words in each document
# note: the sentiment score for each document will be calculated by subtracting the total number of negative words from positive words
#        and dividing by the sum of the total number of positive words and negative words
# note: the sentiment will be classed as follows:
#       - if sentiment score equal to 0 then sentiment class is neutral
#       - if sentiment score greater than 0 then sentiment class is positive
#       - if sentiment score less than 0 then sentiment class is negative

# load the loughran and mcdonald positive and negative word lists into python lists
# note: both lists live in the workbook loughran_mcdonald_dictionary.xlsx,
#       one sheet per sentiment, with no header row
loughran_mcdonald_lexicon = pd.ExcelFile('loughran_mcdonald_dictionary.xlsx')
positive_word_df = pd.read_excel(
    loughran_mcdonald_lexicon,
    'Positive',
    header=None,
    names=['Positive_Words'],
)
negative_word_df = pd.read_excel(
    loughran_mcdonald_lexicon,
    'Negative',
    header=None,
    names=['Negative_Words'],
)

# positive words: 'postive_words' keeps the spelling and casing found in the workbook,
# 'positive_words' is the lower-cased list used for matching
postive_words = positive_word_df['Positive_Words'].to_list()
positive_words = []
for lexicon_word in postive_words:
    positive_words.append(lexicon_word.lower())

# negative words: lower-cased list used for matching
negative_words = []
for lexicon_word in negative_word_df['Negative_Words'].to_list():
    negative_words.append(lexicon_word.lower())

# define function to produce sentiment class
def lexicon_sentiment_class(text, positive_word_list, negative_word_list):
    """Score a tokenized document against a positive and a negative word list.

    A token counts as positive (negative) when it is exactly equal to a word
    in the positive (negative) list. The sentiment score is

        (positive - negative) / (positive + negative)

    and is defined as 0.0 when the text contains no lexicon word at all, so
    that such a document no longer raises ZeroDivisionError.

    Args:
        text: iterable of preprocessed tokens (strings).
        positive_word_list: iterable of positive lexicon words.
        negative_word_list: iterable of negative lexicon words.

    Returns:
        A tuple (number_of_positive_words, number_of_negative_words,
        sentiment_score, sentiment_class) where sentiment_class is
        'Positive' for a score above 0, 'Negative' for a score below 0 and
        'Neutral' for a score of exactly 0.
    """
    # sets give constant-time membership tests; a word repeated in a lexicon
    # list is still only counted once per matching token
    positive_lookup = set(positive_word_list)
    negative_lookup = set(negative_word_list)

    # single pass over the text, counting matches against both lexicons
    number_of_positive_words = 0
    number_of_negative_words = 0
    for word in text:
        if word in positive_lookup:
            number_of_positive_words += 1
        if word in negative_lookup:
            number_of_negative_words += 1

    # sentiment score calculation (guarded against an empty match set)
    total_matches = number_of_positive_words + number_of_negative_words
    if total_matches == 0:
        sentiment_score = 0.0
    else:
        sentiment_score = (number_of_positive_words - number_of_negative_words) / total_matches

    # sentiment classification: a score of exactly 0 is neutral
    if sentiment_score > 0:
        sentiment_class = 'Positive'
    elif sentiment_score < 0:
        sentiment_class = 'Negative'
    else:
        sentiment_class = 'Neutral'

    return number_of_positive_words, number_of_negative_words, sentiment_score, sentiment_class

In [5]:
# note: change the working directory to the 'Filing Text Files' folder
# note: os.chdir fails with an OSError when the folder cannot be entered from the
#       current location, e.g. when this cell is re-run after the directory was
#       already changed; only that error is caught so that interrupts still propagate
try:
    os.chdir('Filing Text Files')
except OSError:
    print('already in directory!')
else:
    print('directory changed')

directory changed


In [6]:
# columns that receive the lexicon results for each filing
result_columns = ['No_of_Positive_Words', 'No_of_Negative_Words', 'Sentiment_Score', 'Sentiment_Class']
# create the text length column and the result columns in master_filing_df,
# each initialised with an empty string
for column_name in ['Text_File_Length'] + result_columns:
    master_filing_df[column_name] = ''

# filings with this many tokens or fewer after preprocessing are not scored
minimum_token_count = 250

# loop through all files
for idx, row in master_filing_df.iterrows():
    # open and read text from text file
    # note: the with block closes the file even if reading raises
    with open(row['File_Name'], 'r') as filing_file:
        file_text = filing_file.read()
    # calculate length of text file (number of characters)
    master_filing_df.loc[idx, 'Text_File_Length'] = len(file_text)
    # preprocess text file
    preprocessed_file_text = text_preprocessing(file_text)
    if len(preprocessed_file_text) <= minimum_token_count:
        # too little text to score: mark every result column as 'N/A'
        results = ('N/A',) * len(result_columns)
    else:
        # lexicon sentiment class function returns the values in result_columns order
        results = lexicon_sentiment_class(preprocessed_file_text, positive_words, negative_words)
    for column_name, value in zip(result_columns, results):
        master_filing_df.loc[idx, column_name] = value

    # provide user feedback
    print('--------------------------------------------------------------------------')
    print(idx, row['Ticker_Symbol'])
    print('text file length:', master_filing_df.loc[idx, 'Text_File_Length'])
    print('total number of positive words:', master_filing_df.loc[idx, 'No_of_Positive_Words'])
    print('total number of negative words:', master_filing_df.loc[idx, 'No_of_Negative_Words'])
    print('sentiment score:', master_filing_df.loc[idx, 'Sentiment_Score'])
    print('sentiment classification:', master_filing_df.loc[idx, 'Sentiment_Class'])
    print('\n')

--------------------------------------------------------------------------
0 AIR
text file length: 42087
total number of positive words: 38
total number of negative words: 71
sentiment score: -0.30275229357798167
sentiment classification: Negative


--------------------------------------------------------------------------
1 AIR
text file length: 35351
total number of positive words: 38
total number of negative words: 49
sentiment score: -0.12643678160919541
sentiment classification: Negative


--------------------------------------------------------------------------
2 AIR
text file length: 39449
total number of positive words: 33
total number of negative words: 42
sentiment score: -0.12
sentiment classification: Negative


--------------------------------------------------------------------------
3 AIR
text file length: 46544
total number of positive words: 31
total number of negative words: 73
sentiment score: -0.40384615384615385
sentiment classification: Negative


---------------

--------------------------------------------------------------------------
33 LUB
text file length: 89274
total number of positive words: 118
total number of negative words: 193
sentiment score: -0.24115755627009647
sentiment classification: Negative


--------------------------------------------------------------------------
34 LUB
text file length: 91178
total number of positive words: 131
total number of negative words: 177
sentiment score: -0.14935064935064934
sentiment classification: Negative


--------------------------------------------------------------------------
35 LUB
text file length: 105456
total number of positive words: 134
total number of negative words: 192
sentiment score: -0.17791411042944785
sentiment classification: Negative


--------------------------------------------------------------------------
36 LUB
text file length: 91508
total number of positive words: 108
total number of negative words: 172
sentiment score: -0.22857142857142856
sentiment classification

--------------------------------------------------------------------------
66 EBF
text file length: 44561
total number of positive words: 54
total number of negative words: 91
sentiment score: -0.25517241379310346
sentiment classification: Negative


--------------------------------------------------------------------------
67 EBF
text file length: 54651
total number of positive words: 75
total number of negative words: 122
sentiment score: -0.23857868020304568
sentiment classification: Negative


--------------------------------------------------------------------------
68 FOE
text file length: 71593
total number of positive words: 71
total number of negative words: 201
sentiment score: -0.47794117647058826
sentiment classification: Negative


--------------------------------------------------------------------------
69 GLT
text file length: 40106
total number of positive words: 40
total number of negative words: 71
sentiment score: -0.27927927927927926
sentiment classification: Negat

--------------------------------------------------------------------------
99 OSG
text file length: 113521
total number of positive words: 109
total number of negative words: 198
sentiment score: -0.2899022801302932
sentiment classification: Negative


--------------------------------------------------------------------------
100 OSG
text file length: 84062
total number of positive words: 87
total number of negative words: 164
sentiment score: -0.30677290836653387
sentiment classification: Negative


--------------------------------------------------------------------------
101 OSG
text file length: 48954
total number of positive words: 54
total number of negative words: 70
sentiment score: -0.12903225806451613
sentiment classification: Negative


--------------------------------------------------------------------------
102 OMI
text file length: 34833
total number of positive words: 28
total number of negative words: 62
sentiment score: -0.37777777777777777
sentiment classification: N

--------------------------------------------------------------------------
132 S
text file length: 136183
total number of positive words: 115
total number of negative words: 200
sentiment score: -0.2698412698412698
sentiment classification: Negative


--------------------------------------------------------------------------
133 UVV
text file length: 71170
total number of positive words: 127
total number of negative words: 114
sentiment score: 0.05394190871369295
sentiment classification: Positive


--------------------------------------------------------------------------
134 UVV
text file length: 67516
total number of positive words: 129
total number of negative words: 105
sentiment score: 0.10256410256410256
sentiment classification: Positive


--------------------------------------------------------------------------
135 UVV
text file length: 70088
total number of positive words: 141
total number of negative words: 116
sentiment score: 0.09727626459143969
sentiment classification: 

--------------------------------------------------------------------------
165 ATGE
text file length: 131128
total number of positive words: 124
total number of negative words: 308
sentiment score: -0.42592592592592593
sentiment classification: Negative


--------------------------------------------------------------------------
166 IVC
text file length: 87673
total number of positive words: 127
total number of negative words: 218
sentiment score: -0.263768115942029
sentiment classification: Negative


--------------------------------------------------------------------------
167 REX
text file length: 94937
total number of positive words: 94
total number of negative words: 200
sentiment score: -0.36054421768707484
sentiment classification: Negative


--------------------------------------------------------------------------
168 REX
text file length: 52651
total number of positive words: 59
total number of negative words: 117
sentiment score: -0.32954545454545453
sentiment classificatio

--------------------------------------------------------------------------
198 RS
text file length: 65043
total number of positive words: 75
total number of negative words: 77
sentiment score: -0.013157894736842105
sentiment classification: Negative


--------------------------------------------------------------------------
199 BHE
text file length: 47309
total number of positive words: 55
total number of negative words: 114
sentiment score: -0.34911242603550297
sentiment classification: Negative


--------------------------------------------------------------------------
200 BHE
text file length: 51628
total number of positive words: 54
total number of negative words: 125
sentiment score: -0.39664804469273746
sentiment classification: Negative


--------------------------------------------------------------------------
201 BHE
text file length: 52922
total number of positive words: 54
total number of negative words: 120
sentiment score: -0.3793103448275862
sentiment classification: N

--------------------------------------------------------------------------
231 ALG
text file length: 31809
total number of positive words: 35
total number of negative words: 76
sentiment score: -0.36936936936936937
sentiment classification: Negative


--------------------------------------------------------------------------
232 TWI
text file length: 60334
total number of positive words: 55
total number of negative words: 64
sentiment score: -0.07563025210084033
sentiment classification: Negative


--------------------------------------------------------------------------
233 TWI
text file length: 66968
total number of positive words: 59
total number of negative words: 150
sentiment score: -0.4354066985645933
sentiment classification: Negative


--------------------------------------------------------------------------
234 TWI
text file length: 63691
total number of positive words: 65
total number of negative words: 180
sentiment score: -0.46938775510204084
sentiment classification: Ne

--------------------------------------------------------------------------
264 SGU
text file length: 89941
total number of positive words: 75
total number of negative words: 175
sentiment score: -0.4
sentiment classification: Negative


--------------------------------------------------------------------------
265 SGU
text file length: 88734
total number of positive words: 73
total number of negative words: 171
sentiment score: -0.4016393442622951
sentiment classification: Negative


--------------------------------------------------------------------------
266 SGU
text file length: 79410
total number of positive words: 72
total number of negative words: 152
sentiment score: -0.35714285714285715
sentiment classification: Negative


--------------------------------------------------------------------------
267 SGU
text file length: 71630
total number of positive words: 66
total number of negative words: 142
sentiment score: -0.36538461538461536
sentiment classification: Negative


-----

--------------------------------------------------------------------------
297 DRQ
text file length: 59439
total number of positive words: 40
total number of negative words: 141
sentiment score: -0.5580110497237569
sentiment classification: Negative


--------------------------------------------------------------------------
298 SRI
text file length: 85053
total number of positive words: 131
total number of negative words: 193
sentiment score: -0.19135802469135801
sentiment classification: Negative


--------------------------------------------------------------------------
299 SRI
text file length: 69126
total number of positive words: 89
total number of negative words: 173
sentiment score: -0.32061068702290074
sentiment classification: Negative


--------------------------------------------------------------------------
300 FDP
text file length: 84276
total number of positive words: 84
total number of negative words: 222
sentiment score: -0.45098039215686275
sentiment classification:

--------------------------------------------------------------------------
330 DVN
text file length: 76064
total number of positive words: 31
total number of negative words: 171
sentiment score: -0.693069306930693
sentiment classification: Negative


--------------------------------------------------------------------------
331 DVN
text file length: 73499
total number of positive words: 59
total number of negative words: 188
sentiment score: -0.5222672064777328
sentiment classification: Negative


--------------------------------------------------------------------------
332 CIR
text file length: 40239
total number of positive words: 49
total number of negative words: 82
sentiment score: -0.25190839694656486
sentiment classification: Negative


--------------------------------------------------------------------------
333 MTRN
text file length: 111702
total number of positive words: 192
total number of negative words: 217
sentiment score: -0.061124694376528114
sentiment classification:

--------------------------------------------------------------------------
363 ASPN
text file length: 87264
total number of positive words: 77
total number of negative words: 112
sentiment score: -0.18518518518518517
sentiment classification: Negative


--------------------------------------------------------------------------
364 ASPN
text file length: 86864
total number of positive words: 75
total number of negative words: 103
sentiment score: -0.15730337078651685
sentiment classification: Negative


--------------------------------------------------------------------------
365 ASPN
text file length: 88271
total number of positive words: 76
total number of negative words: 102
sentiment score: -0.14606741573033707
sentiment classification: Negative


--------------------------------------------------------------------------
366 LRN
text file length: 87440
total number of positive words: 59
total number of negative words: 131
sentiment score: -0.37894736842105264
sentiment classificati

--------------------------------------------------------------------------
396 HIL
text file length: 53546
total number of positive words: 57
total number of negative words: 89
sentiment score: -0.2191780821917808
sentiment classification: Negative


--------------------------------------------------------------------------
397 HIL
text file length: 56841
total number of positive words: 59
total number of negative words: 100
sentiment score: -0.2578616352201258
sentiment classification: Negative


--------------------------------------------------------------------------
398 WTI
text file length: 85007
total number of positive words: 56
total number of negative words: 147
sentiment score: -0.4482758620689655
sentiment classification: Negative


--------------------------------------------------------------------------
399 MIC
text file length: 115216
total number of positive words: 99
total number of negative words: 158
sentiment score: -0.22957198443579765
sentiment classification: Ne

--------------------------------------------------------------------------
429 CXO
text file length: 117368
total number of positive words: 89
total number of negative words: 169
sentiment score: -0.31007751937984496
sentiment classification: Negative


--------------------------------------------------------------------------
430 VSI
text file length: 34394
total number of positive words: 36
total number of negative words: 59
sentiment score: -0.24210526315789474
sentiment classification: Negative


--------------------------------------------------------------------------
431 UFS
text file length: 133612
total number of positive words: 189
total number of negative words: 382
sentiment score: -0.3380035026269702
sentiment classification: Negative


--------------------------------------------------------------------------
432 UFS
text file length: 135825
total number of positive words: 154
total number of negative words: 384
sentiment score: -0.4275092936802974
sentiment classificatio

--------------------------------------------------------------------------
462 IPI
text file length: 70766
total number of positive words: 54
total number of negative words: 164
sentiment score: -0.5045871559633027
sentiment classification: Negative


--------------------------------------------------------------------------
463 IPI
text file length: 70963
total number of positive words: 65
total number of negative words: 136
sentiment score: -0.35323383084577115
sentiment classification: Negative


--------------------------------------------------------------------------
464 IPI
text file length: 64511
total number of positive words: 48
total number of negative words: 89
sentiment score: -0.29927007299270075
sentiment classification: Negative


--------------------------------------------------------------------------
465 UAN
text file length: 83533
total number of positive words: 65
total number of negative words: 130
sentiment score: -0.3333333333333333
sentiment classification: Ne

--------------------------------------------------------------------------
495 AMRC
text file length: 94498
total number of positive words: 85
total number of negative words: 144
sentiment score: -0.2576419213973799
sentiment classification: Negative


--------------------------------------------------------------------------
496 SEMG
text file length: 80618
total number of positive words: 46
total number of negative words: 118
sentiment score: -0.43902439024390244
sentiment classification: Negative


--------------------------------------------------------------------------
497 SEMG
text file length: 74495
total number of positive words: 57
total number of negative words: 159
sentiment score: -0.4722222222222222
sentiment classification: Negative


--------------------------------------------------------------------------
498 KNX
text file length: 98994
total number of positive words: 102
total number of negative words: 114
sentiment score: -0.05555555555555555
sentiment classificatio

--------------------------------------------------------------------------
528 AGN
text file length: 91541
total number of positive words: 91
total number of negative words: 204
sentiment score: -0.38305084745762713
sentiment classification: Negative


--------------------------------------------------------------------------
529 VNCE
text file length: 73714
total number of positive words: 63
total number of negative words: 148
sentiment score: -0.4028436018957346
sentiment classification: Negative


--------------------------------------------------------------------------
530 NEWM
text file length: 94185
total number of positive words: 116
total number of negative words: 173
sentiment score: -0.1972318339100346
sentiment classification: Negative


--------------------------------------------------------------------------
531 NEWM
text file length: 83219
total number of positive words: 105
total number of negative words: 153
sentiment score: -0.18604651162790697
sentiment classificati

--------------------------------------------------------------------------
561 VSLR
text file length: 97702
total number of positive words: 57
total number of negative words: 177
sentiment score: -0.5128205128205128
sentiment classification: Negative


--------------------------------------------------------------------------
562 CJ
text file length: 39491
total number of positive words: 29
total number of negative words: 141
sentiment score: -0.6588235294117647
sentiment classification: Negative


--------------------------------------------------------------------------
563 CJ
text file length: 114366
total number of positive words: 167
total number of negative words: 240
sentiment score: -0.17936117936117937
sentiment classification: Negative


--------------------------------------------------------------------------
564 CJ
text file length: 89157
total number of positive words: 139
total number of negative words: 198
sentiment score: -0.17507418397626112
sentiment classification: 

--------------------------------------------------------------------------
594 TK
text file length: 225676
total number of positive words: 138
total number of negative words: 371
sentiment score: -0.4577603143418468
sentiment classification: Negative


--------------------------------------------------------------------------
595 TK
text file length: 211951
total number of positive words: 110
total number of negative words: 388
sentiment score: -0.5582329317269076
sentiment classification: Negative


--------------------------------------------------------------------------
596 TK
text file length: 187710
total number of positive words: 117
total number of negative words: 333
sentiment score: -0.48
sentiment classification: Negative


--------------------------------------------------------------------------
597 CYD
text file length: 82446
total number of positive words: 84
total number of negative words: 115
sentiment score: -0.15577889447236182
sentiment classification: Negative


--

--------------------------------------------------------------------------
627 DSX
text file length: 74331
total number of positive words: 29
total number of negative words: 81
sentiment score: -0.4727272727272727
sentiment classification: Negative


--------------------------------------------------------------------------
628 DSX
text file length: 75467
total number of positive words: 35
total number of negative words: 94
sentiment score: -0.4573643410852713
sentiment classification: Negative


--------------------------------------------------------------------------
629 DSX
text file length: 81240
total number of positive words: 35
total number of negative words: 102
sentiment score: -0.48905109489051096
sentiment classification: Negative


--------------------------------------------------------------------------
630 DSX
text file length: 69767
total number of positive words: 28
total number of negative words: 90
sentiment score: -0.5254237288135594
sentiment classification: Negat

--------------------------------------------------------------------------
660 TOO
text file length: 144280
total number of positive words: 99
total number of negative words: 231
sentiment score: -0.4
sentiment classification: Negative


--------------------------------------------------------------------------
661 TOO
text file length: 156009
total number of positive words: 95
total number of negative words: 239
sentiment score: -0.4311377245508982
sentiment classification: Negative


--------------------------------------------------------------------------
662 TOO
text file length: 147716
total number of positive words: 106
total number of negative words: 230
sentiment score: -0.36904761904761907
sentiment classification: Negative


--------------------------------------------------------------------------
663 CZZ
text file length: 62160
total number of positive words: 57
total number of negative words: 48
sentiment score: 0.08571428571428572
sentiment classification: Positive


---

--------------------------------------------------------------------------
693 GSL
text file length: 85035
total number of positive words: 69
total number of negative words: 159
sentiment score: -0.39473684210526316
sentiment classification: Negative


--------------------------------------------------------------------------
694 GSL
text file length: 92608
total number of positive words: 40
total number of negative words: 169
sentiment score: -0.6172248803827751
sentiment classification: Negative


--------------------------------------------------------------------------
695 SB
text file length: 71005
total number of positive words: 47
total number of negative words: 95
sentiment score: -0.3380281690140845
sentiment classification: Negative


--------------------------------------------------------------------------
696 NNA
text file length: 106303
total number of positive words: 49
total number of negative words: 154
sentiment score: -0.5172413793103449
sentiment classification: Neg

--------------------------------------------------------------------------
726 DLNG
text file length: 80113
total number of positive words: 42
total number of negative words: 128
sentiment score: -0.5058823529411764
sentiment classification: Negative


--------------------------------------------------------------------------
727 NVGS
text file length: 79292
total number of positive words: 41
total number of negative words: 86
sentiment score: -0.3543307086614173
sentiment classification: Negative


--------------------------------------------------------------------------
728 NVGS
text file length: 92614
total number of positive words: 47
total number of negative words: 101
sentiment score: -0.36486486486486486
sentiment classification: Negative


--------------------------------------------------------------------------
729 SALT
text file length: 76548
total number of positive words: 53
total number of negative words: 102
sentiment score: -0.3161290322580645
sentiment classification:

--------------------------------------------------------------------------
759 TECK
text file length: 145464
total number of positive words: 144
total number of negative words: 223
sentiment score: -0.21525885558583105
sentiment classification: Negative


--------------------------------------------------------------------------
760 TECK
text file length: 188171
total number of positive words: 165
total number of negative words: 274
sentiment score: -0.24829157175398633
sentiment classification: Negative


--------------------------------------------------------------------------
761 TECK
text file length: 201815
total number of positive words: 171
total number of negative words: 313
sentiment score: -0.29338842975206614
sentiment classification: Negative


--------------------------------------------------------------------------
762 EGO
text file length: 68380
total number of positive words: 42
total number of negative words: 111
sentiment score: -0.45098039215686275
sentiment classi

--------------------------------------------------------------------------
792 TAC
text file length: 253410
total number of positive words: 269
total number of negative words: 466
sentiment score: -0.2680272108843537
sentiment classification: Negative


--------------------------------------------------------------------------
793 TAC
text file length: 236086
total number of positive words: 291
total number of negative words: 418
sentiment score: -0.17912552891396333
sentiment classification: Negative


--------------------------------------------------------------------------
794 TAC
text file length: 290783
total number of positive words: 394
total number of negative words: 499
sentiment score: -0.11758118701007839
sentiment classification: Negative


--------------------------------------------------------------------------
795 TAC
text file length: 308852
total number of positive words: 402
total number of negative words: 587
sentiment score: -0.1870576339737108
sentiment classific

--------------------------------------------------------------------------
825 HBM
text file length: 136805
total number of positive words: 134
total number of negative words: 253
sentiment score: -0.30749354005167956
sentiment classification: Negative


--------------------------------------------------------------------------
826 HBM
text file length: 146176
total number of positive words: 123
total number of negative words: 278
sentiment score: -0.3865336658354115
sentiment classification: Negative


--------------------------------------------------------------------------
827 HBM
text file length: 124452
total number of positive words: 117
total number of negative words: 211
sentiment score: -0.2865853658536585
sentiment classification: Negative


--------------------------------------------------------------------------
828 HBM
text file length: 140626
total number of positive words: 162
total number of negative words: 250
sentiment score: -0.21359223300970873
sentiment classific

--------------------------------------------------------------------------
858 FTS
text file length: 231287
total number of positive words: 227
total number of negative words: 466
sentiment score: -0.3448773448773449
sentiment classification: Negative


--------------------------------------------------------------------------
859 FTS
text file length: 190951
total number of positive words: 227
total number of negative words: 447
sentiment score: -0.3264094955489614
sentiment classification: Negative


--------------------------------------------------------------------------
860 NTR
text file length: 172212
total number of positive words: 314
total number of negative words: 294
sentiment score: 0.03289473684210526
sentiment classification: Positive




## Portfolio Analysis and Feasibility Check

In [7]:
# note: a feasibility study checks whether labeling the sentiment of the text files with the
#       lexicon method differentiates good investments from bad ones
#       the returns of each stock are analyzed using the stock_price_df_copy.csv file

# move the working directory up one level (back to the prototype two notebooks folder)
os.chdir('..')

# load stock_price_df_copy.csv into a pandas dataframe
stock_price_df = pd.read_csv('stock_price_df_copy.csv')

# count the text files labeled 'Positive', 'Negative' and 'N/A'
number_of_positive_files = len(master_filing_df[master_filing_df['Sentiment_Class'] == 'Positive'])
number_of_negative_files = len(master_filing_df[master_filing_df['Sentiment_Class'] == 'Negative'])
number_of_na_files = len(master_filing_df[master_filing_df['Sentiment_Class'] == 'N/A'])

In [8]:
# create a new dataframe to store returns for stocks in master_filing_df
# note: this dataframe will not include 'N/A' observations
lexicon_returns_df = master_filing_df.query('Sentiment_Class!="N/A"').reset_index(drop=True)

# columns copied over from stock_price_df: the F-SCORE followed by the long/short
# holding period returns, annualized holding period returns and excess returns
stock_price_columns = [
    'F_SCORE',
    'Long_Holding_Period_Return_(%)',
    'Short_Holding_Period_Return_(%)',
    'Long_Annualized_Holding_Period_Return_(%)',
    'Short_Annualized_Holding_Period_Return_(%)',
    'Long_Excess_Returns_(%)',
    'Short_Excess_Returns_(%)',
]

# create the new columns
# note: an observation without a matching row in stock_price_df keeps the value 0
for column in stock_price_columns:
    lexicon_returns_df[column] = 0

# index stock_price_df once by (Data_Date, Ticker_Symbol) so that every filing is matched
# with a single dictionary lookup instead of a full scan of stock_price_df
# note: rows with a missing key are left out because a missing value never compares equal
# note: a later row with the same key replaces an earlier one, so the last matching row wins
stock_price_lookup = {}
for _, price_row in stock_price_df.dropna(subset=['Data_Date', 'Ticker_Symbol']).iterrows():
    stock_price_lookup[(price_row['Data_Date'], price_row['Ticker_Symbol'])] = price_row

# get returns from stock_price_df
for idx, row in lexicon_returns_df.iterrows():
    price_row = stock_price_lookup.get((row['Data_Date'], row['Ticker_Symbol']))
    if price_row is None:
        # no stock price data for this filing
        continue
    for column in stock_price_columns:
        lexicon_returns_df.loc[idx, column] = price_row[column]


In [9]:
# report how many nan values lexicon_returns_df contains,
# then keep only the rows that have no nan value in any column

# total number of nan values across all columns
print('number of nan values in lexicon_returns_df:', lexicon_returns_df.isna().sum().sum())
# discard every row that contains at least one nan value
lexicon_returns_df = lexicon_returns_df.dropna(axis=0, how='any')

number of nan values in lexicon_returns_df: 6


In [10]:
# note: several portfolio formations will be analyzed to identify the best combination of stocks
# note: the function below prints summary statistics of the returns of a given portfolio

def portfolio_analysis_statistics(portfolio_df):
    """Print summary statistics of the return columns of a portfolio.

    Prints the number of rows of ``portfolio_df`` followed by one section per
    statistic (mean, median, 10th, 25th, 75th and 90th percentile). Each
    section lists that statistic for every return column, rounded to 3
    decimals.

    Args:
        portfolio_df: pandas dataframe containing the six return columns
            listed in ``return_columns`` below.

    Returns:
        None; the statistics are only printed.
    """
    divider = '--------------------------------------------------------------------------------'

    # (printed label, dataframe column) pairs, in the order they are printed
    return_columns = [
        ('long holding period return:', 'Long_Holding_Period_Return_(%)'),
        ('short holding period return:', 'Short_Holding_Period_Return_(%)'),
        ('long annualized holding period return:', 'Long_Annualized_Holding_Period_Return_(%)'),
        ('short annualized holding period return:', 'Short_Annualized_Holding_Period_Return_(%)'),
        ('long excess return:', 'Long_Excess_Returns_(%)'),
        ('short excess return:', 'Short_Excess_Returns_(%)'),
    ]

    # (section heading, statistic applied to a single return column)
    statistics = [
        ('mean returns', lambda returns: returns.mean()),
        ('median returns', lambda returns: returns.median()),
        ('10th percentile returns', lambda returns: returns.quantile(0.1)),
        ('25th percentile returns', lambda returns: returns.quantile(0.25)),
        ('75th percentile returns', lambda returns: returns.quantile(0.75)),
        ('90th percentile returns', lambda returns: returns.quantile(0.9)),
    ]

    # compute every statistic before printing anything, so that a missing
    # column raises before a partial report is written
    sections = []
    for heading, statistic in statistics:
        values = [(label, statistic(portfolio_df[column])) for label, column in return_columns]
        sections.append((heading, values))

    print('number of observations:', portfolio_df.shape[0])
    print('\n')
    for heading, values in sections:
        print(divider)
        print(heading)
        print(divider)
        for label, value in values:
            print(label, round((value), 3))
        print('\n')
    return None

In [11]:
# statistics for the portfolio of text files labeled with positive sentiment
print('POSITIVE SENTIMENT PORTFOLIO')
print('\n')
portfolio_analysis_statistics(lexicon_returns_df[lexicon_returns_df['Sentiment_Class'] == "Positive"])

POSITIVE SENTIMENT PORTFOLIO


number of observations: 38


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: -0.009
short holding period return: 0.278
long annualized holding period return: -0.041
short annualized holding period return: 0.471
long excess return: -0.14
short excess return: 0.556


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: -0.017
short holding period return: 0.017
long annualized holding period return: -0.019
short annualized holding period return: 0.019
long excess return: -0.106
short excess return: 0.097


--------------------------------------------------------------------------------
10th percentile returns
------------------------------------------------

In [12]:
# statistics for the portfolio of text files labeled with negative sentiment
print('NEGATIVE SENTIMENT PORTFOLIO')
print('\n')
portfolio_analysis_statistics(lexicon_returns_df[lexicon_returns_df['Sentiment_Class'] == "Negative"])

NEGATIVE SENTIMENT PORTFOLIO


number of observations: 817


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: 0.196
short holding period return: 0.387
long annualized holding period return: 1802.726
short annualized holding period return: 0.913
long excess return: 1802.619
short excess return: 1.004


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: 0.001
short holding period return: -0.001
long annualized holding period return: 0.001
short annualized holding period return: -0.001
long excess return: -0.106
short excess return: 0.088


--------------------------------------------------------------------------------
10th percentile returns
-------------------------------------------

In [32]:
# analysis of text files that have been labeled with negative sentiment (without outlier)
# note: the heading states that the outlier is excluded so that this output can be told apart
#       from the negative sentiment portfolio that still contains it
print('NEGATIVE SENTIMENT PORTFOLIO (EXCLUDING MAJOR OUTLIER)')
print('\n')
# note: the row with index label 78 is the major outlier that is removed
portfolio_analysis_statistics(lexicon_returns_df.loc[lexicon_returns_df['Sentiment_Class']=="Negative"].drop(78))

NEGATIVE SENTIMENT PORTFOLIO


number of observations: 816


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: 0.196
short holding period return: 0.388
long annualized holding period return: 0.926
short annualized holding period return: 0.915
long excess return: 0.82
short excess return: 1.006


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: 0.001
short holding period return: -0.001
long annualized holding period return: 0.001
short annualized holding period return: -0.001
long excess return: -0.106
short excess return: 0.089


--------------------------------------------------------------------------------
10th percentile returns
--------------------------------------------------

In [13]:
# statistics for the portfolio made up of every text file in lexicon_returns_df
# note: the heading is followed by the same blank lines as two separate print calls would give
print('COMPLETE PORTFOLIO', '\n', sep='\n')
portfolio_analysis_statistics(lexicon_returns_df)

COMPLETE PORTFOLIO


number of observations: 855


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: 0.187
short holding period return: 0.382
long annualized holding period return: 1722.603
short annualized holding period return: 0.893
long excess return: 1722.497
short excess return: 0.984


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: 0.0
short holding period return: 0.0
long annualized holding period return: 0.0
short annualized holding period return: 0.0
long excess return: -0.106
short excess return: 0.09


--------------------------------------------------------------------------------
10th percentile returns
----------------------------------------------------------------

In [31]:
# statistics for every text file except the major outlier stored under index label 78
print('COMPLETE PORTFOLIO (EXCLUDING MAJOR OUTLIER)')
print('\n')
portfolio_analysis_statistics(lexicon_returns_df.drop(index=78))

COMPLETE PORTFOLIO (EXCLUDING MAJOR OUTLIER)


number of observations: 854


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: 0.187
short holding period return: 0.383
long annualized holding period return: 0.883
short annualized holding period return: 0.896
long excess return: 0.778
short excess return: 0.986


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: 0.0
short holding period return: 0.0
long annualized holding period return: 0.0
short annualized holding period return: 0.0
long excess return: -0.106
short excess return: 0.091


--------------------------------------------------------------------------------
10th percentile returns
-------------------------------------------

In [27]:
# statistics for text files labeled with positive sentiment whose F-SCORE is 8 or 9
print('POSITIVE SENTIMENT PORTFOLIO FOR HIGH F-SCORE OBSERVATIONS')
print('\n')
portfolio_analysis_statistics(lexicon_returns_df[(lexicon_returns_df['Sentiment_Class'] == "Positive") &
                                                 lexicon_returns_df['F_SCORE'].isin([8, 9])])

POSITIVE SENTIMENT PORTFOLIO FOR HIGH F-SCORE OBSERVATIONS


number of observations: 2


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: 0.173
short holding period return: -0.049
long annualized holding period return: 0.174
short annualized holding period return: -0.05
long excess return: 0.105
short excess return: 0.013


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: 0.173
short holding period return: -0.049
long annualized holding period return: 0.174
short annualized holding period return: -0.05
long excess return: 0.105
short excess return: 0.013


--------------------------------------------------------------------------------
10th percentile returns
----------------------

In [28]:
# statistics for text files labeled with positive sentiment whose F-SCORE is 7, 8 or 9
print('POSITIVE SENTIMENT PORTFOLIO FOR HIGH F-SCORE OBSERVATIONS')
print('\n')
portfolio_analysis_statistics(lexicon_returns_df[(lexicon_returns_df['Sentiment_Class'] == "Positive") &
                                                 lexicon_returns_df['F_SCORE'].isin([7, 8, 9])])

POSITIVE SENTIMENT PORTFOLIO FOR HIGH F-SCORE OBSERVATIONS


number of observations: 6


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: 0.016
short holding period return: 0.153
long annualized holding period return: -0.029
short annualized holding period return: 0.545
long excess return: -0.135
short excess return: 0.635


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: 0.004
short holding period return: -0.003
long annualized holding period return: -0.021
short annualized holding period return: 0.025
long excess return: -0.11
short excess return: 0.097


--------------------------------------------------------------------------------
10th percentile returns
--------------------

In [29]:
# statistics for text files labeled with negative sentiment whose F-SCORE is 0 or 1
print('NEGATIVE SENTIMENT PORTFOLIO FOR LOW F-SCORE OBSERVATIONS')
print('\n')
portfolio_analysis_statistics(lexicon_returns_df[(lexicon_returns_df['Sentiment_Class'] == "Negative") &
                                                 lexicon_returns_df['F_SCORE'].isin([0, 1])])

NEGATIVE SENTIMENT PORTFOLIO FOR LOW F-SCORE OBSERVATIONS


number of observations: 10


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: -0.079
short holding period return: 0.538
long annualized holding period return: -0.115
short annualized holding period return: 0.709
long excess return: -0.164
short excess return: 0.754


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: -0.026
short holding period return: 0.027
long annualized holding period return: -0.065
short annualized holding period return: 0.074
long excess return: -0.067
short excess return: 0.077


--------------------------------------------------------------------------------
10th percentile returns
------------------

In [30]:
# statistics for text files labeled with negative sentiment whose F-SCORE is 0, 1 or 2
print('NEGATIVE SENTIMENT PORTFOLIO FOR LOW F-SCORE OBSERVATIONS')
print('\n')
portfolio_analysis_statistics(lexicon_returns_df[(lexicon_returns_df['Sentiment_Class'] == "Negative") &
                                                 lexicon_returns_df['F_SCORE'].isin([0, 1, 2])])

NEGATIVE SENTIMENT PORTFOLIO FOR LOW F-SCORE OBSERVATIONS


number of observations: 54


--------------------------------------------------------------------------------
mean returns
--------------------------------------------------------------------------------
long holding period return: 0.072
short holding period return: 0.24
long annualized holding period return: 0.541
short annualized holding period return: 0.544
long excess return: 0.459
short excess return: 0.615


--------------------------------------------------------------------------------
median returns
--------------------------------------------------------------------------------
long holding period return: 0.0
short holding period return: 0.0
long annualized holding period return: 0.0
short annualized holding period return: 0.0
long excess return: -0.058
short excess return: 0.052


--------------------------------------------------------------------------------
10th percentile returns
--------------------------------

In [15]:
# analysis of going long on text files with positive sentiment and going short on text files with negative sentiment
positive_negative_portfolio_df = lexicon_returns_df

# a text file labeled 'Negative' is shorted; every other text file is held long
is_short_position = positive_negative_portfolio_df['Sentiment_Class'] == 'Negative'

# total the returns, taking the short column for shorted rows and the long column otherwise
# note: skipna=False keeps a nan return visible in the total instead of silently skipping it
hpr = positive_negative_portfolio_df['Short_Holding_Period_Return_(%)'].where(
    is_short_position,
    positive_negative_portfolio_df['Long_Holding_Period_Return_(%)'],
).sum(skipna=False)
annualized_hpr = positive_negative_portfolio_df['Short_Annualized_Holding_Period_Return_(%)'].where(
    is_short_position,
    positive_negative_portfolio_df['Long_Annualized_Holding_Period_Return_(%)'],
).sum(skipna=False)
excess_return = positive_negative_portfolio_df['Short_Excess_Returns_(%)'].where(
    is_short_position,
    positive_negative_portfolio_df['Long_Excess_Returns_(%)'],
).sum(skipna=False)

# calculate averages of returns
number_of_positions = positive_negative_portfolio_df.shape[0]
average_hpr = hpr / number_of_positions
average_annualized_hpr = annualized_hpr / number_of_positions
average_excess_return = excess_return / number_of_positions

# the averages are multiplied by 100 and printed with a trailing '%'
print('LONG POSITIVE SENTIMENT TEXT AND SHORT NEGATIVE SENTIMENT TEXT')
print('\n')
print('---------------------------------------------------------------------------------------')
print('average holding period return: ', round(average_hpr*100, 3), '%', sep='')
print('average annualized holding period return: ', round(average_annualized_hpr*100, 3), '%', sep='')
print('average excess return: ', round(average_excess_return*100, 3), '%', sep='')

LONG POSITIVE SENTIMENT TEXT AND SHORT NEGATIVE SENTIMENT TEXT


---------------------------------------------------------------------------------------
average holding period return: 36.92%
average annaulized holding period return: 87.069%
average excess return: 95.315%


## Save lexicon_returns_df to CSV File

In [16]:
# write lexicon_returns_df to lexicon_returns_df.csv, leaving out the index column
lexicon_returns_df.to_csv(path_or_buf='lexicon_returns_df.csv', index=False)