In [76]:
from pathlib import Path
import pandas as pd
import re
from collections import Counter
import string

Getting the shareholder letter information

In [77]:
shareholder_letters=Path('shareholder_letters.csv')

letters_df=pd.read_csv(shareholder_letters,delimiter=',',encoding='utf-8',errors='ignore')
letters_df.head(10)

Unnamed: 0,Rank,Company,Industry,Employees,"Revenue (in millions, USD)","Valuation (in millions, USD)","Profits (in millions, USD)",Profits (% of Sales),Ticker,Letter,Overflow
0,1,Walmart,General Merchandisers,2100000,611289,397475.0,11680,1.9,WMT,"Dear Shareholders and Associates, Thank you. G...",
1,2,Amazon.com,Internet Services and Retailing,1541000,513983,1058440.0,-2722,-0.5,AMZN,Dear shareholders: As I sit down to write my s...,
2,3,Exxon Mobil,Petroleum Refining,62000,413680,446424.0,55740,13.5,XOM,The world needs reliable and affordable energy...,
3,4,Apple,"Computers, Office Equipment",164000,394328,2609039.0,99803,25.3,AAPL,"To our shareholders, Over the past year, teams...",
4,5,UnitedHealth Group,Health Care: Insurance and Managed Care,400000,324162,440854.0,20120,6.2,UNH,"Dear Fellow Shareholders: As 2023 progresses, ...",
5,6,CVS Health,Health Care: Pharmacy and Other Services,259500,322467,95422.0,4149,1.3,CVS,Dear Fellow Stockholders: 2022 marked a transf...,
6,7,Berkshire Hathaway,Insurance: Property and Casualty (Stock),383000,302089,675657.0,-22819,-7.6,BRKA,To the Shareholders of Berkshire Hathaway Inc....,
7,8,Alphabet,Internet Services and Retailing,190234,282836,1330201.0,59972,21.2,GOOGL,"To our investors,\r\n 2022 was a year full of ...",
8,9,McKesson,Wholesalers: Health Care,66500,263966,48757.0,1114,0.4,MCK,To our valued shareholders:for McKesson as we ...,
9,10,Chevron,Petroleum Refining,43846,246252,311093.0,35465,14.4,CVX,to our stockholders The events of 2022 demonst...,


In [78]:
letters_df['ShareholderLetter']=letters_df['Letter'].fillna('')+ letters_df['Overflow'].fillna('')
letters_df= letters_df.drop(columns=['Letter', 'Overflow'])

In [79]:
#function to remove punctuation
def remove_punctuation(text):
    translator=str.maketrans('','', string.punctuation)
    return text.translate(translator)

In [80]:
for index,row in letters_df.iterrows():
    letters_df.at[index,'ShareholderLetter']=remove_punctuation(row['ShareholderLetter'])

In [81]:
letters_df

Unnamed: 0,Rank,Company,Industry,Employees,"Revenue (in millions, USD)","Valuation (in millions, USD)","Profits (in millions, USD)",Profits (% of Sales),Ticker,ShareholderLetter
0,1,Walmart,General Merchandisers,2100000,611289,397475.0,11680,1.9,WMT,Dear Shareholders and Associates Thank you Gra...
1,2,Amazon.com,Internet Services and Retailing,1541000,513983,1058440.0,-2722,-0.5,AMZN,Dear shareholders As I sit down to write my se...
2,3,Exxon Mobil,Petroleum Refining,62000,413680,446424.0,55740,13.5,XOM,The world needs reliable and affordable energy...
3,4,Apple,"Computers, Office Equipment",164000,394328,2609039.0,99803,25.3,AAPL,To our shareholders Over the past year teams a...
4,5,UnitedHealth Group,Health Care: Insurance and Managed Care,400000,324162,440854.0,20120,6.2,UNH,Dear Fellow Shareholders As 2023 progresses Un...
...,...,...,...,...,...,...,...,...,...,...
95,96,United Airlines Holdings,Airlines,92795,44955,14474.0,737,1.6,UAL,Dear Fellow United Stockholders On behalf of t...
96,97,Thermo Fisher Scientific,"Scientific, Photographic and Control Equipment",130000,44915,222150.0,6950,15.5,TMO,To our\r\nshareholders\r\ncustomers and\r\ncol...
97,98,Qualcomm,Semiconductors and Other Electronic Components,51000,44200,142252.0,12936,29.3,QCOM,
98,99,Abbott Laboratories,Medical Products and Equipment,115000,43653,175984.0,6933,15.9,ABT,DEAR FELLOW SHAREHOLDER The three years of the...


In [82]:
#function to count words in a column
def count_words(cell):
    if pd.isnull(cell):
        return pd.Series()
    else:
        words = str(cell).split()
        return pd.Series(words).value_counts()

In [83]:
letters_df['ShareholderLetter']=letters_df['ShareholderLetter'].astype(str)

In [84]:
letters_df['Letter_word_count']=[count_words(row) for row in letters_df['ShareholderLetter']]

Getting the stock data

In [85]:
stock_data=Path('../Resourses/stockdata.csv')

stock_data_df=pd.read_csv(stock_data,delimiter=',',encoding='utf-8',errors='ignore')
stock_data_df.head()

Unnamed: 0,date,ticker,open,high,low,close,volume,afterHours,preMarket
0,2023-01-03,AA,45.77,46.67,44.44,44.58,2880182.0,44.58,45.87
1,2023-02-01,AA,52.1,54.73,51.62,54.58,5479053.0,54.9,52.36
2,2023-03-01,AA,51.29,52.96,50.53,51.65,7599559.0,51.78,50.3
3,2023-04-03,AA,42.56,42.95,41.13,41.77,3272145.0,41.65,42.56
4,2023-05-01,AA,37.66,37.6999,36.77,37.27,2730319.0,37.15,37.3


In [86]:
stock_data_df['ticker']=stock_data_df['ticker'].apply(str)
stock_data_df.dtypes

date           object
ticker         object
open          float64
high          float64
low           float64
close         float64
volume        float64
afterHours    float64
preMarket     float64
dtype: object

In [87]:
ticker_list=letters_df['Ticker'].dropna()


In [88]:
openprice=[]
closeprice=[]
open_tickers=[]
close_tickers=[]
for index,row in stock_data_df.iterrows():
    stock_ticker=row['ticker']
    if row['date']=='2023-01-03':
        openprice.append(row['open'])
        open_tickers.append(stock_ticker)
    elif row['date']=='2023-12-01':
        closeprice.append(row['close'])
        close_tickers.append(stock_ticker)

In [89]:
open_df=pd.DataFrame({'Ticker':open_tickers,'OpenPrice':openprice})
close_df=pd.DataFrame({'Ticker':close_tickers,'ClosePrice':closeprice})
openclose_df=pd.merge(open_df, close_df, on='Ticker', how='left')
openclose_df['PriceChange']=openclose_df['ClosePrice']-openclose_df['OpenPrice']
openclose_df


Unnamed: 0,Ticker,OpenPrice,ClosePrice,PriceChange
0,AA,45.77,27.74,-18.03
1,AAL,12.91,13.02,0.11
2,AAP,146.16,54.09,-92.07
3,AAPL,130.28,191.24,60.96
4,ABBV,162.04,143.41,-18.63
...,...,...,...,...
1337,XEL,69.81,61.43,-8.38
1338,XOM,109.78,102.99,-6.79
1339,XPO,33.75,90.31,56.56
1340,YUMC,55.59,43.73,-11.86


In [90]:
fortune_100_df=[]
for index,row in openclose_df.iterrows():
    stock_ticker=row['Ticker'] 
    if [ticker for ticker in ticker_list]:
        fortune_100_df.append(row)
fortune_100_df=pd.DataFrame(fortune_100_df)
fortune_100_df

Unnamed: 0,Ticker,OpenPrice,ClosePrice,PriceChange
0,AA,45.77,27.74,-18.03
1,AAL,12.91,13.02,0.11
2,AAP,146.16,54.09,-92.07
3,AAPL,130.28,191.24,60.96
4,ABBV,162.04,143.41,-18.63
...,...,...,...,...
1337,XEL,69.81,61.43,-8.38
1338,XOM,109.78,102.99,-6.79
1339,XPO,33.75,90.31,56.56
1340,YUMC,55.59,43.73,-11.86


In [91]:
wordcounts_df=pd.merge(letters_df, openclose_df, on='Ticker', how='left')
wordcounts_df

Unnamed: 0,Rank,Company,Industry,Employees,"Revenue (in millions, USD)","Valuation (in millions, USD)","Profits (in millions, USD)",Profits (% of Sales),Ticker,ShareholderLetter,Letter_word_count,OpenPrice,ClosePrice,PriceChange
0,1,Walmart,General Merchandisers,2100000,611289,397475.0,11680,1.9,WMT,Dear Shareholders and Associates Thank you Gra...,and 52 to 39 our 3...,142.55,154.34,11.79
1,2,Amazon.com,Internet Services and Retailing,1541000,513983,1058440.0,-2722,-0.5,AMZN,Dear shareholders As I sit down to write my se...,and 194 to 178 ...,85.46,147.03,61.57
2,3,Exxon Mobil,Petroleum Refining,62000,413680,446424.0,55740,13.5,XOM,The world needs reliable and affordable energy...,and 22 our 19 to ...,109.78,102.99,-6.79
3,4,Apple,"Computers, Office Equipment",164000,394328,2609039.0,99803,25.3,AAPL,To our shareholders Over the past year teams a...,and 21 our 15 to ...,130.28,191.24,60.96
4,5,UnitedHealth Group,Health Care: Insurance and Managed Care,400000,324162,440854.0,20120,6.2,UNH,Dear Fellow Shareholders As 2023 progresses Un...,and 37 to 13 the ...,525.13,547.16,22.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,99,Abbott Laboratories,Medical Products and Equipment,115000,43653,175984.0,6933,15.9,ABT,DEAR FELLOW SHAREHOLDER The three years of the...,the 80 of 66 and ...,110.51,104.88,-5.63
264,100,Coca-Cola,Beverages,82500,43004,268361.0,9542,22.2,KO,The CocaCola Company’s purpose is to refresh t...,to 41 and 34 our ...,63.56,58.64,-4.92
265,100,Coca-Cola,Beverages,82500,43004,268361.0,9542,22.2,KO,The CocaCola Company’s purpose is to refresh t...,to 41 and 34 our ...,63.56,58.64,-4.92
266,100,Coca-Cola,Beverages,82500,43004,268361.0,9542,22.2,KO,The CocaCola Company’s purpose is to refresh t...,to 41 and 34 our ...,63.56,58.64,-4.92


Getting the keywords from the news

In [92]:
newsKeyWords=Path('../Resourses/newsKeyWords.csv')

news_df=pd.read_csv(newsKeyWords,delimiter=',',encoding='utf-8',errors='ignore')
news_df

Unnamed: 0,Rank,Ticker,Company,homepages,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,1,WMT,Walmart,https://www.marketwatch.com/ https://www.zacks...,investing; Cryptocurrency; Top Stories; Market...,,News; Upgrades; Price Target; Reiteration; Ana...,News; Price Target; Reiteration; Analyst Ratin...,News; Analyst Color; Price Target; Reiteration...,,investing; investing,investing; News; Analyst Color; Tech; Long Ide...,investing,investing,News; Price Target; Reiteration; Top Stories; ...,News; Equities; Markets; Analyst Ratings; Trad...
1,2,AMZN,Amazon.com,https://www.marketwatch.com/ https://www.zacks...,investing,investing,investing; investing,News; Earnings; After-Hours Center; Movers; Tr...,investing; News; Earnings; Penny Stocks; Large...,investing; Long Ideas; News; Analyst Color; To...,investing; News; Top Stories; Tech,investing; Cryptocurrency; Eurozone; Economics...,Asia; Economics; Markets; Tech; investing,Cryptocurrency; Asia; Economics; Federal Reser...,investing; Equities; Macro Economic Events; Br...,investing; News; Equities; Cryptocurrency; Fin...
2,3,XOM,Exxon Mobil,https://www.marketwatch.com/ https://www.zacks...,,,investing,investing; Long Ideas; News; Top Stories; Mark...,investing,investing,investing; Sector ETFs; Macro Economic Events;...,Sector ETFs; Large Cap; Commodities; Econ #s; ...,News; Analyst Color; Penny Stocks; Equities; P...,News; Earnings; Price Target; Intraday Update;...,Long Ideas; Sector ETFs; Short Ideas; Specialt...,
3,4,AAPL,Apple,https://www.marketwatch.com/ https://www.zacks...,,News; Earnings; Penny Stocks; Large Cap; Divid...,Analyst Ratings; Long Ideas; Short Ideas; Tech...,,investing; News; Earnings; Penny Stocks; Large...,Long Ideas; News; Analyst Color; Top Stories; ...,investing; investing; investing,investing; Penny Stocks; Top Stories; Exclusiv...,investing; News; Earnings; Equities; Top Stori...,Cryptocurrency; Asia; Economics; Federal Reser...,Equities; Macro Economic Events; Broad U.S. Eq...,News; Analyst Color; Politics; Trading Ideas; ...
4,5,UNH,UnitedHealth Group,https://www.marketwatch.com/ https://www.zacks...,News; Health Care; After-Hours Center; Movers;...,investing,investing,investing,Analyst Ratings; investing; Upgrades; Downgrad...,,investing; investing,investing,,investing,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
467,496,KKR,KKR,https://www.marketwatch.com/ https://www.zacks...,Analyst Ratings,investing; investing,Analyst Ratings; News; Entertainment; Gaming; ...,Analyst Ratings; News; M&A; Biotech; Large Cap...,Analyst Ratings; investing; Upgrades; Downgrad...,investing; investing; investing,Analyst Ratings; investing,investing; News; M&A; Analyst Color; Earnings;...,investing; Analyst Ratings,,,investing
468,497,EQIX,Equinix,https://www.marketwatch.com/ https://www.zacks...,investing; investing,investing; earningscall-transcripts,Upgrades; Downgrades; Initiation; Intraday Upd...,investing,Analyst Ratings; investing,News; Analyst Color; Equities; Downgrades; Pri...,investing; investing; Upgrades; Downgrades; In...,Analyst Ratings; investing,,earningscall-transcripts,,investing; Analyst Ratings
469,498,SON,Sonoco Products,https://www.globenewswire.com https://www.zack...,Calendar of Events,Upgrades; Downgrades; Initiation; Intraday Upd...,Business Contracts; Long Ideas; News; Dividend...,"Environmental, Social, and Governance Criteria",,,Dividend Reports and Estimates; Calendar of Ev...,Product / Services Announcement; Upgrades; Dow...,Mergers and Acquisitions,Calendar of Events; Conference Calls/ Webcasts,Dividends; Specialty ETFs; New ETFs; Exclusive...,Product / Services Announcement; Product / Ser...
470,499,NOW,ServiceNow,https://www.marketwatch.com/ https://www.globe...,investing; investing; News; Earnings; Price Ta...,Contests/Awards,Product / Services Announcement,investing,Analyst Ratings,Product / Services Announcement,Product / Services Announcement; News; Price T...,Partnerships; Product / Services Announcement;...,investing; investing,News; Analyst Color; Equities; Price Target; R...,,investing; investing


In [93]:
#function to count keywords in a column
def count_phrases(cell):
    if pd.isnull(cell):
        return pd.fillna('')
    else:
        phrases = str(cell).split(';')
        return pd.Series(phrases).value_counts()

In [94]:
news_df['YearWords']=news_df.loc[:,'Jan':'Dec'].apply(lambda row: ' '.join(str(cell) for cell in row if pd.notnull(cell)),axis=1)
news_df['YearWordCount']=[count_phrases(row) for row in news_df['YearWords']]
news_df

Unnamed: 0,Rank,Ticker,Company,homepages,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,YearWords,YearWordCount
0,1,WMT,Walmart,https://www.marketwatch.com/ https://www.zacks...,investing; Cryptocurrency; Top Stories; Market...,,News; Upgrades; Price Target; Reiteration; Ana...,News; Price Target; Reiteration; Analyst Ratin...,News; Analyst Color; Price Target; Reiteration...,,investing; investing,investing; News; Analyst Color; Tech; Long Ide...,investing,investing,News; Price Target; Reiteration; Top Stories; ...,News; Equities; Markets; Analyst Ratings; Trad...,investing; Cryptocurrency; Top Stories; Market...,Analyst Ratings 7  Tradin...
1,2,AMZN,Amazon.com,https://www.marketwatch.com/ https://www.zacks...,investing,investing,investing; investing,News; Earnings; After-Hours Center; Movers; Tr...,investing; News; Earnings; Penny Stocks; Large...,investing; Long Ideas; News; Analyst Color; To...,investing; News; Top Stories; Tech,investing; Cryptocurrency; Eurozone; Economics...,Asia; Economics; Markets; Tech; investing,Cryptocurrency; Asia; Economics; Federal Reser...,investing; Equities; Macro Economic Events; Br...,investing; News; Equities; Cryptocurrency; Fin...,investing investing investing; investing News;...,News 8  Top Storie...
2,3,XOM,Exxon Mobil,https://www.marketwatch.com/ https://www.zacks...,,,investing,investing; Long Ideas; News; Top Stories; Mark...,investing,investing,investing; Sector ETFs; Macro Economic Events;...,Sector ETFs; Large Cap; Commodities; Econ #s; ...,News; Analyst Color; Penny Stocks; Equities; P...,News; Earnings; Price Target; Intraday Update;...,Long Ideas; Sector ETFs; Short Ideas; Specialt...,,investing investing; Long Ideas; News; Top Sto...,Markets ...
3,4,AAPL,Apple,https://www.marketwatch.com/ https://www.zacks...,,News; Earnings; Penny Stocks; Large Cap; Divid...,Analyst Ratings; Long Ideas; Short Ideas; Tech...,,investing; News; Earnings; Penny Stocks; Large...,Long Ideas; News; Analyst Color; Top Stories; ...,investing; investing; investing,investing; Penny Stocks; Top Stories; Exclusiv...,investing; News; Earnings; Equities; Top Stori...,Cryptocurrency; Asia; Economics; Federal Reser...,Equities; Macro Economic Events; Broad U.S. Eq...,News; Analyst Color; Politics; Trading Ideas; ...,News; Earnings; Penny Stocks; Large Cap; Divid...,Top Stories 8  Economics ...
4,5,UNH,UnitedHealth Group,https://www.marketwatch.com/ https://www.zacks...,News; Health Care; After-Hours Center; Movers;...,investing,investing,investing,Analyst Ratings; investing; Upgrades; Downgrad...,,investing; investing,investing,,investing,,,News; Health Care; After-Hours Center; Movers;...,News ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
467,496,KKR,KKR,https://www.marketwatch.com/ https://www.zacks...,Analyst Ratings,investing; investing,Analyst Ratings; News; Entertainment; Gaming; ...,Analyst Ratings; News; M&A; Biotech; Large Cap...,Analyst Ratings; investing; Upgrades; Downgrad...,investing; investing; investing,Analyst Ratings; investing,investing; News; M&A; Analyst Color; Earnings;...,investing; Analyst Ratings,,,investing,Analyst Ratings investing; investing Analyst R...,News 3  investi...
468,497,EQIX,Equinix,https://www.marketwatch.com/ https://www.zacks...,investing; investing,investing; earningscall-transcripts,Upgrades; Downgrades; Initiation; Intraday Upd...,investing,Analyst Ratings; investing,News; Analyst Color; Equities; Downgrades; Pri...,investing; investing; Upgrades; Downgrades; In...,Analyst Ratings; investing,,earningscall-transcripts,,investing; Analyst Ratings,investing; investing investing; earningscall-t...,Analyst Ratings ...
469,498,SON,Sonoco Products,https://www.globenewswire.com https://www.zack...,Calendar of Events,Upgrades; Downgrades; Initiation; Intraday Upd...,Business Contracts; Long Ideas; News; Dividend...,"Environmental, Social, and Governance Criteria",,,Dividend Reports and Estimates; Calendar of Ev...,Product / Services Announcement; Upgrades; Dow...,Mergers and Acquisitions,Calendar of Events; Conference Calls/ Webcasts,Dividends; Specialty ETFs; New ETFs; Exclusive...,Product / Services Announcement; Product / Ser...,Calendar of Events Upgrades; Downgrades; Initi...,Trading Ideas ...
470,499,NOW,ServiceNow,https://www.marketwatch.com/ https://www.globe...,investing; investing; News; Earnings; Price Ta...,Contests/Awards,Product / Services Announcement,investing,Analyst Ratings,Product / Services Announcement,Product / Services Announcement; News; Price T...,Partnerships; Product / Services Announcement;...,investing; investing,News; Analyst Color; Equities; Price Target; R...,,investing; investing,investing; investing; News; Earnings; Price Ta...,News ...


In [95]:
news_count=[]
news_ticker=[]
for index,row in news_df.iterrows():
    stock_ticker=row['Ticker']
    if stock_ticker in [ticker for ticker in ticker_list]:
        news_count.append(row['YearWordCount'])
        news_ticker.append(row['Ticker']) 



In [96]:
fortune_500_df=pd.merge(news_df,openclose_df, on='Ticker', how='left')
fortune_500_df

Unnamed: 0,Rank,Ticker,Company,homepages,Jan,Feb,Mar,Apr,May,Jun,...,Aug,Sep,Oct,Nov,Dec,YearWords,YearWordCount,OpenPrice,ClosePrice,PriceChange
0,1,WMT,Walmart,https://www.marketwatch.com/ https://www.zacks...,investing; Cryptocurrency; Top Stories; Market...,,News; Upgrades; Price Target; Reiteration; Ana...,News; Price Target; Reiteration; Analyst Ratin...,News; Analyst Color; Price Target; Reiteration...,,...,investing; News; Analyst Color; Tech; Long Ide...,investing,investing,News; Price Target; Reiteration; Top Stories; ...,News; Equities; Markets; Analyst Ratings; Trad...,investing; Cryptocurrency; Top Stories; Market...,Analyst Ratings 7  Tradin...,142.55,154.34,11.79
1,2,AMZN,Amazon.com,https://www.marketwatch.com/ https://www.zacks...,investing,investing,investing; investing,News; Earnings; After-Hours Center; Movers; Tr...,investing; News; Earnings; Penny Stocks; Large...,investing; Long Ideas; News; Analyst Color; To...,...,investing; Cryptocurrency; Eurozone; Economics...,Asia; Economics; Markets; Tech; investing,Cryptocurrency; Asia; Economics; Federal Reser...,investing; Equities; Macro Economic Events; Br...,investing; News; Equities; Cryptocurrency; Fin...,investing investing investing; investing News;...,News 8  Top Storie...,85.46,147.03,61.57
2,3,XOM,Exxon Mobil,https://www.marketwatch.com/ https://www.zacks...,,,investing,investing; Long Ideas; News; Top Stories; Mark...,investing,investing,...,Sector ETFs; Large Cap; Commodities; Econ #s; ...,News; Analyst Color; Penny Stocks; Equities; P...,News; Earnings; Price Target; Intraday Update;...,Long Ideas; Sector ETFs; Short Ideas; Specialt...,,investing investing; Long Ideas; News; Top Sto...,Markets ...,109.78,102.99,-6.79
3,4,AAPL,Apple,https://www.marketwatch.com/ https://www.zacks...,,News; Earnings; Penny Stocks; Large Cap; Divid...,Analyst Ratings; Long Ideas; Short Ideas; Tech...,,investing; News; Earnings; Penny Stocks; Large...,Long Ideas; News; Analyst Color; Top Stories; ...,...,investing; Penny Stocks; Top Stories; Exclusiv...,investing; News; Earnings; Equities; Top Stori...,Cryptocurrency; Asia; Economics; Federal Reser...,Equities; Macro Economic Events; Broad U.S. Eq...,News; Analyst Color; Politics; Trading Ideas; ...,News; Earnings; Penny Stocks; Large Cap; Divid...,Top Stories 8  Economics ...,130.28,191.24,60.96
4,5,UNH,UnitedHealth Group,https://www.marketwatch.com/ https://www.zacks...,News; Health Care; After-Hours Center; Movers;...,investing,investing,investing,Analyst Ratings; investing; Upgrades; Downgrad...,,...,investing,,investing,,,News; Health Care; After-Hours Center; Movers;...,News ...,525.13,547.16,22.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1328,499,NOW,ServiceNow,https://www.marketwatch.com/ https://www.globe...,investing; investing; News; Earnings; Price Ta...,Contests/Awards,Product / Services Announcement,investing,Analyst Ratings,Product / Services Announcement,...,Partnerships; Product / Services Announcement;...,investing; investing,News; Analyst Color; Equities; Price Target; R...,,investing; investing,investing; investing; News; Earnings; Price Ta...,News ...,395.08,690.79,295.71
1329,500,RHI,Robert Half International,https://www.marketwatch.com/ https://www.zacks...,Analyst Color; Economics; Analyst Ratings,,,,,Analyst Ratings; Upgrades; Downgrades; Initiat...,...,Analyst Color; Equities; Large Cap; Mid Cap; H...,,,,,Analyst Color; Economics; Analyst Ratings Anal...,Economics 2 Analyst ...,74.47,83.37,8.90
1330,500,RHI,Robert Half International,https://www.marketwatch.com/ https://www.zacks...,Analyst Color; Economics; Analyst Ratings,,,,,Analyst Ratings; Upgrades; Downgrades; Initiat...,...,Analyst Color; Equities; Large Cap; Mid Cap; H...,,,,,Analyst Color; Economics; Analyst Ratings Anal...,Economics 2 Analyst ...,74.47,83.37,8.90
1331,500,RHI,Robert Half International,https://www.marketwatch.com/ https://www.zacks...,Analyst Color; Economics; Analyst Ratings,,,,,Analyst Ratings; Upgrades; Downgrades; Initiat...,...,Analyst Color; Equities; Large Cap; Mid Cap; H...,,,,,Analyst Color; Economics; Analyst Ratings Anal...,Economics 2 Analyst ...,74.47,83.37,8.90


In [97]:
fortune_news_df=pd.DataFrame({'Ticker':news_ticker, 'YearWordCount':news_count})
fortune_news_df

Unnamed: 0,Ticker,YearWordCount
0,WMT,Analyst Ratings 7  Tradin...
1,AMZN,News 8  Top Storie...
2,XOM,Markets ...
3,AAPL,Top Stories 8  Economics ...
4,UNH,News ...
...,...,...
86,UAL,Top Stories 5 ...
87,TMO,Analyst Ratings 9 ...
88,QCOM,Top Stories 7  Markets ...
89,ABT,Markets 5  Analyst Ratings ...


In [98]:
fortune_100_df=pd.merge(wordcounts_df, fortune_news_df, on='Ticker', how='left')
fortune_100_df

Unnamed: 0,Rank,Company,Industry,Employees,"Revenue (in millions, USD)","Valuation (in millions, USD)","Profits (in millions, USD)",Profits (% of Sales),Ticker,ShareholderLetter,Letter_word_count,OpenPrice,ClosePrice,PriceChange,YearWordCount
0,1,Walmart,General Merchandisers,2100000,611289,397475.0,11680,1.9,WMT,Dear Shareholders and Associates Thank you Gra...,and 52 to 39 our 3...,142.55,154.34,11.79,Analyst Ratings 7  Tradin...
1,2,Amazon.com,Internet Services and Retailing,1541000,513983,1058440.0,-2722,-0.5,AMZN,Dear shareholders As I sit down to write my se...,and 194 to 178 ...,85.46,147.03,61.57,News 8  Top Storie...
2,3,Exxon Mobil,Petroleum Refining,62000,413680,446424.0,55740,13.5,XOM,The world needs reliable and affordable energy...,and 22 our 19 to ...,109.78,102.99,-6.79,Markets ...
3,4,Apple,"Computers, Office Equipment",164000,394328,2609039.0,99803,25.3,AAPL,To our shareholders Over the past year teams a...,and 21 our 15 to ...,130.28,191.24,60.96,Top Stories 8  Economics ...
4,5,UnitedHealth Group,Health Care: Insurance and Managed Care,400000,324162,440854.0,20120,6.2,UNH,Dear Fellow Shareholders As 2023 progresses Un...,and 37 to 13 the ...,525.13,547.16,22.03,News ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
263,99,Abbott Laboratories,Medical Products and Equipment,115000,43653,175984.0,6933,15.9,ABT,DEAR FELLOW SHAREHOLDER The three years of the...,the 80 of 66 and ...,110.51,104.88,-5.63,Markets 5  Analyst Ratings ...
264,100,Coca-Cola,Beverages,82500,43004,268361.0,9542,22.2,KO,The CocaCola Company’s purpose is to refresh t...,to 41 and 34 our ...,63.56,58.64,-4.92,investing 10  investin...
265,100,Coca-Cola,Beverages,82500,43004,268361.0,9542,22.2,KO,The CocaCola Company’s purpose is to refresh t...,to 41 and 34 our ...,63.56,58.64,-4.92,investing 10  investin...
266,100,Coca-Cola,Beverages,82500,43004,268361.0,9542,22.2,KO,The CocaCola Company’s purpose is to refresh t...,to 41 and 34 our ...,63.56,58.64,-4.92,investing 10  investin...


Calculating the wordvalues

In [99]:
#function to count words and return the words and counts as a new dataframe
def total_word_count(df, column_name,indexcolumn,pricechangecolumn):
    indexvalues=[]
    words=[]
    counts=[]
    valuechange=[]
    total_words=[]
    
    for index, row in df.iterrows():
        word_counts = Counter(row[column_name].split())
        total_count= len(word_counts)
        for word, count in word_counts.items():
            indexvalues.append(row[indexcolumn])
            valuechange.append(row[pricechangecolumn])
            words.append(word)
            counts.append(count)
            total_words.append(total_count)
            
    word_count_df = pd.DataFrame({'Ticker':indexvalues,'Word':words, 'Count':counts,'EachWordCount':total_words,'StockPriceChange':valuechange})

    return word_count_df

In [100]:
#function to count keywords in the news and return the keywords and counts as a new dataframe
def total_news_word_count(df, column_name,indexcolumn,pricechangecolumn):
    indexvalues=[]
    words=[]
    counts=[]
    valuechange=[]
    total_words=[]
    
    
    for index, row in df.iterrows():
        word_counts = Counter(re.split(r';\s*', row[column_name]))
        total_count= len(word_counts)
        for word, count in word_counts.items():
            indexvalues.append(row[indexcolumn])
            valuechange.append(row[pricechangecolumn])
            words.append(word)
            counts.append(count)
            total_words.append(total_count)
    word_count_df = pd.DataFrame({'Ticker':indexvalues,'Word':words, 'Count':counts,'EachWordCount':total_words,'StockPriceChange':valuechange})

    return word_count_df

In [101]:
lettertotalwordcounts_df=total_word_count(fortune_100_df,'ShareholderLetter','Ticker','PriceChange')
lettertotalwordcounts_df

Unnamed: 0,Ticker,Word,Count,EachWordCount,StockPriceChange
0,WMT,Dear,1,513,11.79
1,WMT,Shareholders,1,513,11.79
2,WMT,and,52,513,11.79
3,WMT,Associates,1,513,11.79
4,WMT,Thank,2,513,11.79
...,...,...,...,...,...
140432,KO,loved,1,435,-4.92
140433,KO,how,1,435,-4.92
140434,KO,will,1,435,-4.92
140435,KO,thrive,1,435,-4.92


In [102]:
#function to find the value for each word
def find_word_value(df,word_count,total_word_count,stock_price_change):
    wordvalue=[]
    for index,row in df.iterrows():
        value=(row[stock_price_change]/row[total_word_count])/row[word_count]
        wordvalue.append(value)
    return wordvalue

In [103]:
lettertotalwordcounts_df['WordValue']=find_word_value(lettertotalwordcounts_df,'Count','EachWordCount','StockPriceChange')

In [104]:
lettertotalwordcounts_df

Unnamed: 0,Ticker,Word,Count,EachWordCount,StockPriceChange,WordValue
0,WMT,Dear,1,513,11.79,0.022982
1,WMT,Shareholders,1,513,11.79,0.022982
2,WMT,and,52,513,11.79,0.000442
3,WMT,Associates,1,513,11.79,0.022982
4,WMT,Thank,2,513,11.79,0.011491
...,...,...,...,...,...,...
140432,KO,loved,1,435,-4.92,-0.011310
140433,KO,how,1,435,-4.92,-0.011310
140434,KO,will,1,435,-4.92,-0.011310
140435,KO,thrive,1,435,-4.92,-0.011310


In [105]:
newstotalwordcounts_df=total_news_word_count(fortune_500_df,'YearWords','Ticker','PriceChange')
newstotalwordcounts_df

Unnamed: 0,Ticker,Word,Count,EachWordCount,StockPriceChange
0,WMT,investing,1,28,11.79
1,WMT,Cryptocurrency,1,28,11.79
2,WMT,Top Stories,2,28,11.79
3,WMT,Markets,4,28,11.79
4,WMT,investing News,3,28,11.79
...,...,...,...,...,...
29645,RHI,Mid Cap,1,16,8.90
29646,RHI,Health Care,1,16,8.90
29647,RHI,Top Stories,1,16,8.90
29648,RHI,Analyst Ratings,1,16,8.90


In [106]:
newstotalwordcounts_df['WordValue']=find_word_value(newstotalwordcounts_df,'Count','EachWordCount','StockPriceChange')
newstotalwordcounts_df

Unnamed: 0,Ticker,Word,Count,EachWordCount,StockPriceChange,WordValue
0,WMT,investing,1,28,11.79,0.421071
1,WMT,Cryptocurrency,1,28,11.79,0.421071
2,WMT,Top Stories,2,28,11.79,0.210536
3,WMT,Markets,4,28,11.79,0.105268
4,WMT,investing News,3,28,11.79,0.140357
...,...,...,...,...,...,...
29645,RHI,Mid Cap,1,16,8.90,0.556250
29646,RHI,Health Care,1,16,8.90,0.556250
29647,RHI,Top Stories,1,16,8.90,0.556250
29648,RHI,Analyst Ratings,1,16,8.90,0.556250


Exporting DataFrames as JSON

In [107]:
#dataframe with information with all fortune 100 companies
fortune_100_df.to_json('../resourses/fortune_100_final_df.json',orient='records')


In [108]:
#dataframe with keywords and annual ticker information for the fortune 500 companies
fortune_500_df.to_json('../resourses/fortune_500_final_df.json',orient='records')

In [109]:
#dataframe with the wordcounts and wordvalues for the available shareholder letters and the news keywords for the fortune 100 companies
lettertotalwordcounts_df.to_csv('../resourses/shareholderletter_total_word_counts.csv',index=False)

In [110]:
#dataframe with the wordcounts and wordvalues for the available news keywords for the fortune 500 companies
newstotalwordcounts_df.to_csv('../resourses/news_total_word_counts.csv',index=False)