In [2]:
import pandas as pd
import cpi as cpi
from iexfinance.stocks import Stock
import difflib
import requests
import bs4
from bs4 import BeautifulSoup

In [2]:
# Ask the cpi package to refresh its dataset before any inflation adjustment is done.
# NOTE(review): presumably this downloads data over the network, which would make
# inflation-adjusted results depend on when the notebook is run -- confirm.
cpi.update()

In [137]:
def createNewDF(rangeTop):
    """Build the empty per-year table that holds the top-ranked companies.

    For every rank from 1 to ``rangeTop`` the frame gets four columns, in this
    order: ``'#<rank> Revenue'``, ``'#<rank> Company'``, ``'#<rank> isTech'``
    and ``'#<rank> State'``.

    Parameters
    ----------
    rangeTop : int
        Number of ranks (companies per year) to reserve columns for.

    Returns
    -------
    pandas.DataFrame
        A frame with no rows whose index is named ``'year'``.
    """
    per_rank_fields = (' Revenue', ' Company', ' isTech', ' State')
    headers = ['year'] + [
        '#' + str(rank) + field
        for rank in range(1, rangeTop + 1)
        for field in per_rank_fields
    ]
    # 'year' is created as a column first and then promoted to the index.
    return pd.DataFrame(columns=headers).set_index('year')
    

In [138]:
def getCompanyInfoUsingCSV(company, df_company_info, yearStr, rank):
    """Look up a company's GICS sector and headquarters state by fuzzy name match.

    ``company`` is compared against the ``'Security'`` column of
    ``df_company_info`` with ``difflib.get_close_matches`` (cutoff 0.7) and the
    best candidate, if any, is used. A progress line is printed either way.

    Parameters
    ----------
    company : str
        Company name as it appears in the ranking file.
    df_company_info : pandas.DataFrame
        Must provide the columns ``'Security'``, ``'GICS Sector'`` and
        ``'Headquarters Location'``; a missing column raises ``KeyError``.
    yearStr, rank : str
        Only used to label the printed progress line.

    Returns
    -------
    tuple
        ``(sector, state_abbreviation)`` for a match, ``(None, None)`` when no
        candidate reaches the cutoff. ``state_abbreviation`` is ``None`` when
        the text after the last comma of the headquarters location is not a key
        of ``getDictStates()`` (for example a foreign country).

    NOTE(review): the match is purely textual, so a short name can win against
    an unrelated security (the captured output shows "Intel" matched to
    "Incyte"); results should be spot-checked.
    """
    suffix = "(" + str(yearStr) + " #" + str(rank) + ")"

    # Only string names can be compared; anything else (e.g. a missing value)
    # is skipped instead of aborting the whole lookup.
    candidates = [name for name in df_company_info['Security'] if isinstance(name, str)]
    if isinstance(company, str):
        matches = difflib.get_close_matches(company, candidates, cutoff=0.7)
    else:
        matches = []

    if not matches:
        print("Could not be matched: " + str(company) + suffix)
        return None, None

    best_match = matches[0]
    print("Matched: " + company + " to " + best_match + suffix)

    # First row carrying the matched security name.
    matched_row = df_company_info.loc[df_company_info['Security'] == best_match].iloc[0]

    # The region is whatever follows the last comma, e.g. "Irving, Texas" -> "Texas".
    location = matched_row['Headquarters Location']
    region = location.split(',')[-1].strip() if isinstance(location, str) else None

    # .get() keeps unknown regions as None instead of raising KeyError.
    state_abr = getDictStates().get(region)
    return matched_row['GICS Sector'], state_abr

def getCompanySectorUsingWebscraping(company, year, rank):
    """Scrape a company's sector from its Fortune 500 page.

    Parameters
    ----------
    company : str
        Path segment used in the URL
        ``https://fortune.com/fortune500/<year>/<company>/``.
    year : int
        Ranking year. Years before 2015 are not looked up at all.
        NOTE(review): presumably older pages are unavailable or use a
        different layout -- confirm.
    rank : str or int
        Only used to label the "could not be matched" message.

    Returns
    -------
    The sector text found in the third matching table cell, or ``None`` when
    the year is skipped, the request fails, or the page does not have the
    expected structure.
    """
    if year < 2015:
        return None

    url = "https://fortune.com/fortune500/" + str(year) + "/" + company + "/"
    unmatched_message = "Could not be matched: " + company + "(" + str(year) + " #" + str(rank) + ")"

    # A timeout keeps one unresponsive page from hanging the whole run, and
    # network/HTTP failures are reported like any other unmatched company.
    try:
        page = requests.get(url, timeout=10)
        page.raise_for_status()
    except requests.RequestException:
        print(unmatched_message)
        return None

    soup = BeautifulSoup(page.content, "html.parser")
    try:
        # NOTE(review): these look like build-generated CSS class names and may
        # change whenever the site is redeployed -- verify before relying on them.
        cells = soup.body.table.find_all(
            "td", {"class": "dataTable__value--3n5tL dataTable__valueAlignLeft--3uvNx"}
        )
        sector = cells[2].div.string
    except (AttributeError, IndexError):
        # Missing <body>/<table>/<div>, or fewer than three matching cells.
        sector = None

    if sector is None:
        print(unmatched_message)
        return None

    print("Matched: " + company + " to " + sector)
    return sector

def checkForTechnologySector(sector):
    """Return whether ``sector`` equals the label "Technology"."""
    technology_label = "Technology"
    return sector == technology_label

def checkForInformationTechnologySector(sector):
    """Tell whether a GICS sector label is "Information Technology".

    Returns ``None`` when ``sector`` is ``None`` (sector unknown), otherwise
    the boolean result of comparing it with "Information Technology".
    """
    # Identity test: None is a singleton, and `==` can be overridden by the
    # object on the left-hand side.
    if sector is None:
        return None
    return sector == "Information Technology"

In [139]:
def getDictStates():
    """Return a fresh dict mapping full region names to two-letter codes.

    Besides the states, the mapping contains the District of Columbia, several
    territories (American Samoa, Guam, Northern Mariana Islands, Puerto Rico,
    Virgin Islands) and a ``'National': 'NA'`` entry.
    """
    name_code_pairs = (
        ('Alaska', 'AK'),
        ('Alabama', 'AL'),
        ('Arkansas', 'AR'),
        ('American Samoa', 'AS'),
        ('Arizona', 'AZ'),
        ('California', 'CA'),
        ('Colorado', 'CO'),
        ('Connecticut', 'CT'),
        ('District of Columbia', 'DC'),
        ('Delaware', 'DE'),
        ('Florida', 'FL'),
        ('Georgia', 'GA'),
        ('Guam', 'GU'),
        ('Hawaii', 'HI'),
        ('Iowa', 'IA'),
        ('Idaho', 'ID'),
        ('Illinois', 'IL'),
        ('Indiana', 'IN'),
        ('Kansas', 'KS'),
        ('Kentucky', 'KY'),
        ('Louisiana', 'LA'),
        ('Massachusetts', 'MA'),
        ('Maryland', 'MD'),
        ('Maine', 'ME'),
        ('Michigan', 'MI'),
        ('Minnesota', 'MN'),
        ('Missouri', 'MO'),
        ('Northern Mariana Islands', 'MP'),
        ('Mississippi', 'MS'),
        ('Montana', 'MT'),
        ('National', 'NA'),
        ('North Carolina', 'NC'),
        ('North Dakota', 'ND'),
        ('Nebraska', 'NE'),
        ('New Hampshire', 'NH'),
        ('New Jersey', 'NJ'),
        ('New Mexico', 'NM'),
        ('Nevada', 'NV'),
        ('New York', 'NY'),
        ('Ohio', 'OH'),
        ('Oklahoma', 'OK'),
        ('Oregon', 'OR'),
        ('Pennsylvania', 'PA'),
        ('Puerto Rico', 'PR'),
        ('Rhode Island', 'RI'),
        ('South Carolina', 'SC'),
        ('South Dakota', 'SD'),
        ('Tennessee', 'TN'),
        ('Texas', 'TX'),
        ('Utah', 'UT'),
        ('Virginia', 'VA'),
        ('Virgin Islands', 'VI'),
        ('Vermont', 'VT'),
        ('Washington', 'WA'),
        ('Wisconsin', 'WI'),
        ('West Virginia', 'WV'),
        ('Wyoming', 'WY'),
    )
    # dict() keeps the pair order, so iteration order matches the listing above.
    return dict(name_code_pairs)

def getStates():
    """Return a new list of 51 two-letter abbreviations (the list includes "DC")."""
    codes = (
        "AL AK AZ AR CA CO CT DC DE FL GA "
        "HI ID IL IN IA KS KY LA ME MD "
        "MA MI MN MS MO MT NE NV NH NJ "
        "NM NY NC ND OH OK OR PA RI SC "
        "SD TN TX UT VT VA WA WV WI WY"
    )
    # Splitting on whitespace yields the abbreviations in the order written above.
    return codes.split()

In [140]:
def populateRawDF(df, startYear, endYear, rangeTop):
    """Fill ``df`` in place with the top-ranked companies of every year.

    For each year in ``startYear..endYear`` (inclusive) the file
    ``fortune500-<year>.csv`` is read from the working directory and its first
    ``rangeTop`` rows are written to the row labelled ``year``:

    * ``'#<rank> Revenue'`` -- the ``'revenue ($ millions)'`` value passed
      through ``inflateRevenue``
    * ``'#<rank> Company'`` -- the ``'company'`` value as found in the file
    * ``'#<rank> isTech'``  -- result of ``checkForInformationTechnologySector``
      (True / False, or None when the company could not be matched)
    * ``'#<rank> State'``   -- state abbreviation from
      ``getCompanyInfoUsingCSV``, or None

    Revenues are stored as returned by ``inflateRevenue``; the technology flag
    lives in its own column and is not encoded in the sign of the revenue.

    Company metadata is read once from ``'S&P500-Condensed.csv'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to fill, expected to have the column names listed above.
    startYear, endYear : int
        Inclusive range of ranking years.
    rangeTop : int
        Maximum number of companies taken per year; a file with fewer rows
        simply contributes fewer ranks.
    """
    company_info = pd.read_csv('S&P500-Condensed.csv')
    for year in range(startYear, endYear + 1):
        print("-----------------------" + str(year) + "-----------------------")
        yearly_top = pd.read_csv('fortune500-' + str(year) + '.csv').head(rangeTop)

        companies = list(yearly_top['company'])
        revenues = list(yearly_top['revenue ($ millions)'])

        # Iterate over the rows that are actually present rather than assuming
        # the file holds at least rangeTop entries.
        for position, (company, revenue) in enumerate(zip(companies, revenues)):
            rank = str(position + 1)
            sector, state_abr = getCompanyInfoUsingCSV(company, company_info, str(year), rank)
            is_information_sector = checkForInformationTechnologySector(sector)
            inflated_revenue = inflateRevenue(revenue, year)

            # DataFrame.set_value no longer exists in current pandas; .loc
            # assigns a single cell and adds the year row on first use.
            column_prefix = '#' + rank
            df.loc[year, column_prefix + ' Revenue'] = inflated_revenue
            df.loc[year, column_prefix + ' Company'] = company
            df.loc[year, column_prefix + ' isTech'] = is_information_sector
            df.loc[year, column_prefix + ' State'] = state_abr
            
            

In [141]:
def inflateDF(df, startYear, endYear, rangeTop):
    """Inflation-adjust, in place, the first ``rangeTop`` columns of ``df``.

    For every year in ``startYear..endYear`` (inclusive) each of the first
    ``rangeTop`` columns (by position) of the row labelled ``year`` is replaced
    by ``cpi.inflate(value, year)``.

    NOTE(review): no target period is passed to ``cpi.inflate``, so the
    reference year is whatever the cpi package defaults to -- confirm it is the
    intended one. Assumes every year label occurs exactly once in ``df.index``.
    """
    for year in range(startYear, endYear + 1):
        row_position = df.index.get_loc(year)
        for col in range(rangeTop):
            # Operate on the frame that was passed in, and write through a
            # single indexer so the assignment cannot land on a temporary copy.
            df.iloc[row_position, col] = cpi.inflate(df.iloc[row_position, col], year)

def inflateRevenue(revenue, year):
    """Return ``revenue`` adjusted for inflation via ``cpi.inflate(revenue, year)``.

    NOTE(review): no target period is passed, so the reference year is the cpi
    package's default -- confirm it matches the year the analysis intends.
    """
    adjusted_revenue = cpi.inflate(revenue, year)
    return adjusted_revenue

In [142]:
def sortThroughRow(rowList):
    """Split a row into negative and non-negative totals, each scaled by row length.

    Values below zero are accumulated into one total and every other value into
    a second one. Both totals are then divided by ``len(rowList)`` (the size of
    the whole row, not of each group).

    Returns
    -------
    tuple
        ``(negative_share * -1, non_negative_share)``, i.e. the negative part
        is reported with its sign flipped.

    An empty ``rowList`` raises ``ZeroDivisionError``.
    """
    totals = {'negative': 0, 'non_negative': 0}
    for entry in rowList:
        bucket = 'negative' if entry < 0 else 'non_negative'
        totals[bucket] += entry

    row_length = len(rowList)
    negative_share = totals['negative'] / row_length
    non_negative_share = totals['non_negative'] / row_length
    return (negative_share * -1, non_negative_share)

def calculateMeansForDF(df, startYear, endYear):
    """Add the two per-year mean-revenue columns to ``df`` in place.

    For every year in ``startYear..endYear`` (inclusive) the values of the row
    labelled ``year`` are passed to ``sortThroughRow``; the first element of
    its result is stored under
    ``'Mean Revenue Information Technology ($ millions)'`` and the second under
    ``'Mean Revenue Not Information Technology ($ millions)'``.

    The two result columns themselves are left out of the row values, so
    calling the function again yields the same numbers. Results are aligned on
    the year label; rows of ``df`` outside the requested range get NaN.
    """
    it_column = 'Mean Revenue Information Technology ($ millions)'
    other_column = 'Mean Revenue Not Information Technology ($ millions)'

    # Skip previously computed means so a re-run does not fold them into the row.
    value_columns = [name for name in df.columns if name not in (it_column, other_column)]

    years = list(range(startYear, endYear + 1))
    it_means = []
    other_means = []
    for year in years:
        negative_part, positive_part = sortThroughRow(list(df.loc[year, value_columns]))
        it_means.append(negative_part)
        other_means.append(positive_part)

    # Assign by label rather than by position, so the frame may hold other years.
    df[it_column] = pd.Series(it_means, index=years, dtype=float)
    df[other_column] = pd.Series(other_means, index=years, dtype=float)
        

In [143]:
# Ranking depth and analysis window used by the cells below.
top_x_companies = 100
start_year, end_year = 2011, 2018  # end_year is inclusive

# Suffix shared by the CSV files written below: '<top>_<start>-<end>.csv'.
csv_suffix = '{}_{}-{}.csv'.format(top_x_companies, start_year, end_year)

In [144]:
# Build the empty per-year table, then fill it in place from the yearly ranking
# files (populateRawDF prints one progress line per company).
df_raw_revenue = createNewDF(top_x_companies)
populateRawDF(df_raw_revenue, start_year, end_year, top_x_companies)
#df_raw_revenue.to_csv('raw_revenue_2000-2018.csv')
# Show up to the first 20 year rows as the cell output.
df_raw_revenue.head(20)

-----------------------2011-----------------------
Could not be matched: Wal-Mart Stores(2011 #1)
Matched: Exxon Mobil to Exxon Mobil Corp.(2011 #2)
Matched: Chevron to Chevron Corp.(2011 #3)
Matched: ConocoPhillips to ConocoPhillips(2011 #4)
Could not be matched: Fannie Mae(2011 #5)
Matched: General Electric to General Electric(2011 #6)
Matched: Berkshire Hathaway to Berkshire Hathaway(2011 #7)
Matched: General Motors to General Motors(2011 #8)
Matched: Bank of America Corp. to Bank of America Corp(2011 #9)
Matched: Ford Motor to Ford Motor(2011 #10)
Could not be matched: Hewlett-Packard(2011 #11)
Could not be matched: AT&T(2011 #12)
Matched: J.P. Morgan Chase & Co. to JPMorgan Chase & Co.(2011 #13)
Matched: Citigroup to Citigroup Inc.(2011 #14)
Matched: McKesson to McKesson Corp.(2011 #15)
Matched: Verizon Communications to Verizon Communications(2011 #16)
Matched: American International Group to American International Group(2011 #17)
Matched: International Business Machines to Inter



Could not be matched: Express Scripts(2011 #55)
Matched: Intel to Incyte(2011 #56)
Matched: Sears Holdings to Capri Holdings(2011 #57)
Matched: Caterpillar to Caterpillar Inc.(2011 #58)
Could not be matched: Chrysler Group(2011 #59)
Could not be matched: Safeway(2011 #60)
Could not be matched: Supervalu(2011 #61)
Matched: Cisco Systems to Cisco Systems(2011 #62)
Matched: Morgan Stanley to Morgan Stanley(2011 #63)
Matched: Prudential Financial to Prudential Financial(2011 #64)
Could not be matched: Walt Disney(2011 #65)
Matched: Comcast to Comcast Corp.(2011 #66)
Could not be matched: Sysco(2011 #67)
Could not be matched: Sunoco(2011 #68)
Matched: Abbott Laboratories to Abbott Laboratories(2011 #69)
Could not be matched: Coca-Cola(2011 #70)
Could not be matched: New York Life Insurance(2011 #71)
Matched: Northrop Grumman to Northrop Grumman(2011 #72)
Could not be matched: FedEx(2011 #73)
Could not be matched: Hess(2011 #74)
Could not be matched: Ingram Micro(2011 #75)
Could not be match

Could not be matched: FedEx(2013 #63)
Could not be matched: Enterprise Products Partners(2013 #64)
Could not be matched: Sysco(2013 #65)
Could not be matched: Walt Disney(2013 #66)
Could not be matched: Johnson Controls(2013 #67)
Matched: Goldman Sachs Group to Goldman Sachs Group(2013 #68)
Could not be matched: CHS(2013 #69)
Matched: Abbott Laboratories to Abbott Laboratories(2013 #70)
Matched: Sears Holdings to Capri Holdings(2013 #71)
Could not be matched: DuPont(2013 #72)
Matched: Humana to Humana Inc.(2013 #73)
Could not be matched: World Fuel Services(2013 #74)
Could not be matched: Hess(2013 #75)
Could not be matched: Ingram Micro(2013 #76)
Could not be matched: Plains All American Pipeline(2013 #77)
Matched: Honeywell International to Mondelez International(2013 #78)
Matched: United Continental Holdings to United Airlines Holdings(2013 #79)
Could not be matched: Oracle(2013 #80)
Could not be matched: Liberty Mutual Insurance Group(2013 #81)
Matched: HCA Holdings to Capri Holdin

Could not be matched: Cigna(2015 #90)
Matched: Mondelez International to Mondelez International(2015 #91)
Could not be matched: TIAA-CREF(2015 #92)
Could not be matched: INTL FCStone(2015 #93)
Could not be matched: Massachusetts Mutual Life Insurance(2015 #94)
Could not be matched: DirecTV(2015 #95)
Matched: Halliburton to Halliburton Co.(2015 #96)
Could not be matched: Twenty-First Century Fox(2015 #97)
Could not be matched: 3M(2015 #98)
Matched: Sears Holdings to Capri Holdings(2015 #99)
Matched: General Dynamics to General Dynamics(2015 #100)
-----------------------2016-----------------------
Matched: Walmart to Walmart(2016 #1)
Matched: Exxon Mobil to Exxon Mobil Corp.(2016 #2)
Could not be matched: Apple(2016 #3)
Matched: Berkshire Hathaway to Berkshire Hathaway(2016 #4)
Matched: McKesson to McKesson Corp.(2016 #5)
Matched: UnitedHealth Group to United Health Group Inc.(2016 #6)
Matched: CVS Health to CVS Health(2016 #7)
Matched: General Motors to General Motors(2016 #8)
Matched: 

Matched: Delta Air Lines to Delta Air Lines Inc.(2017 #71)
Could not be matched: Best Buy(2017 #72)
Matched: Honeywell International to Mondelez International(2017 #73)
Matched: Caterpillar to Caterpillar Inc.(2017 #74)
Could not be matched: Liberty Mutual Insurance Group(2017 #75)
Matched: Morgan Stanley to Morgan Stanley(2017 #76)
Could not be matched: Massachusetts Mutual Life Insurance(2017 #77)
Matched: Goldman Sachs Group to Goldman Sachs Group(2017 #78)
Could not be matched: Energy Transfer Equity(2017 #79)
Could not be matched: TIAA(2017 #80)
Could not be matched: Oracle(2017 #81)
Matched: Tyson Foods to Tyson Foods(2017 #82)
Matched: United Continental Holdings to United Airlines Holdings(2017 #83)
Matched: Allstate to Allstate Corp(2017 #84)
Could not be matched: Publix Super Markets(2017 #85)
Matched: American Express to American Express Co(2017 #86)
Could not be matched: TJX(2017 #87)
Matched: Nike to Nike(2017 #88)
Could not be matched: Exelon(2017 #89)
Matched: General Dy

Unnamed: 0_level_0,#1 Revenue,#1 Company,#1 isTech,#1 State,#2 Revenue,#2 Company,#2 isTech,#2 State,#3 Revenue,#3 Company,...,#98 isTech,#98 State,#99 Revenue,#99 Company,#99 isTech,#99 State,#100 Revenue,#100 Company,#100 isTech,#100 State
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011,479457,Wal-Mart Stores,,,403109,Exxon Mobil,False,TX,223149,Chevron,...,,,29429.0,Plains All American Pipeline,,,29174.5,Rite Aid,,
2012,504341,Exxon Mobil,False,TX,497687,Wal-Mart Stores,,,273503,Chevron,...,,,34627.1,Philip Morris International,False,NY,34182.5,Nationwide,,
2013,514878,Wal-Mart Stores,,,493724,Exxon Mobil,False,TX,256691,Chevron,...,False,VA,34434.5,Philip Morris International,False,NY,33314.3,Nationwide,,
2014,514362,Wal-Mart Stores,,,440248,Exxon Mobil,False,TX,237968,Chevron,...,,,33713.1,General Dynamics,False,VA,33712.0,Philip Morris International,False,NY
2015,523845,Walmart,False,AR,412686,Exxon Mobil,False,TX,219810,Chevron,...,,,33651.5,Sears Holdings,False,NY,33278.3,General Dynamics,False,VA
2016,513568,Walmart,False,AR,262258,Exxon Mobil,False,TX,248955,Apple,...,,,29951.5,Time Warner,,,29944.0,Northwestern Mutual,,
2017,506759,Walmart,False,AR,233216,Berkshire Hathaway,False,NE,224909,Apple,...,False,CA,28812.5,Travelers Cos.,,,28702.0,Capital One Financial,False,VA
2018,509409,Walmart,False,AR,248791,Exxon Mobil,False,TX,246524,Berkshire Hathaway,...,,,31534.2,General Dynamics,False,VA,30559.7,USAA,,


In [145]:
# Earlier export variants, kept commented out:
#inflateDF(df_raw_revenue, start_year, end_year, top_x_companies)
#df_raw_revenue.to_csv('inflated_revenue_1980-2018.csv')
#df_raw_revenue.to_csv('cleaned_revenue_1980-2018.csv')
# Write the populated table to CSV, then display it as the cell output.
# NOTE(review): the prefix literal ends with a space, so the resulting file name
# contains "top_ " followed by csv_suffix -- confirm the space is intended.
df_raw_revenue.to_csv('cleaned_revenue_top_ ' + csv_suffix)
df_raw_revenue

Unnamed: 0_level_0,#1 Revenue,#1 Company,#1 isTech,#1 State,#2 Revenue,#2 Company,#2 isTech,#2 State,#3 Revenue,#3 Company,...,#98 isTech,#98 State,#99 Revenue,#99 Company,#99 isTech,#99 State,#100 Revenue,#100 Company,#100 isTech,#100 State
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011,479457,Wal-Mart Stores,,,403109,Exxon Mobil,False,TX,223149,Chevron,...,,,29429.0,Plains All American Pipeline,,,29174.5,Rite Aid,,
2012,504341,Exxon Mobil,False,TX,497687,Wal-Mart Stores,,,273503,Chevron,...,,,34627.1,Philip Morris International,False,NY,34182.5,Nationwide,,
2013,514878,Wal-Mart Stores,,,493724,Exxon Mobil,False,TX,256691,Chevron,...,False,VA,34434.5,Philip Morris International,False,NY,33314.3,Nationwide,,
2014,514362,Wal-Mart Stores,,,440248,Exxon Mobil,False,TX,237968,Chevron,...,,,33713.1,General Dynamics,False,VA,33712.0,Philip Morris International,False,NY
2015,523845,Walmart,False,AR,412686,Exxon Mobil,False,TX,219810,Chevron,...,,,33651.5,Sears Holdings,False,NY,33278.3,General Dynamics,False,VA
2016,513568,Walmart,False,AR,262258,Exxon Mobil,False,TX,248955,Apple,...,,,29951.5,Time Warner,,,29944.0,Northwestern Mutual,,
2017,506759,Walmart,False,AR,233216,Berkshire Hathaway,False,NE,224909,Apple,...,False,CA,28812.5,Travelers Cos.,,,28702.0,Capital One Financial,False,VA
2018,509409,Walmart,False,AR,248791,Exxon Mobil,False,TX,246524,Berkshire Hathaway,...,,,31534.2,General Dynamics,False,VA,30559.7,USAA,,


In [31]:
# Append the two mean-revenue columns in place, save the result and display it.
# NOTE(review): the saved output under this cell lists columns '#1' ... '#500' and
# years 2010-2018, which does not match the '#<rank> Revenue/Company/isTech/State'
# layout built earlier in this notebook, and this cell's execution count (31) is
# lower than the preceding cell's (145). It appears to have been run against an
# earlier version of the data. With the current layout each row also holds company
# names and state codes, so the numeric comparison in sortThroughRow looks like it
# would fail -- confirm before re-running.
# NOTE(review): the file-name prefix literal ends with a space -- confirm intended.
calculateMeansForDF(df_raw_revenue, start_year, end_year)
df_raw_revenue.to_csv('full_mean_revenue_top_ ' + csv_suffix)
df_raw_revenue

Unnamed: 0_level_0,#1,#2,#3,#4,#5,#6,#7,#8,#9,#10,...,#493,#494,#495,#496,#497,#498,#499,#500,Mean Revenue Information Technology ($ millions),Mean Revenue Not Information Technology ($ millions)
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010,478605.342655,333734.293255,191725.16344,183813.556164,176393.200141,163572.597658,144230.898604,138708.718659,135571.276296,-134305.043952,...,4939.947554,4938.306142,4917.319513,4916.264319,4916.147076,4896.684615,4884.139536,4879.449786,1233.635772,21660.498993
2011,479457.31862,403108.802022,223149.068899,210225.228449,174831.567781,172334.542236,154782.623489,154108.642539,152519.729607,146564.147516,...,5080.543417,5048.60604,5029.284495,5012.91801,5004.393799,5001.438739,5001.097771,4984.617629,1428.535698,23085.704465
2012,504341.151694,497686.769471,273503.349378,264206.589475,167334.997134,164373.039853,159999.142033,153054.131672,151732.386073,-141689.569261,...,5380.963295,5374.282189,5373.502726,5372.723264,5371.609746,5352.902647,5322.614964,5311.368434,1611.094751,24557.143079
2013,514878.49446,493724.185588,256690.791189,186072.537022,178293.861919,-171758.589594,167092.262486,161185.824929,151760.985512,147333.900952,...,5472.947621,5472.508644,5467.460407,5454.400837,5355.630978,5323.146669,5299.551647,5290.552615,1681.570455,24794.080012
2014,514361.546862,440248.490141,237967.837135,196708.242726,-184569.891651,174056.826908,167849.421039,158659.263775,157918.435586,148768.235528,...,5413.661382,5409.341684,5375.864026,5369.384479,5369.384479,5354.265536,5352.105687,5351.025763,1684.600103,24688.241927
2015,523844.609066,412686.01505,219810.419033,209982.892202,-197170.757013,168191.903336,161186.109596,159985.578659,155407.812895,150327.398959,...,5734.07229,-5720.049916,5717.892628,5709.263475,5696.319745,5690.926524,5643.466182,5598.163128,1298.989574,25717.156739
2016,513567.976809,262258.084256,-248954.721133,224567.885091,193059.078848,167351.38683,163285.493881,162290.591074,159310.143479,156373.369348,...,5575.290462,5535.877824,5529.486586,-5513.508489,5513.508489,5507.117251,5477.291471,5464.508993,1467.594552,24086.771651
2017,506759.275298,233216.089377,-224908.697059,213816.529161,200761.459526,192785.737108,185157.329398,173532.195088,170826.686529,158325.443048,...,5475.682319,5461.080499,5423.532963,5420.404002,5392.24335,5391.200363,5385.985428,5366.168672,1608.352872,23539.889452
2018,509409.097918,248790.8003,246524.465702,-233387.666365,204803.953944,202130.371439,188112.898505,181088.890242,163455.0559,160161.438459,...,5638.958134,-5624.399022,-5619.002987,5604.545686,5553.843322,5552.417954,5528.390328,5527.270396,1685.272675,24408.06993


In [32]:
# Keep only the two mean-revenue columns and display them.
# This rebinds df_raw_revenue, so the per-rank columns are no longer reachable
# under this name; cells that expect them need the frame rebuilt first.
df_raw_revenue = df_raw_revenue[['Mean Revenue Information Technology ($ millions)', 'Mean Revenue Not Information Technology ($ millions)' ]]
df_raw_revenue

Unnamed: 0_level_0,Mean Revenue Information Technology ($ millions),Mean Revenue Not Information Technology ($ millions)
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010,1233.635772,21660.498993
2011,1428.535698,23085.704465
2012,1611.094751,24557.143079
2013,1681.570455,24794.080012
2014,1684.600103,24688.241927
2015,1298.989574,25717.156739
2016,1467.594552,24086.771651
2017,1608.352872,23539.889452
2018,1685.272675,24408.06993


In [None]:
#df_raw_revenue.to_csv('final_mean_revenue_1980-2018.csv')

In [None]:
#table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
#df_company_info = table[0]
#df_company_info.to_csv('S&P500-Info.csv')
#df_company_info = df_company_info[['Symbol','Security', 'GICS Sector', 'GICS Sub Industry']]
#df_company_info.to_csv('S&P500-Condensed.csv')
#df_company_info

#url = "https://fortune.com/fortune500/2014/apple/"
#page = requests.get(url)
#soup = BeautifulSoup(page.content, "html.parser")
#container = soup.body.table.findAll("td", {"class": "dataTable__value--3n5tL dataTable__valueAlignLeft--3uvNx"})
#print(container[2].div.string)
#print(getCompanySector("walmart", "2019"))

#url = "https://fortune.com/fortune500/2018/search/"
#page = requests.get(url)
#soup = BeautifulSoup(page.content, "html.parser")
#container = soup.findAll("div", {"class": "rt-tr-group"})
#soup.find("div", {"class": "searchWrapper__content--3nwCz"})
#print(container[2].div.string)


In [6]:
# Load the S&P 500 company table and keep the ticker, name, sector,
# sub-industry and headquarters columns, then display it.
# NOTE(review): populateRawDF reads 'S&P500-Condensed.csv' and needs a
# 'Headquarters Location' column, but this selection is not written to that file
# here and the commented-out export above omits that column -- confirm how the
# condensed file was produced.
company_info = pd.read_csv('S&P500-Info.csv')
company_info = company_info[['Symbol', 'Security', 'GICS Sector', 'GICS Sub Industry', 'Headquarters Location']]
company_info

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub Industry,Headquarters Location
0,MMM,3M Company,Industrials,Industrial Conglomerates,"St. Paul, Minnesota"
1,ABT,Abbott Laboratories,Health Care,Health Care Equipment,"North Chicago, Illinois"
2,ABBV,AbbVie Inc.,Health Care,Pharmaceuticals,"North Chicago, Illinois"
3,ABMD,ABIOMED Inc,Health Care,Health Care Equipment,"Danvers, Massachusetts"
4,ACN,Accenture plc,Information Technology,IT Consulting & Other Services,"Dublin, Ireland"
...,...,...,...,...,...
500,YUM,Yum! Brands Inc,Consumer Discretionary,Restaurants,"Louisville, Kentucky"
501,ZBRA,Zebra Technologies,Information Technology,Electronic Equipment & Instruments,"Lincolnshire, Illinois"
502,ZBH,Zimmer Biomet Holdings,Health Care,Health Care Equipment,"Warsaw, Indiana"
503,ZION,Zions Bancorp,Financials,Regional Banks,"Salt Lake City, Utah"
