## Retrieving S&P 500 stats from TradingView

In [9]:
import json
from io import StringIO

import pandas as pd
from requests_html import HTMLSession

pd.set_option('display.float_format', lambda x: '%.2f' % x)
session = HTMLSession()

In [3]:
# TradingView page for the SPX symbol's components.
link = "https://www.tradingview.com/symbols/SPX/components/"

# Fetch the page with the shared session; the bare name on the last line
# makes the notebook display the response object.
res = session.get(url=link)
res

<Response [200]>

In [5]:
# Collect the <table> elements found in the fetched page.
table = res.html.find("table")

# Parse the first table's markup into a DataFrame (read_html returns a list of
# frames, hence the trailing [0]). The markup is wrapped in StringIO because
# pandas >= 2.1 deprecates passing a literal HTML string to read_html.
df = pd.read_html(StringIO(table[0].html))[0]
df.head()

Unnamed: 0,Ticker,Price,Change % 1D,Change 1D,Technical Rating 1D,Volume 1D,Volume * Price 1D,Market cap,P/E(TTM),EPS(TTM),Employees,Sector
0,"AAgilent Technologies, Inc.",156.49USD,−1.06%,−1.68USD,Buy,809.12K,126.619M,46.332BUSD,37.84,4.20USD,18.1K,Health Technology
1,"AALAmerican Airlines Group, Inc.",16.83USD,9.71%,1.49USD,Strong Buy,96.17M,1.619B,10.938BUSD,—,−2.48USD,123.4K,Transportation
2,AAPAdvance Auto Parts Inc.,152.19USD,−0.71%,−1.09USD,Neutral,997.494K,151.809M,9.018BUSD,19.71,7.83USD,68K,Retail Trade
3,AAPLApple Inc,133.41USD,−0.06%,−0.08USD,Neutral,71.288M,9.511B,2.122TUSD,21.88,6.14USD,164K,Electronic Technology
4,ABBVAbbVie Inc.,152.22USD,−3.15%,−4.95USD,Sell,7.662M,1.166B,269.198BUSD,20.95,7.53USD,50K,Health Technology


In [130]:
# Scanner fields to request. The same list is reused as the DataFrame column
# labels when the response is parsed, so the order here is the column order.
select_columns = [
    "name",
    "description",
    "total_liabilities",
    "type",
    "typespecs",
    "close",
    "currency",
    "change",
    "change_abs",
    "Recommend.All",
    "volume",
    "market_cap_basic",
    "fundamental_currency_code",
    "price_earnings_ttm",
    "earnings_per_share_basic_ttm",
    "number_of_employees",
    "total_revenue",
    "gross_profit",
    "ebitda",
    "net_income",
    "total_assets",
    "total_debt",
    "total_current_assets",
    "dividends_paid",
    "dps_common_stock_prim_issue_fy",
    "total_shares_outstanding_fundamental",
    "sector",
    "market",
]

# Query body for the scanner endpoint. Key order is kept as-is because the
# dict is serialised to JSON text before being sent.
data = {
    "columns": select_columns,
    "ignore_unknown_fields": False,
    "options": {"lang": "en"},
    # presumably the window of result rows to return — confirm against the API
    "range": [0, 600],
    "sort": {"sortBy": "name", "sortOrder": "asc"},
    "symbols": {
        "query": {"types": []},
        "tickers": [],
        # restrict the scan to the members of the SP:SPX index group
        "groups": [{"type": "index", "values": ["SP:SPX"]}],
    },
    "markets": ["america", "canada"],
}

# URL the query above is POSTed to.
postlink = "https://scanner.tradingview.com/global/scan"

In [131]:
# Serialise the query dict to JSON text and send it as the raw request body.
payload = json.dumps(data)
res = session.post(postlink, data=payload)

# Print the response object so its status code shows in the cell output.
print(res)

<Response [200]>


In [132]:
# Decode the response body and keep the list stored under its "data" key.
js = json.loads(res.content)["data"]

# Each entry keeps its field values under "d"; collect one value list per entry.
info = [entry["d"] for entry in js]

# Label the values with the requested field names.
stocks = pd.DataFrame(info, columns=select_columns)

# typespecs arrives as a sequence of strings per row; collapse each one into a
# single space-separated string.
stocks["typespecs"] = stocks["typespecs"].apply(" ".join)

display(stocks.head(3))

Unnamed: 0,name,description,total_liabilities,type,typespecs,close,currency,change,change_abs,Recommend.All,...,ebitda,net_income,total_assets,total_debt,total_current_assets,dividends_paid,dps_common_stock_prim_issue_fy,total_shares_outstanding_fundamental,sector,market
0,A,"Agilent Technologies, Inc.",,stock,common,156.49,USD,-1.06,-1.68,0.42,...,1899000000.0,1254000000.0,10532000000.0,2921000000.0,3778000000.0,-250000000.0,0.84,296072000.0,Health Technology,america
1,AAL,"American Airlines Group, Inc.",,stock,common,16.83,USD,9.71,1.49,0.6,...,1758000000.0,-1993000000.0,66652000000.0,44682000000.0,17377000000.0,0.0,0.0,649901000.0,Transportation,america
2,AAP,Advance Auto Parts Inc.,,stock,common,152.19,USD,-0.71,-1.09,-0.09,...,1012871000.0,616108000.0,12132079000.0,4086856000.0,6162519000.0,-160925000.0,3.25,59253700.0,Retail Trade,america


In [119]:
# Flag every row whose description appears more than once (keep=False marks all
# occurrences, not just the repeats) and export those rows for manual review.
repeated_description = stocks.duplicated(subset="description", keep=False)
stocks[repeated_description].to_excel("choosing_share_class.xlsx", index=False)

# Export the complete, unfiltered scan result.
stocks.to_excel("S&P500_Fundementals_Raw.xlsx", index=False)

Unnamed: 0,name,description,type,typespecs,close,currency,change,change_abs,Recommend.All,volume,...,ebitda,net_income,total_assets,total_debt,total_current_assets,dividends_paid,dps_common_stock_prim_issue_fy,total_shares_outstanding_fundamental,sector,market
0,A,"Agilent Technologies, Inc.",stock,common,156.49,USD,-1.06,-1.68,0.42,809121,...,1899000000.0,1254000000.0,10532000000.0,2921000000.0,3778000000.0,-250000000.0,0.84,296072000.0,Health Technology,america
1,AAL,"American Airlines Group, Inc.",stock,common,16.83,USD,9.71,1.49,0.6,96357971,...,1758000000.0,-1993000000.0,66652000000.0,44682000000.0,17377000000.0,0.0,0.0,649901000.0,Transportation,america
2,AAP,Advance Auto Parts Inc.,stock,common,152.19,USD,-0.71,-1.09,-0.09,997531,...,1012871000.0,616108000.0,12132079000.0,4086856000.0,6162519000.0,-160925000.0,3.25,59253700.0,Retail Trade,america


## Quick Data Cleaning

In [122]:
# Start from the raw export on disk so this cell can be re-run on its own.
df = pd.read_excel("S&P500_Fundementals_Raw.xlsx")

# Keep one row per company: for companies with several share classes only the
# first row with a given description survives.
df = df.drop_duplicates(subset="description")

# Remove the "SPY" row. A boolean filter replaces drop(index[0]), which raised
# IndexError when no row matched and KeyError when the matching row had
# already been removed by the de-duplication above.
# NOTE(review): the original comment described this row as "the s&p 500 index
# itself"; the value matched is "SPY" — confirm that is the intended ticker.
df = df[df["name"] != "SPY"]

# Drop columns that are not relevant to descriptive stats.
df = df.drop(
    columns=[
        "change",
        "change_abs",
        "market",
        "currency",
        "fundamental_currency_code",
        "typespecs",
    ]
)

# Shorter column names for housekeeping.
# NOTE(review): "dividend_ttm" is fed by a field whose name ends in "_fy" —
# confirm the TTM label is accurate.
df = df.rename(
    columns={
        "Recommend.All": "rating",
        "market_cap_basic": "market_cap",
        "price_earnings_ttm": "pe_ttm",
        "earnings_per_share_basic_ttm": "eps_ttm",
        "dps_common_stock_prim_issue_fy": "dividend_ttm",
        "total_shares_outstanding_fundamental": "shares_outstanding",
    }
)

# Persist the cleaned frame next to the raw export.
df.to_excel("S&P500_Fundementals.xlsx", index=False)

In [123]:
# Print a summary of the cleaned frame: columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 500 entries, 0 to 503
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   name                  500 non-null    object 
 1   description           500 non-null    object 
 2   type                  500 non-null    object 
 3   close                 500 non-null    float64
 4   rating                500 non-null    float64
 5   volume                500 non-null    int64  
 6   market_cap            499 non-null    float64
 7   pe_ttm                468 non-null    float64
 8   eps_ttm               498 non-null    float64
 9   number_of_employees   494 non-null    float64
 10  total_revenue         499 non-null    float64
 11  gross_profit          446 non-null    float64
 12  ebitda                449 non-null    float64
 13  net_income            499 non-null    float64
 14  total_assets          499 non-null    float64
 15  total_debt            4