In [2]:
# !pip install yfinance --upgrade

#  The Dow Jones Index Constituents

__"The Dow Jones Industrial Average (DJIA), Dow Jones, or simply the Dow, is a stock market index of 30 prominent companies listed on stock exchanges in the United States.[...]"__ (Wikipedia) https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average

(hint: you can load tables from a website with pd.read_html(url))

## Loading the DJIA 30 Constituents from the Web

In [3]:
import yfinance as yf
from yahooquery import Ticker
import pandas as pd

In [4]:
url = "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average"
url

'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'

In [5]:
df = pd.read_html(url)[1]
df

Unnamed: 0,Company,Exchange,Symbol,Industry,Date added,Notes,Index weighting
0,3M,NYSE,MMM,Conglomerate,1976-08-09,As Minnesota Mining and Manufacturing,2.41%
1,American Express,NYSE,AXP,Financial services,1982-08-30,,3.02%
2,Amgen,NASDAQ,AMGN,Biopharmaceutical,2020-08-31,,5.48%
3,Apple,NASDAQ,AAPL,Information technology,2015-03-19,,2.84%
4,Boeing,NYSE,BA,Aerospace and defense,1987-03-12,,3.36%
5,Caterpillar,NYSE,CAT,Construction and mining,1991-05-06,,4.52%
6,Chevron,NYSE,CVX,Petroleum industry,2008-02-19,Also 1930-07-18 to 1999-11-01,3.50%
7,Cisco,NASDAQ,CSCO,Information technology,2009-06-08,,0.96%
8,Coca-Cola,NYSE,KO,Drink industry,1987-03-12,Also 1932-05-26 to 1935-11-20,1.22%
9,Disney,NYSE,DIS,Broadcasting and entertainment,1991-05-06,,1.89%


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Company          30 non-null     object
 1   Exchange         30 non-null     object
 2   Symbol           30 non-null     object
 3   Industry         30 non-null     object
 4   Date added       30 non-null     object
 5   Notes            5 non-null      object
 6   Index weighting  30 non-null     object
dtypes: object(7)
memory usage: 1.8+ KB


In [7]:
df.rename(columns = {"Date added":"Date_Added"}, inplace = True)
df.rename(columns = {"Index weighting":"Weights"}, inplace = True)

In [8]:
df.Date_Added = pd.to_datetime(df.Date_Added)

In [9]:
df.Weights = pd.to_numeric(df.Weights.str.replace("%", ""))

In [10]:
df

Unnamed: 0,Company,Exchange,Symbol,Industry,Date_Added,Notes,Weights
0,3M,NYSE,MMM,Conglomerate,1976-08-09,As Minnesota Mining and Manufacturing,2.41
1,American Express,NYSE,AXP,Financial services,1982-08-30,,3.02
2,Amgen,NASDAQ,AMGN,Biopharmaceutical,2020-08-31,,5.48
3,Apple,NASDAQ,AAPL,Information technology,2015-03-19,,2.84
4,Boeing,NYSE,BA,Aerospace and defense,1987-03-12,,3.36
5,Caterpillar,NYSE,CAT,Construction and mining,1991-05-06,,4.52
6,Chevron,NYSE,CVX,Petroleum industry,2008-02-19,Also 1930-07-18 to 1999-11-01,3.5
7,Cisco,NASDAQ,CSCO,Information technology,2009-06-08,,0.96
8,Coca-Cola,NYSE,KO,Drink industry,1987-03-12,Also 1932-05-26 to 1935-11-20,1.22
9,Disney,NYSE,DIS,Broadcasting and entertainment,1991-05-06,,1.89


In [11]:
df.Weights.sum()

100.00999999999999

In [12]:
df.drop(columns = "Notes", inplace = True)

In [13]:
df.set_index("Symbol", inplace = True)

In [14]:
df

Unnamed: 0_level_0,Company,Exchange,Industry,Date_Added,Weights
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MMM,3M,NYSE,Conglomerate,1976-08-09,2.41
AXP,American Express,NYSE,Financial services,1982-08-30,3.02
AMGN,Amgen,NASDAQ,Biopharmaceutical,2020-08-31,5.48
AAPL,Apple,NASDAQ,Information technology,2015-03-19,2.84
BA,Boeing,NYSE,Aerospace and defense,1987-03-12,3.36
CAT,Caterpillar,NYSE,Construction and mining,1991-05-06,4.52
CVX,Chevron,NYSE,Petroleum industry,2008-02-19,3.5
CSCO,Cisco,NASDAQ,Information technology,2009-06-08,0.96
KO,Coca-Cola,NYSE,Drink industry,1987-03-12,1.22
DIS,Disney,NYSE,Broadcasting and entertainment,1991-05-06,1.89


In [15]:
symbols = df.index.to_list()
symbols

['MMM',
 'AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CVX',
 'CSCO',
 'KO',
 'DIS',
 'DOW',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'JPM',
 'MCD',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'CRM',
 'TRV',
 'UNH',
 'VZ',
 'V',
 'WBA',
 'WMT']

In [16]:
last_update = df["Date_Added"].max()
last_update

Timestamp('2020-08-31 00:00:00')

## Load Historical Price/Volume Data (Time Series)

In [17]:
symbols

['MMM',
 'AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CVX',
 'CSCO',
 'KO',
 'DIS',
 'DOW',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'JPM',
 'MCD',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'CRM',
 'TRV',
 'UNH',
 'VZ',
 'V',
 'WBA',
 'WMT']

In [18]:
last_update

Timestamp('2020-08-31 00:00:00')

In [19]:
ts = yf.download(tickers = symbols, start = last_update)
ts

[*********************100%%**********************]  30 of 30 completed


Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,DOW,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-08-31,126.748955,230.460190,97.282433,171.820007,132.653931,272.649994,38.233528,73.758072,131.869995,39.024300,...,7929273,28774200,3519800,5596800,2297900,4341600,9326900,14555500,7940100,15078800
2020-09-01,131.797684,228.222168,98.125122,172.100006,136.140152,281.250000,38.061470,73.011086,133.550003,40.883839,...,6797433,25725500,4912400,5671800,969900,2487100,5642900,13358400,10728300,35599400
2020-09-02,129.067062,234.827011,99.973289,174.779999,139.141663,276.690002,38.414646,73.107780,135.389999,42.570389,...,11571597,34080800,6775400,8384800,1391400,2846500,9863800,21711900,7672400,17222000
2020-09-03,118.733841,225.538376,100.030739,168.770004,136.801971,265.010010,37.083443,72.308037,133.240005,41.852520,...,12983672,58400300,7264400,7277900,1299400,3872500,11310200,23126800,8278000,16005000
2020-09-04,118.812424,225.984192,101.189438,171.050003,138.125641,254.699997,36.965721,72.000481,131.990005,42.034145,...,9359688,59664100,5157600,7437700,1460700,2901000,13243600,22250100,6994400,11327400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-02,177.570007,266.589996,150.850006,192.009995,239.119995,208.110001,52.750000,148.759995,83.290001,48.990002,...,5379100,24348100,8135100,5859200,1284300,2350900,6286700,24946000,9311000,5164100
2023-11-03,176.649994,269.859985,152.759995,195.050003,240.750000,207.470001,53.009998,147.610001,85.070000,49.730000,...,6005900,23624000,7563300,6587700,1682400,2739500,5029800,23311800,8332100,5746400
2023-11-06,179.229996,272.829987,152.990005,192.949997,238.279999,207.419998,53.290001,147.000000,84.019997,49.340000,...,5951200,23828300,5231900,6480800,1327700,2684900,5254100,15544000,7649000,4843000
2023-11-07,181.820007,271.109985,152.979996,191.410004,234.919998,211.839996,53.130001,144.410004,84.589996,48.529999,...,6616400,25833900,7745800,6003800,1022300,2604800,4219700,17654600,6667400,5007800


In [20]:
ts.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 804 entries, 2020-08-31 to 2023-11-08
Columns: 180 entries, ('Adj Close', 'AAPL') to ('Volume', 'WMT')
dtypes: float64(150), int64(30)
memory usage: 1.1 MB


In [21]:
ts.Close

Unnamed: 0_level_0,AAPL,AMGN,AXP,BA,CAT,CRM,CSCO,CVX,DIS,DOW,...,MRK,MSFT,NKE,PG,TRV,UNH,V,VZ,WBA,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-08-31,129.039993,253.320007,101.589996,171.820007,142.309998,272.649994,42.220001,83.930000,131.869995,45.119999,...,81.364502,225.529999,111.889999,138.330002,116.040001,312.549988,211.990005,59.270000,38.020000,138.850006
2020-09-01,134.179993,250.860001,102.470001,172.100006,146.050003,281.250000,42.029999,83.080002,133.550003,47.270000,...,80.562981,227.270004,114.839996,138.179993,114.690002,312.829987,213.350006,59.160000,36.759998,147.589996
2020-09-02,131.399994,258.119995,104.400002,174.779999,149.270004,276.690002,42.419998,83.190002,135.389999,49.220001,...,82.948471,231.649994,116.800003,140.509995,117.480003,320.239990,216.479996,60.529999,37.169998,147.679993
2020-09-03,120.879997,247.910004,104.459999,168.770004,146.759995,265.010010,40.950001,82.279999,133.240005,48.389999,...,81.574425,217.300003,112.849998,138.259995,115.349998,316.230011,208.960007,60.610001,37.090000,144.539993
2020-09-04,120.959999,248.399994,105.669998,171.050003,148.179993,254.699997,40.820000,81.930000,131.990005,48.599998,...,81.335876,214.250000,112.400002,137.960007,116.570000,312.000000,204.660004,60.480000,36.889999,142.830002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-02,177.570007,266.589996,150.850006,192.009995,239.119995,208.110001,52.750000,148.759995,83.290001,48.990002,...,102.849998,348.320007,105.080002,151.440002,168.949997,536.130005,243.250000,35.900002,21.500000,165.520004
2023-11-03,176.649994,269.859985,152.759995,195.050003,240.750000,207.470001,53.009998,147.610001,85.070000,49.730000,...,103.379997,352.799988,107.059998,150.070007,168.410004,530.900024,243.600006,36.020000,22.110001,164.660004
2023-11-06,179.229996,272.829987,152.990005,192.949997,238.279999,207.419998,53.290001,147.000000,84.019997,49.340000,...,104.339996,356.529999,107.250000,150.940002,169.550003,533.460022,243.490005,35.639999,21.770000,164.880005
2023-11-07,181.820007,271.109985,152.979996,191.410004,234.919998,211.839996,53.130001,144.410004,84.589996,48.529999,...,104.000000,360.529999,109.360001,150.589996,169.990005,537.830017,244.770004,35.939999,21.650000,165.649994


In [22]:
ts.dropna(inplace = True)

In [23]:
perf = ts.Close.iloc[-1].div(ts.Close.iloc[0]).sub(1).sort_values(ascending = False)
perf # total price increase/decrease (in %) since last reconstitution

UNH     0.717261
CVX     0.696652
CAT     0.650060
MSFT    0.610429
GS      0.584224
AXP     0.506152
TRV     0.452603
JPM     0.444456
AAPL    0.417313
MRK     0.283115
IBM     0.255692
CSCO    0.255329
MCD     0.252763
WMT     0.183291
KO      0.152635
V       0.150573
BA      0.116459
HON     0.115192
PG      0.084436
AMGN    0.078715
DOW     0.070479
HD      0.038170
JNJ    -0.019947
NKE    -0.022343
CRM    -0.224390
INTC   -0.255741
DIS    -0.359217
VZ     -0.396491
MMM    -0.433689
WBA    -0.447659
dtype: float64

In [24]:
perf.index.name = "Symbol"

In [25]:
perf

Symbol
UNH     0.717261
CVX     0.696652
CAT     0.650060
MSFT    0.610429
GS      0.584224
AXP     0.506152
TRV     0.452603
JPM     0.444456
AAPL    0.417313
MRK     0.283115
IBM     0.255692
CSCO    0.255329
MCD     0.252763
WMT     0.183291
KO      0.152635
V       0.150573
BA      0.116459
HON     0.115192
PG      0.084436
AMGN    0.078715
DOW     0.070479
HD      0.038170
JNJ    -0.019947
NKE    -0.022343
CRM    -0.224390
INTC   -0.255741
DIS    -0.359217
VZ     -0.396491
MMM    -0.433689
WBA    -0.447659
dtype: float64

In [26]:
df

Unnamed: 0_level_0,Company,Exchange,Industry,Date_Added,Weights
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MMM,3M,NYSE,Conglomerate,1976-08-09,2.41
AXP,American Express,NYSE,Financial services,1982-08-30,3.02
AMGN,Amgen,NASDAQ,Biopharmaceutical,2020-08-31,5.48
AAPL,Apple,NASDAQ,Information technology,2015-03-19,2.84
BA,Boeing,NYSE,Aerospace and defense,1987-03-12,3.36
CAT,Caterpillar,NYSE,Construction and mining,1991-05-06,4.52
CVX,Chevron,NYSE,Petroleum industry,2008-02-19,3.5
CSCO,Cisco,NASDAQ,Information technology,2009-06-08,0.96
KO,Coca-Cola,NYSE,Drink industry,1987-03-12,1.22
DIS,Disney,NYSE,Broadcasting and entertainment,1991-05-06,1.89


In [27]:
df["Performance"] = perf
df

Unnamed: 0_level_0,Company,Exchange,Industry,Date_Added,Weights,Performance
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MMM,3M,NYSE,Conglomerate,1976-08-09,2.41,-0.433689
AXP,American Express,NYSE,Financial services,1982-08-30,3.02,0.506152
AMGN,Amgen,NASDAQ,Biopharmaceutical,2020-08-31,5.48,0.078715
AAPL,Apple,NASDAQ,Information technology,2015-03-19,2.84,0.417313
BA,Boeing,NYSE,Aerospace and defense,1987-03-12,3.36,0.116459
CAT,Caterpillar,NYSE,Construction and mining,1991-05-06,4.52,0.65006
CVX,Chevron,NYSE,Petroleum industry,2008-02-19,3.5,0.696652
CSCO,Cisco,NASDAQ,Information technology,2009-06-08,0.96,0.255329
KO,Coca-Cola,NYSE,Drink industry,1987-03-12,1.22,0.152635
DIS,Disney,NYSE,Broadcasting and entertainment,1991-05-06,1.89,-0.359217


In [28]:
df.sort_values(by = "Performance", ascending = False)

Unnamed: 0_level_0,Company,Exchange,Industry,Date_Added,Weights,Performance
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
UNH,UnitedHealth Group,NYSE,Managed health care,2012-09-24,10.29,0.717261
CVX,Chevron,NYSE,Petroleum industry,2008-02-19,3.5,0.696652
CAT,Caterpillar,NYSE,Construction and mining,1991-05-06,4.52,0.65006
MSFT,Microsoft,NASDAQ,Information technology,1999-11-01,4.88,0.610429
GS,Goldman Sachs,NYSE,Financial services,2019-04-02,7.36,0.584224
AXP,American Express,NYSE,Financial services,1982-08-30,3.02,0.506152
TRV,Travelers,NYSE,Insurance,2009-06-08,3.62,0.452603
JPM,JPMorgan Chase,NYSE,Financial services,1991-05-06,2.61,0.444456
AAPL,Apple,NASDAQ,Information technology,2015-03-19,2.84,0.417313
MRK,Merck,NYSE,Pharmaceutical industry,1979-06-29,2.1,0.283115


## More detailed Stock Data (cross-sectional)

__yahooquery__

In [29]:
msft = Ticker("MSFT")
msft

<yahooquery.ticker.Ticker at 0x1d609fbd190>

In [30]:
msft.key_stats["MSFT"] # key statistics

{'maxAge': 1,
 'priceHint': 2,
 'enterpriseValue': 2661133582336,
 'forwardPE': 30.469799,
 'profitMargins': 0.35314998,
 'floatShares': 7426019229,
 'sharesOutstanding': 7432260096,
 'sharesShort': 42992748,
 'sharesShortPriorMonth': 41145504,
 'sharesShortPreviousMonthDate': '2023-09-15 03:00:00',
 'dateShortInterest': '2023-10-13 03:00:00',
 'sharesPercentSharesOut': 0.0058,
 'heldPercentInsiders': 0.00052,
 'heldPercentInstitutions': 0.73212,
 'shortRatio': 1.98,
 'shortPercentOfFloat': 0.0058,
 'beta': 0.885,
 'impliedSharesOutstanding': 7432260096,
 'category': None,
 'bookValue': 29.702,
 'priceToBook': 12.228133,
 'fundFamily': None,
 'legalType': None,
 'lastFiscalYearEnd': '2023-06-30 03:00:00',
 'nextFiscalYearEnd': '2024-06-30 03:00:00',
 'mostRecentQuarter': '2023-09-30 03:00:00',
 'earningsQuarterlyGrowth': 0.27,
 'netIncomeToCommon': 77096001536,
 'trailingEps': 10.5,
 'forwardEps': 11.92,
 'pegRatio': 2.21,
 'lastSplitFactor': '2:1',
 'lastSplitDate': '2003-02-18 02:00:

In [31]:
msft.financial_data["MSFT"] # financial data

{'maxAge': 86400,
 'currentPrice': 363.2,
 'targetHighPrice': 414.96,
 'targetLowPrice': 268.78,
 'targetMeanPrice': 370.22,
 'targetMedianPrice': 377.24,
 'recommendationMean': 1.7,
 'recommendationKey': 'buy',
 'numberOfAnalystOpinions': 44,
 'totalCash': 143945007104,
 'totalCashPerShare': 19.368,
 'ebitda': 108530999296,
 'totalDebt': 105681002496,
 'quickRatio': 1.531,
 'currentRatio': 1.663,
 'totalRevenue': 218310000640,
 'debtToEquity': 47.881,
 'revenuePerShare': 29.347,
 'returnOnAssets': 0.1457,
 'returnOnEquity': 0.39106998,
 'freeCashflow': 50418876416,
 'operatingCashflow': 94966996992,
 'earningsGrowth': 0.272,
 'revenueGrowth': 0.128,
 'grossMargins': 0.69441,
 'ebitdaMargins': 0.49714002,
 'operatingMargins': 0.47587,
 'profitMargins': 0.35314998,
 'financialCurrency': 'USD'}

In [32]:
msft.summary_detail["MSFT"] # summary statistics

{'maxAge': 1,
 'priceHint': 2,
 'previousClose': 360.53,
 'open': 361.68,
 'dayLow': 360.55,
 'dayHigh': 363.87,
 'regularMarketPreviousClose': 360.53,
 'regularMarketOpen': 361.68,
 'regularMarketDayLow': 360.55,
 'regularMarketDayHigh': 363.87,
 'dividendRate': 3.0,
 'dividendYield': 0.0084,
 'exDividendDate': '2023-11-15 02:00:00',
 'payoutRatio': 0.2636,
 'fiveYearAvgDividendYield': 1.03,
 'beta': 0.885,
 'trailingPE': 34.590477,
 'forwardPE': 30.469799,
 'volume': 22943481,
 'regularMarketVolume': 22943481,
 'averageVolume': 22418103,
 'averageVolume10days': 29162530,
 'averageDailyVolume10Day': 29162530,
 'bid': 0.0,
 'ask': 0.0,
 'bidSize': 900,
 'askSize': 800,
 'marketCap': 2699396907008,
 'fiftyTwoWeekLow': 219.35,
 'fiftyTwoWeekHigh': 366.78,
 'priceToSalesTrailing12Months': 12.364971,
 'fiftyDayAverage': 330.1356,
 'twoHundredDayAverage': 308.9442,
 'trailingAnnualDividendRate': 2.79,
 'trailingAnnualDividendYield': 0.007738607,
 'currency': 'USD',
 'fromCurrency': None,
 '

In [33]:
symbols

['MMM',
 'AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CVX',
 'CSCO',
 'KO',
 'DIS',
 'DOW',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'JPM',
 'MCD',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'CRM',
 'TRV',
 'UNH',
 'VZ',
 'V',
 'WBA',
 'WMT']

In [34]:
cs = pd.DataFrame()
cs

# Extended

__Full US Market__

https://www.nasdaq.com/market-activity/stocks/screener

__S&P 500__

https://en.wikipedia.org/wiki/List_of_S%26P_500_companies

__German DAX 40__

https://en.wikipedia.org/wiki/DAX

__EURO STOXX 50__

https://en.wikipedia.org/wiki/EURO_STOXX_50

__FTSE 100__

https://en.wikipedia.org/wiki/FTSE_100_Index

__Listings for worldwide exchanges__

https://www.interactivebrokers.com/en/index.php?f=1562&p=north_america

__Example: India__

In [49]:
import pandas as pd

In [50]:
url = "https://www.interactivebrokers.com/en/index.php?f=2222&exch=nse&showcategories=STK&p=&cc=&limit=100&page={}"

In [51]:
pd.read_html(url.format(1))[2]

Unnamed: 0,IB Symbol,Product Description (click link for more details),Symbol,Currency
0,20MICRONS,20 MICRONS LIMITED,20MICRONS_BE,INR
1,21STCENMG,TWENTYFIRST CENTURY MANAGEMT,21STCENMGM,INR
2,360ONE,360 ONE WAM LTD,360ONE,INR
3,3IINFOLTD,3I INFOTECH LTD,3IINFOLTD,INR
4,3MINDIA,3M INDIA LTD,3MINDIA,INR
...,...,...,...,...
95,ALKALI,ALKALI METALS LIMITED,ALKALI,INR
96,ALKEM,ALKEM LABORATORIES LTD,ALKEM,INR
97,ALKYLAMIN,ALKYL AMINES CHEMICALS LTD,ALKYLAMINE,INR
98,ALLCARGO,ALLCARGO LOGISTICS LTD,ALLCARGO,INR


In [52]:
pages = 22

In [53]:
df = pd.DataFrame()

__Get all tickers from indian stock market__

In [54]:
for i in range(1, pages + 1):
    table = pd.read_html(url.format(i))[2]
    df = pd.concat([df, table])

In [55]:
df

Unnamed: 0,IB Symbol,Product Description (click link for more details),Symbol,Currency
0,20MICRONS,20 MICRONS LIMITED,20MICRONS_BE,INR
1,21STCENMG,TWENTYFIRST CENTURY MANAGEMT,21STCENMGM,INR
2,360ONE,360 ONE WAM LTD,360ONE,INR
3,3IINFOLTD,3I INFOTECH LTD,3IINFOLTD,INR
4,3MINDIA,3M INDIA LTD,3MINDIA,INR
...,...,...,...,...
95,TITAN,TITAN CO LTD,TITAN,INR
96,TMB,TAMILNAD MERCANTILE BANK,TMB,INR
97,TNIDETF,TATA NIFTY DIGITAL ETF,TNIDETF,INR
98,TNPETRO,TAMILNADU PETROPRODUCTS LTD,TNPETRO,INR


In [56]:
df.reset_index(drop = True, inplace = True)

In [57]:
df

Unnamed: 0,IB Symbol,Product Description (click link for more details),Symbol,Currency
0,20MICRONS,20 MICRONS LIMITED,20MICRONS_BE,INR
1,21STCENMG,TWENTYFIRST CENTURY MANAGEMT,21STCENMGM,INR
2,360ONE,360 ONE WAM LTD,360ONE,INR
3,3IINFOLTD,3I INFOTECH LTD,3IINFOLTD,INR
4,3MINDIA,3M INDIA LTD,3MINDIA,INR
...,...,...,...,...
2195,TITAN,TITAN CO LTD,TITAN,INR
2196,TMB,TAMILNAD MERCANTILE BANK,TMB,INR
2197,TNIDETF,TATA NIFTY DIGITAL ETF,TNIDETF,INR
2198,TNPETRO,TAMILNADU PETROPRODUCTS LTD,TNPETRO,INR


In [58]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 4 columns):
 #   Column                                              Non-Null Count  Dtype 
---  ------                                              --------------  ----- 
 0   IB Symbol                                           2200 non-null   object
 1   Product Description  (click link for more details)  2200 non-null   object
 2   Symbol                                              2200 non-null   object
 3   Currency                                            2200 non-null   object
dtypes: object(4)
memory usage: 68.9+ KB


__Reminder__: to pull data for an [Indian] Stock from yfinance, you need to __add the exchange-specific Suffix__ [.NS]
https://help.yahoo.com/kb/SLN2310.html

In [59]:
import yfinance as yf

In [60]:
symbol = df.Symbol[0] + ".NS"
symbol

'20MICRONS_BE.NS'

In [61]:
yf.download(symbol)

[*********************100%%**********************]  1 of 1 completed


1 Failed download:
['20MICRONS_BE.NS']: Exception('%ticker%: No timezone found, symbol may be delisted')





Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
