In [24]:
import pandas as pd
import numpy as np
import yfinance as yf
from requests.exceptions import HTTPError


In [25]:
ticker_symbols = [
    "RELIANCE.NS",
    "TCS.NS",
    "HDB",
    "IBN",
    "INFY",
    "SBIN.NS",
    "LICI.NS",
    "BHARTIARTL.NS",
    "HINDUNILVR.NS",
    "ITC.NS"]

In [26]:
# Counters
total_companies = len(ticker_symbols)
discarded_companies = 0
available_companies = 0

# Set start and end dates
start_date = '2014-01-01'
end_date = '2024-01-01'

# Create an empty DataFrame to store data for companies with sector and industry
all_stock_data = pd.DataFrame()

# Loop through each ticker symbol
for ticker_symbol in ticker_symbols:
    try:
        # Create a Ticker object
        ticker = yf.Ticker(ticker_symbol)

        # Get the info dictionary containing various information including sector and industry
        info = ticker.info

        # Get sector and industry information
        sector = info.get('sector')
        industry = info.get('industry')

        # Check if both sector and industry are not None
        if sector is not None and industry is not None:
            # Download historical data for the current ticker symbol
            stock_data = yf.download(ticker_symbol, start=start_date, end=end_date)

            # Add stock information as columns to the DataFrame
            stock_data['Company Name'] = info['longName']
            stock_data['Sector'] = sector
            stock_data['Industry'] = industry

            # Append the stock data to the DataFrame for all stocks
            all_stock_data = pd.concat([all_stock_data, stock_data])

            available_companies += 1
        else:
            discarded_companies += 1
    except HTTPError as e:
        if e.response.status_code == 404:
            print(f"Error 404: Ticker {ticker_symbol} not found. Discarding...")
            discarded_companies += 1

# Print summary
print("Summary:")
print("Total companies input:", total_companies)
print("Number of companies discarded:", discarded_companies)
print("Number of companies available with sector and industry:", available_companies)

# Display the DataFrame
print("\nStock Data:")
print(all_stock_data)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Summary:
Total companies input: 10
Number of companies discarded: 0
Number of companies available with sector and industry: 10

Stock Data:
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2014-01-01  410.283936  410.649689  405.780853  406.329468  379.456024   
2014-01-02  405.826569  409.621063  397.780457  400.020569  373.564423   
2014-01-03  398.466187  399.266235  390.877228  395.106018  368.974915   
2014-01-06  394.123108  394.648865  388.591400  390.854370  365.004425   
2014-01-07  390.694366  393.140198  384.042603  384.979797  359.518402   
...                ...         ...         ...         ...         ...   
2023-12-22  453.000000  455.899994  450.549988  455.200012  448.611328   
2023-12-26  456.049988  458.899994  454.700012  456.450012  449.843231   
2023-12-27  457.000000  458.500000  453.750000  457.100006  450.483826   
2023-12-28  457.200012  464.700012  456.100006




In [27]:
df = pd.DataFrame(all_stock_data)
df


# ### Count of trading days for each company.

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Company Name,Sector,Industry
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2014-01-01,410.283936,410.649689,405.780853,406.329468,379.456024,2924699,Reliance Industries Limited,Energy,Oil & Gas Refining & Marketing
2014-01-02,405.826569,409.621063,397.780457,400.020569,373.564423,3011816,Reliance Industries Limited,Energy,Oil & Gas Refining & Marketing
2014-01-03,398.466187,399.266235,390.877228,395.106018,368.974915,6416948,Reliance Industries Limited,Energy,Oil & Gas Refining & Marketing
2014-01-06,394.123108,394.648865,388.591400,390.854370,365.004425,6657928,Reliance Industries Limited,Energy,Oil & Gas Refining & Marketing
2014-01-07,390.694366,393.140198,384.042603,384.979797,359.518402,8655735,Reliance Industries Limited,Energy,Oil & Gas Refining & Marketing
...,...,...,...,...,...,...,...,...,...
2023-12-22,453.000000,455.899994,450.549988,455.200012,448.611328,11026317,ITC Limited,Consumer Defensive,Tobacco
2023-12-26,456.049988,458.899994,454.700012,456.450012,449.843231,5871301,ITC Limited,Consumer Defensive,Tobacco
2023-12-27,457.000000,458.500000,453.750000,457.100006,450.483826,7543383,ITC Limited,Consumer Defensive,Tobacco
2023-12-28,457.200012,464.700012,456.100006,464.100006,457.382507,20696101,ITC Limited,Consumer Defensive,Tobacco


In [28]:
company_counts = df['company_counts'] = df['Company Name'].value_counts()
company_counts

Company Name
HDFC Bank Limited                      2516
ICICI Bank Limited                     2516
Infosys Limited                        2516
Reliance Industries Limited            2465
Tata Consultancy Services Limited      2465
State Bank of India                    2465
Bharti Airtel Limited                  2465
Hindustan Unilever Limited             2465
ITC Limited                            2465
Life Insurance Corporation of India     403
Name: count, dtype: int64

In [29]:
num_companies_with_2465 = (company_counts == 2465).sum()
num_companies_with_2465

6

"# Count of trading days for each company.\ncompany_counts = df['Company Name'].value_counts()\n\nnum_companies_with_2465 = (company_counts == 2465).sum()\n\nselected_companies = []\nfor company, days_count in df.groupby('Company Name')['Company Name'].first().items():\n    if days_count == 2465:\n        selected_companies.append(company)\n\ndf1 = df[df['Company Name'].isin(selected_companies)]\ndf1.drop(columns=['company_counts'], inplace=True)"


Filtered DataFrame:
Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume, Company Name, Sector, Industry]
Index: []
