# Problem Description

1. Using a finance API, fetch the closing prices of all stocks on the Indian stock market for each day the market was open over the past 10 years.
2. Organize the data into a table. The first column holds the stock name/stock symbol. The remaining columns are dates: the first is the last date of the last FY, then the second-to-last date, then the third-to-last, and so on for the last 10 years. The value in each date column is that day's closing price for the corresponding stock.
3. Then, perform mathematical formulation on the table, using Gann Cycles and CHM. The flow is simple. User inputs a start date and end date. The program will calculate the above methods for all stocks and fetch the list of top performing stocks in that time period/cycle.
4. All of the above features will be implemented using various functions. All being called and used in a separate "main" function.
5. Another separate function will fetch the list of poor performers in that time period.
6. Future Optimizations: 
    - A function to update the data for the newer financial year will exist. This will simply check whether the data already exists or not. If not, it will create the entire table, including data up to the present date. If it exists, it simply fetches the data from the last date in the table up to the current date and appends it. Normally, I would like to include data only for complete financial years, not for partially completed ones.
    - A function will take a stock symbol, a start date and an end date for a time period, and calculate cycles and the other relevant methods mentioned above. It will then tell whether that stock will perform well in that time period by showing a set of numbers. These numbers can be % change and other statistical measures.

# Imports and Globals

In [1]:
# %pip install selenium pandas webdriver-manager

In [2]:
import os
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

In [3]:
# Global variables
# Suffixes appended to a raw exchange symbol to build the ticker that is
# looked up (BSE_SUFFIX is used by the scraper; NSE_SUFFIX is presumably the
# NSE equivalent -- it is not referenced in the visible code).
BSE_SUFFIX = ".BO"
NSE_SUFFIX = ".NS"

# Combined symbol table and whether it was loaded from its cached CSV.
ALL_SYMBOLS = None
symbols_defined = False

# Each cached dataset has a DataFrame slot (None until loaded) and a flag
# telling whether its CSV was found on disk.
all_data_exists = False
combined_data = None
# The loader cell and the scraper entry point use `combined_data_exists`
# (not `all_data_exists`). It must be defined here, otherwise the check
# before `main()` raises NameError whenever the combined CSV is missing.
combined_data_exists = False

bse_scrapped_data_exists = False
bse_scrapped_data = None

all_20_yrs_data = None
all_20_yrs_data_exists = False

gt_10_yrs_data = None
gt_10_yrs_data_exists = False

# Default date range (YYYY-MM-DD) for the historical data.
end_date = "2024-03-31"
start_date = "2003-04-01"

In [None]:
# Base folders: static input lists live in one, scraper output in the other.
data_files_folder = "./data_files"
scrapped_data_folder = "./scrapped_data"

# Input CSVs (exchange symbol lists and the merged symbol table).
nse_list_path = data_files_folder + "/NSE_STOCK_LIST.csv"
bse_list_path = data_files_folder + "/BSE_STOCK_LIST.csv"
all_symbols_path = data_files_folder + "/ALL_SYMBOLS.csv"

# Output CSVs produced by the scraping / filtering steps.
combined_stock_data_path = scrapped_data_folder + "/combined_stock_data.csv"
bse_scrapped_data_path = scrapped_data_folder + "/bse_scrapped_data.csv"
gt_10_yrs_data_path = scrapped_data_folder + "/gt_10_yrs_data.csv"
all_20_yrs_data_path = scrapped_data_folder + "/all_20_yrs_data.csv"

In [None]:
def _read_csv_if_available(path, label):
    """Try to load a cached CSV and report the outcome.

    Args:
        path: Location of the CSV file.
        label: Name of the dataset, used in the success message.

    Returns:
        A ``(frame, exists)`` tuple: the loaded DataFrame and ``True`` on
        success, or ``(None, False)`` when the file is missing or unreadable.
    """
    try:
        frame = pd.read_csv(path)
    except FileNotFoundError:
        print(f"'{path}' File does not exist")
        return None, False
    except Exception as e:
        # Still best-effort, but do not misreport a parse/permission problem
        # as a missing file.
        print(f"Could not load '{path}': {e}")
        return None, False
    print(f"Shape of DataFrame of '{label}' is {frame.shape}")
    return frame, True


# Assigning both names unconditionally guarantees every `*_exists` flag is
# defined after this cell, even when a file is absent.
combined_data, combined_data_exists = _read_csv_if_available(
    combined_stock_data_path, "combined_data"
)
bse_scrapped_data, bse_scrapped_data_exists = _read_csv_if_available(
    bse_scrapped_data_path, "bse_scrapped_data"
)
gt_10_yrs_data, gt_10_yrs_data_exists = _read_csv_if_available(
    gt_10_yrs_data_path, "gt_10_yrs_data"
)
all_20_yrs_data, all_20_yrs_data_exists = _read_csv_if_available(
    all_20_yrs_data_path, "all_20_yrs_data"
)


# Getting the List of Stocks

In [None]:
# Read the NSE listing and pull its 'SYMBOL' column out as a plain list.
NSE_STOCKS = pd.read_csv(nse_list_path)
print(f"Column Headers of Table: {NSE_STOCKS.columns.tolist()}")
NSE_SYMBOLS = NSE_STOCKS['SYMBOL'].tolist()
print(f"Symbols: {NSE_SYMBOLS}")

In [None]:
# Read the BSE listing and pull its 'Security Id' column out as a plain list.
BSE_STOCKS = pd.read_csv(bse_list_path)
print(f"Column Headers of Table: {BSE_STOCKS.columns.tolist()}")
BSE_SYMBOLS = BSE_STOCKS['Security Id'].tolist()
print(f"Symbols: {BSE_SYMBOLS}")

In [None]:
# Load the cached symbol table, or build it from the NSE and BSE lists.
try:
    ALL_SYMBOLS = pd.read_csv(all_symbols_path)
    symbols_defined = True
except FileNotFoundError:
    print(f"'ALL_SYMBOLS.csv' File does not exist")
except Exception as e:
    # Any other load failure also falls through to a rebuild below, but the
    # real reason is reported instead of claiming the file is missing.
    print(f"Could not load 'ALL_SYMBOLS.csv': {e}")

if not symbols_defined:
    # A set removes symbols listed on both exchanges.
    all_symbols_set = set(NSE_SYMBOLS + BSE_SYMBOLS)
    all_symbols_list = sorted(all_symbols_set)
    # Build the table from the *sorted* list so the cached CSV has a stable,
    # reproducible order (set iteration order is arbitrary).
    ALL_SYMBOLS = pd.DataFrame(all_symbols_list)
    ALL_SYMBOLS.to_csv(all_symbols_path, index=False)


print(ALL_SYMBOLS)

# The Method that Scrapes the Yahoo Finance Website

In [None]:
# Bookkeeping files that let an interrupted scrape be resumed.
progress_file = "last_completed_stock.txt"  # index of the stock to resume from
error_flag_file = "error_flag.txt"  # 'True'/'False' status of the last run
# CSV that accumulates the scraped closing prices.
output_file = bse_scrapped_data_path

# Build the Selenium driver used for scraping
def init_driver():
    """Create and return a headless Chrome WebDriver.

    The chromedriver binary is resolved through ``ChromeDriverManager``.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')  # no visible browser window
    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

# Load the last completed stock index
def load_last_completed_stock():
    """Return the stock index stored in ``progress_file``.

    Returns:
        The saved integer index, or 0 when the progress file is missing or
        empty (for example after an interrupted write).

    Raises:
        ValueError: If the file holds non-numeric content.
    """
    # Open directly instead of exists()+open() so a file removed in between
    # cannot cause a crash.
    try:
        with open(progress_file, 'r') as file:
            raw = file.read().strip()
    except FileNotFoundError:
        return 0
    if not raw:
        # int('') would raise; treat an empty file as "no progress recorded".
        return 0
    return int(raw)

# Persist the resume position
def save_last_completed_stock(stock_idx):
    """Overwrite ``progress_file`` with the text form of *stock_idx*."""
    with open(progress_file, 'w') as handle:
        # print() writes str(stock_idx); end='' keeps the file newline-free.
        print(stock_idx, end='', file=handle)

# Load the error flag status
def load_error_flag():
    """Return whether ``error_flag_file`` records a failed previous run.

    Returns:
        True when the file content is ``'True'`` (surrounding whitespace is
        ignored); False otherwise, including when the file does not exist.
    """
    # Open directly instead of exists()+open() to avoid the check/use race.
    try:
        with open(error_flag_file, 'r') as file:
            # strip() so a trailing newline (e.g. from a manual edit) does
            # not make a stored 'True' read back as False.
            return file.read().strip() == 'True'
    except FileNotFoundError:
        return False

# Persist the error status of the current run
def save_error_flag(flag):
    """Overwrite ``error_flag_file`` with the text form of *flag*."""
    with open(error_flag_file, 'w') as handle:
        # print() writes str(flag); end='' keeps the file newline-free.
        print(flag, end='', file=handle)

# Drop a stock's column from the output CSV (used before retrying a stock)
def remove_previous_data(stock_symbol):
    """Delete the column named *stock_symbol* from ``output_file``.

    Does nothing when the output file does not exist or has no such column;
    otherwise the file is rewritten without that column.
    """
    if not os.path.exists(output_file):
        return
    table = pd.read_csv(output_file)
    if stock_symbol not in table.columns:
        return
    trimmed = table.drop(columns=[stock_symbol])
    trimmed.to_csv(output_file, index=False)
# Function to fetch stock data from Yahoo Finance
def fetch_stock_data(driver, symbol, start_date, end_date):
    """Scrape (date, close) pairs for *symbol* from its Yahoo history page.

    Args:
        driver: Selenium WebDriver used to load the page.
        symbol: Full ticker, including the exchange suffix.
        start_date: Range start as 'YYYY-MM-DD'.
        end_date: Range end as 'YYYY-MM-DD'.

    Returns:
        A list of ``[date_text, close_price_text]`` rows (possibly empty), or
        ``None`` if anything went wrong while loading or parsing the page.
    """
    stock_data = []
    try:
        url = f"https://finance.yahoo.com/quote/{symbol}/history?period1={date_to_unix(start_date)}&period2={date_to_unix(end_date)}&interval=1d&filter=history&frequency=1d"
        driver.get(url)
        time.sleep(3)  # fixed pause to let the page render its table
        rows = driver.find_elements(By.XPATH, '//table[contains(@class, "W(100%)")]/tbody/tr')
        for row in rows:
            cols = row.find_elements(By.TAG_NAME, 'td')
            # The close price is read from the fifth cell. Rows with fewer
            # cells (presumably dividend/split notices -- TODO confirm) are
            # skipped; otherwise cols[4] raises IndexError and the whole
            # symbol is reported as failed.
            if len(cols) < 5:
                continue
            date = cols[0].text
            close_price = cols[4].text.replace(",", "")  # Remove commas in the prices
            stock_data.append([date, close_price])
        return stock_data
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return None

# Merge a list of per-stock frames into the output CSV
def _append_chunk_to_csv(frames):
    """Add the per-stock columns in *frames* to ``output_file``.

    Does nothing when *frames* is empty. Creates the file if it does not
    exist yet, otherwise joins the new columns onto the stored table by date.
    """
    if not frames:
        return
    chunk = pd.concat(frames, axis=1)
    if os.path.exists(output_file):
        # parse_dates=True makes the stored index datetime, matching the new
        # frames; a string index would not align with them on concat.
        existing_data = pd.read_csv(output_file, index_col='Date', parse_dates=True)
        chunk = pd.concat([existing_data, chunk], axis=1)
    # Always label the index so the file can be re-read with index_col='Date'.
    chunk.to_csv(output_file, index_label='Date')


# Function to process the stocks in chunks of 200
def process_stocks_in_chunks(driver, stock_symbols, suffix, start_date, end_date):
    """Scrape every symbol and append the closing prices to ``output_file``.

    Work resumes from the index stored in the progress file and proceeds in
    chunks of 200 symbols; each finished chunk is written to the CSV and the
    progress file is advanced. If a symbol fails, the stocks already fetched
    in the current chunk are written, the error flag and the failing index
    are saved, and the function returns early.

    Args:
        driver: Selenium WebDriver; it is always quit before returning.
        stock_symbols: Sequence of exchange symbols without suffix.
        suffix: Exchange suffix appended to each symbol for the lookup.
        start_date: Range start as 'YYYY-MM-DD'.
        end_date: Range end as 'YYYY-MM-DD'.
    """
    last_completed = load_last_completed_stock()
    error_flag = load_error_flag()

    try:
        for i in range(last_completed, len(stock_symbols), 200):
            chunk_symbols = stock_symbols[i:i + 200]
            frames = []

            for j, symbol in enumerate(chunk_symbols):
                full_symbol = symbol + suffix

                # If there was an error last time, remove the previous data and retry
                if error_flag and j == 0:  # Start from the last failed stock
                    # Columns are stored under the bare symbol (see below),
                    # so that is the name that has to be removed.
                    remove_previous_data(symbol)
                    error_flag = False
                    save_error_flag(False)

                stock_data = fetch_stock_data(driver, full_symbol, start_date, end_date)
                if stock_data is None:
                    # Progress is saved as i + j, so the next run skips the
                    # stocks before j; persist them now or they are lost.
                    _append_chunk_to_csv(frames)
                    save_error_flag(True)
                    save_last_completed_stock(i + j)
                    return

                df = pd.DataFrame(stock_data, columns=['Date', symbol])
                df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
                df[symbol] = pd.to_numeric(df[symbol], errors='coerce').fillna(0)  # Fill missing values with 0
                # Column-wise concat needs a unique index: drop unparsable
                # dates (NaT) and repeated dates.
                df = df.dropna(subset=['Date']).drop_duplicates(subset='Date')
                frames.append(df.set_index('Date'))

            # Append the chunk to the CSV file
            _append_chunk_to_csv(frames)

            # Mark this chunk as completed
            save_last_completed_stock(i + len(chunk_symbols))
    finally:
        # Release the browser on every exit path, including unexpected errors.
        driver.quit()

    print("All stocks processed successfully.")

# Convert date to Unix timestamp
def date_to_unix(date_str):
    """Return the Unix timestamp of midnight UTC on *date_str*.

    Args:
        date_str: Date formatted as 'YYYY-MM-DD'.

    Returns:
        Integer seconds since the epoch.

    Raises:
        ValueError: If *date_str* does not match the expected format.
    """
    import calendar

    # timegm() interprets the parsed date as UTC, so the result no longer
    # depends on the timezone of the machine (time.mktime uses local time).
    return calendar.timegm(time.strptime(date_str, '%Y-%m-%d'))

# Entry point that scrapes the BSE closing prices
def main():
    """Start a driver and scrape the configured BSE symbols.

    Uses a fixed date range and symbol list, then hands a fresh driver to
    ``process_stocks_in_chunks`` together with the BSE suffix.
    """
    bse_stocks = ['RELIANCE', 'TCS', 'INFY']  # Replace with full list of BSE stock symbols
    start_date, end_date = '2003-04-01', '2024-03-31'

    # Process stocks in chunks of 200
    process_stocks_in_chunks(init_driver(), bse_stocks, BSE_SUFFIX, start_date, end_date)

# Scrape only when the combined dataset was not loaded from disk.
# NOTE(review): the message below talks about BSE data while the flag checked
# is the combined-data one -- confirm which flag is intended.
if combined_data_exists:
    print("BSE data exists")
else:
    main()

# Function to Update Data
Run it only at the end of a financial year to fetch that financial year's data and update the combined stock data CSV file. Then run the functions that build the 10-year and 20-year stock lists so that they are updated as well.