## SOURCE TO BRONZE LAYER

### Process:

> The function fetches daily stock data using the Yahoo Finance API (`yfinance`).


In [1]:
# Importing Common Utility Funcation
%run ../00_COMMON/common_utility.ipynb

In [2]:
# Import necessary libraries and utility functions
import pandas as pd
import yfinance as yf

from ETLTools import DateTimeUtil, GlobalPath

In [3]:
# Define file paths
holdingshistory_gold_file_path = GlobalPath(
    "DATA/GOLD/Holdings/HoldingsHistory_data.csv"
)
symbol_silver_file_path = GlobalPath("DATA/SILVER/Symbol/Symbol_data.csv")
stockdata_bronze_layer_path = GlobalPath("DATA/BRONZE/StockData")

In [4]:
def process_file(
    stock_ticker,
    stock_name: str,
    date: DateTimeUtil,
    output_file: GlobalPath,
):
    """
    Processes historical stock data for a given symbol and saves it to a CSV file.

    Args:
        stock_ticker : stock_ticker of the stock.
        stock_name (str): The stock_name of the stock.
        date (DateTimeUtil): DateTimeUtil object representing the date.
        output_file (str): Path to the output CSV file where the processed data will be saved.
    """
    # Fetch historical data from Yahoo Finance for the specified date range
    df = stock_ticker.history(
        start=date.start_date,
        end=min(date.end_date, DateTimeUtil.today()),
        interval="1d",
    )

    # Check if the DataFrame is empty and raise an exception if no data is fetched
    if df.empty:
        raise Exception(
            f"No data fetched for {stock_name} from {date.start_date} to {date.end_date}"
        )

    # Reset the index to ensure date is a column
    df = df.reset_index()

    # Replace punctuation in column names for consistency
    df = replace_punctuation_from_columns(df)

    # Fix duplicate column names by appending numerical suffixes
    df = fix_duplicate_column_names(df)

    # Round numerical values to 2 decimal places
    df = df.round(2)

    # Save the processed DataFrame to a CSV file
    df.to_csv(output_file, index=False)

    # Log successful processing and saving of data
    print(f"Data processed and saved to: {output_file}")

In [5]:
def generate_date_list(start_date, end_date):
    """
    Generates a list of DateTimeUtil objects representing the first day of each month
    within the specified date range.

    Args:
        start_date (datetime.date): Start date of the range.
        end_date (datetime.date): End date of the range.

    Returns:
        List[DateTimeUtil]: List of DateTimeUtil objects for each month within the range.
    """
    month_list = []
    current_date = min(start_date, DateTimeUtil.today())
    end_date = min(end_date, DateTimeUtil.today())
    while current_date <= end_date:
        month_list.append(
            DateTimeUtil(current_date.year, current_date.month, 1)
        )
        if current_date.month == 12:
            current_date = current_date.replace(
                year=current_date.year + 1, month=1, day=1
            )
        else:
            current_date = current_date.replace(
                month=current_date.month + 1, day=1
            )
    return month_list

In [6]:
print("Running data processing pipeline...")

# Load holdings data
df_holdings_history = pd.read_csv(holdingshistory_gold_file_path)
print(f"Loaded data from: {holdingshistory_gold_file_path}")
# Replace scrip code with scrip_name name
df_symbol = pd.read_csv(symbol_silver_file_path)
print(f"Loaded data from: {symbol_silver_file_path}")


# Calculate the min and max dates for each stock
df_holdings_history["date"] = pd.to_datetime(df_holdings_history["date"])
df_holdings_history = (
    df_holdings_history.groupby(["segment", "exchange", "symbol"])
    .agg(min_date=("date", "min"), max_date=("date", "max"))
    .reset_index()
)

# Merge df_HoldingHistory with df_Symbol on the matching columns
df_holdings_history = df_holdings_history.merge(
    df_symbol[["symbol", "isin"]],
    left_on="symbol",
    right_on="symbol",
    how="left",
)

Running data processing pipeline...
Loaded data from: /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/GOLD/Holdings/HoldingsHistory_data.csv
Loaded data from: /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SILVER/Symbol/Symbol_data.csv


In [7]:
# Dictionary for overwriting specific stock tickers
overwrite_stock_ticker = {
    "BAJAJHFL": "BAJAJHFL.NS"
}

def get_stock_ticker(row):
    # Check if symbol has an override in the dictionary
    if row["symbol"] in overwrite_stock_ticker:
        stock_ticker = yf.Ticker(overwrite_stock_ticker.get(row["symbol"]))
        if stock_ticker.info and stock_ticker.info.get("regularMarketPrice"):
            return stock_ticker

    # Try the NSE ticker
    stock_ticker = yf.Ticker(row["symbol"] + ".NS")
    if stock_ticker.info and stock_ticker.info.get("regularMarketPrice"):
        return stock_ticker

    # Try the BSE ticker
    stock_ticker = yf.Ticker(row["symbol"] + ".BO")
    if stock_ticker.info and stock_ticker.info.get("regularMarketPrice"):
        return stock_ticker

    # Fallback to the ISIN ticker
    stock_ticker = yf.Ticker(row["isin"])
    if stock_ticker.info and stock_ticker.info.get("regularMarketPrice"):
        return stock_ticker

    # If no valid ticker is found, raise an error
    raise KeyError(f"Fix this key: {row['symbol']}")

In [8]:
# Iterate over each stock holding record
for _, row in df_holdings_history.iterrows():
    try:
        print(f"Processing data for symbol {row['symbol']}:")
        stock_ticker = get_stock_ticker(row)
        # Generate a list of months within the date range for each stock
        date_list = generate_date_list(
            row["min_date"].to_pydatetime(), row["max_date"].to_pydatetime()
        )
        for date in date_list:
            # Determine the output file path in the bronze layer
            output_file = stockdata_bronze_layer_path.joinpath(
                f"{row['symbol']}_{date.year:04d}_{date.month:02d}.csv"
            )
            # Check if the file exists and skip if it's older than 2 months
            if output_file.exists():
                month_difference = date.month_difference(DateTimeUtil.today())
                if month_difference >= 1:
                    continue

            # Process and save the file
            process_file(
                stock_ticker=stock_ticker,
                stock_name=row["symbol"],
                date=date,
                output_file=output_file,
            )
    except Exception as e:
        # Log any errors encountered during processing
        print(f"Error processing:\n{e}")

Processing data for symbol BAJAJHFL:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/BAJAJHFL.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol BHAGERIA:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/BHAGERIA.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol BPCL:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/BPCL.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol GOLDBEES:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/GOLDBEES.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol HERANBA:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/HERANBA.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol IDEA:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/IDEA.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol INFY:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/INFY.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol IRCTC:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/IRCTC.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol KPITTECH:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/KPITTECH.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol LICI:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/LICI.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol NIFTYBEES:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/NIFTYBEES.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol PNB:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/PNB.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol SBIN:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/SBIN.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol TATACHEM:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/TATACHEM.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol TATAMOTORS:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/TATAMOTORS.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol TATAPOWER:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/TATAPOWER.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol VOLTAS:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/VOLTAS.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol YESBANK:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/YESBANK.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol MIRAE_ASSET_ELSS_TAX_SAVER_FUND_DIRECT_PLAN_GROWTH:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/MIRAE_ASSET_ELSS_TAX_SAVER_FUND_DIRECT_PLAN_GROWTH.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true
Processing data for symbol SBI_LONG_TERM_EQUITY_FUND_DIRECT_PLAN_GROWTH:


Error processing:
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/SBI_LONG_TERM_EQUITY_FUND_DIRECT_PLAN_GROWTH.NS?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true


In [9]:
# import pandas as pd

# # Read the CSV file
# df = pd.read_csv(
#     https://www.nseindia.com/get-quotes/equity?symbol=GOLDBEES
# )

# # Rename columns to match the desired format
# df.columns = [
#     "Date",
#     "series",
#     "OPEN",
#     "HIGH",
#     "LOW",
#     "PREV. CLOSE",
#     "ltp",
#     "close",
#     "vwap",
#     "52W H",
#     "52W L",
#     "VOLUME",
#     "VALUE",
#     "No of trades",
# ]

# # Select and rename columns as per the desired output
# df = df[["Date", "OPEN", "HIGH", "LOW", "close", "VOLUME"]].copy()

# # Rename columns to the new format
# df.columns = ["Date", "open", "high", "low", "close", "volume"]

# # Convert 'Date' column to datetime format and adjust timezone
# df["Date"] = pd.to_datetime(df["Date"], format="%d-%b-%Y")
# df["date"] = df["Date"].dt.strftime("%Y-%m-%d") + " 00:00:00+05:30"

# # Ensure 'volume' is an integer
# df["volume"] = df["volume"].str.replace(",", "").astype(float).astype(int)

# # Add placeholder columns for dividends and stock_splits
# df["dividends"] = 0.0
# df["stock_splits"] = 0.0

# year = 2021
# for month in range(2, 7):
#     output_file = stockdata_bronze_layer_path.joinpath(
#         f"{year:04d}/{month:02d}/GOLDBEES.csv"
#     )
#     df[df["Date"].dt.month == month][[
#         "date",
#         "open",
#         "high",
#         "low",
#         "close",
#         "volume",
#         "dividends",
#         "stock_splits",
#     ]].to_csv(output_file, index=False)