## BRONZE TO SILVER LAYER

### Silver Layer - Stock Price History


In [13]:
# Import necessary libraries and utility functions
import pandas as pd
import yfinance as yf
from common_utilities import (
    global_path,
    replace_punctuation_from_columns,
    check_files_availability,
    logger,
)

### Data Processing

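- Optionally download daily price history from Yahoo Finance (`yfinance`) into the Bronze layer.
- Read each Bronze-layer CSV file into a DataFrame, tag it with its stock name, and collect the DataFrames in a list.
- Concatenate the DataFrames, harmonize the column names, and drop columns that are entirely empty.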


In [14]:
# Disabled scratch code: reads the trade-history Silver file, keeps the distinct
# non-null (exchange, segment, symbol) rows, and lists "<symbol>.<exchange>" strings.
# NOTE(review): presumably intended to derive the ticker list for the download cell
# below; confirm that the exchange codes match the Yahoo suffixes used there
# (e.g. ".NS") before re-enabling.
# df = pd.read_csv(global_path.tradehistory_silver_file_path)
# df = df[["exchange","segment","symbol"]]
# df = df.sort_values(by=["exchange","segment","symbol"])
# df = df.dropna()
# df = df.drop_duplicates()
# df["symbol"] = df["symbol"]+"."+df["exchange"]
# df["symbol"].to_list()

In [15]:
# Mapping of Yahoo Finance ticker symbol -> Bronze-layer CSV filename.
# Un-comment an entry to (re)download that symbol's history. With every entry
# commented out the loop below does nothing and the existing Bronze files are reused.
symbols_to_files = {
    # "0P00017844.BO": "MIRAE-ASSET-TAX-SAVER-DIRECT-GROWTH.MF.csv",
    # "0P0000XVL9.BO": "SBI-MAGNUM-TAXGAIN-SCHEME-DIR-GR.MF.csv",
    # "BHAGERIA.NS": "BHAGERIA.NS.csv",
    # "BPCL.NS": "BPCL.NS.csv",
    # "GOLDBEES.NS": "GOLDBEES.NS.csv",
    # "HERANBA.NS": "HERANBA.NS.csv",
    # "IDEA.NS": "IDEA.NS.csv",
    # "INFY.NS": "INFY.NS.csv",
    # "IRCTC.NS": "IRCTC.NS.csv",
    # "KPITTECH.NS": "KPITTECH.NS.csv",
    # "LICI.NS": "LICI.NS.csv",
    # "NIFTYBEES.NS": "NIFTYBEES.NS.csv",
    # "PNB.NS": "PNB.NS.csv",
    # "SBIN.NS": "SBIN.NS.csv",
    # "TATACHEM.NS": "TATACHEM.NS.csv",
    # "TATAMOTORS.NS": "TATAMOTORS.NS.csv",
    # "TATAPOWER.NS": "TATAPOWER.NS.csv",
    # "VOLTAS.NS": "VOLTAS.NS.csv",
    # "YESBANK.NS": "YESBANK.NS.csv",
}

# Download each configured symbol and store it as one CSV file in the Bronze layer
for symbol, filename in symbols_to_files.items():
    try:
        file_path = global_path.stockprice_bronze_layer_path.joinpath(filename)
        # Fetch daily historical data from 2020-01-01 onwards
        stock = yf.Ticker(symbol)
        data = stock.history(start="2020-01-01", interval="1d")

        # An empty result has no price rows; writing it would replace a previously
        # downloaded file with a header-only CSV, so skip it and report instead.
        if data.empty:
            logger.warning(
                f"No data returned for {symbol}; {filename} was not written"
            )
            continue

        # Save to CSV file
        data.to_csv(file_path)
        logger.info(f"Data for {symbol} saved to {filename}")

    except Exception as e:
        # Best-effort download: one failing symbol must not abort the remaining
        # ones, but the failure is reported at ERROR level so it stands out
        # from the routine INFO messages.
        logger.error(f"Error fetching data for {symbol}: {e}")

In [16]:
def _load_bronze_csv(csv_path):
    """Read one Bronze-layer CSV and tag every row with its stock name.

    The stock name is the part of the file name before the first ".",
    upper-cased and stripped of surrounding whitespace.
    """
    logger.info(f"Processing file: {csv_path}")
    return pd.read_csv(csv_path).assign(
        stock_name=csv_path.name.split(".")[0].upper().strip()
    )


# Locate the CSV files available in the Bronze layer
file_paths = check_files_availability(
    global_path.stockprice_bronze_layer_path, file_pattern="*.csv"
)

# One DataFrame per file, in the order the files were returned
df_stock_price_list = [_load_bronze_csv(csv_path) for csv_path in file_paths]

# Stack all files into one frame, harmonize the column names via the
# common_utilities helper, then drop the columns that are entirely NA in the
# combined frame.
df = replace_punctuation_from_columns(
    pd.concat(df_stock_price_list, ignore_index=True)
).dropna(how="all", axis="columns")

2024-08-07T02:24:21Z - INFO - Number of Files Detected: 19
2024-08-07T02:24:21Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\StockPrice\BHAGERIA.NS.csv
2024-08-07T02:24:21Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\StockPrice\BPCL.NS.csv
2024-08-07T02:24:21Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\StockPrice\GOLDBEES.NS.csv
2024-08-07T02:24:21Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\StockPrice\HERANBA.NS.csv
2024-08-07T02:24:21Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\StockPrice\IDEA.NS.csv
2024-08-07T02:24:21Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\StockPrice\INFY.NS.csv
2024-08-07T02:24:21Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\StockPrice\IRCTC.NS.csv
2024-08-07T02:24:21Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Ups

### Final Processing and Export

- Convert the `date` column to calendar dates.
- Round numerical values to 2 decimal places.
- Sort the DataFrame by stock name and date.
- Keep only the relevant columns and save the processed data as a CSV file in the Silver layer.


In [17]:
# Columns that must be present in the combined frame for the export to make sense
required_columns = [
    "date",
    "stock_name",
    "open",
    "high",
    "low",
    "close",
    "volume",
    "dividends",
    "stock_splits",
]
# Columns that only some instruments provide. The all-NA column drop in the
# loading cell removes them when no loaded file has a value, so they must not
# be assumed to exist.
optional_columns = ["capital_gains"]

# Fail early with a clear message instead of a bare KeyError further down
missing_columns = [
    column for column in required_columns if column not in df.columns
]
if missing_columns:
    raise KeyError(
        f"Stock price data is missing required columns: {missing_columns}"
    )

# Convert timestamps to calendar dates (datetime.date objects, time part dropped)
df["date"] = pd.to_datetime(df["date"]).dt.date

# Round numerical values to 2 decimal places
df = df.round(2)

# Sort the DataFrame by stock name and date
df = df.sort_values(by=["stock_name", "date"])

# Select relevant columns in a fixed order; reindex creates an all-NaN column
# for any optional column that is absent, so the Silver schema stays stable.
df = df.reindex(columns=required_columns + optional_columns)

# Save the result as a CSV file (without the row index)
df.to_csv(global_path.stockprice_silver_file_path, index=False)
logger.info(
    "SILVER Layer CSV file for Stock Price history successfully created at:"
)
logger.info(global_path.stockprice_silver_file_path.resolve())
# Print a summary of the exported DataFrame (columns, non-null counts, dtypes)
df.info()

2024-08-07T02:24:21Z - INFO - SILVER Layer CSV file for Stock Price history successfully created at:
2024-08-07T02:24:21Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\SILVER\StockPrice\StockPrice_data.csv


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20728 entries, 0 to 20727
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           20728 non-null  object 
 1   stock_name     20728 non-null  object 
 2   open           20728 non-null  float64
 3   high           20728 non-null  float64
 4   low            20728 non-null  float64
 5   close          20728 non-null  float64
 6   volume         20728 non-null  int64  
 7   dividends      20728 non-null  float64
 8   stock_splits   20728 non-null  float64
 9   capital_gains  2266 non-null   float64
dtypes: float64(7), int64(1), object(2)
memory usage: 1.6+ MB
