## BRONZE TO SILVER LAYER

### Silver Layer - Stock Price History


In [1]:
# Import necessary libraries and utility functions
import pandas as pd

from PortfolioTracker.globalpath import GlobalPath
from PortfolioTracker.utilities import (
    check_files_availability,
    replace_punctuation_from_columns,
)

In [2]:
# Build the GlobalPath helper for the "PortfolioTracker" project; both paths
# below are derived from it.
# NOTE(review): presumably this resolves the project root — confirm in
# PortfolioTracker.globalpath.
global_path = GlobalPath("PortfolioTracker")

# Input: Bronze-layer folder that is scanned for stock data CSV files
stockdata_bronze_layer_path = global_path.joinpath("DATA/BRONZE/StockData")

# Output: Silver-layer CSV file that receives the combined stock price history
stockprice_silver_file_path = global_path.joinpath("DATA/SILVER/StockPrice/StockPrice_data.csv")

### Data Processing

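- Initialize an empty list to store DataFrames.
- Read each Bronze-layer CSV file, tag its rows with the stock name taken from the file name, and concatenate everything into one DataFrame.
- Harmonize the column names and drop columns that contain only missing values.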


In [3]:
# Collect one DataFrame per Bronze-layer CSV file
df_stock_price_list = []

# Locate the CSV files available in the Bronze layer
file_paths = check_files_availability(
    stockdata_bronze_layer_path, file_pattern="*.csv"
)

for file_path in file_paths:
    print(f"Processing file: {file_path}")

    # Use a dedicated name for the per-file frame so it is never confused
    # with the combined `df` that is built after the loop
    df_single_file = pd.read_csv(file_path)

    # Tag every row with the stock name: the file name up to its first ".",
    # upper-cased and stripped of surrounding whitespace
    df_single_file["stock_name"] = file_path.name.split(".")[0].upper().strip()

    df_stock_price_list.append(df_single_file)

# pd.concat raises an opaque "No objects to concatenate" ValueError on an
# empty list; raise the same exception type with a message that points at
# the actual cause instead
if not df_stock_price_list:
    raise ValueError(
        f"No CSV files found to process in: {stockdata_bronze_layer_path}"
    )

# Stack all per-file frames into one DataFrame with a fresh 0..n-1 index
df = pd.concat(df_stock_price_list, ignore_index=True)

# Harmonize column names with the project helper (the export cell relies on
# names such as "date", "open" and "close")
df = replace_punctuation_from_columns(df)

# Drop columns that are entirely NA in the combined DataFrame
# (assignment instead of inplace=True keeps the step explicit)
df = df.dropna(how="all", axis=1)

Number of Files Detected: 211
Processing file: ..\..\DATA\BRONZE\StockData\2020\04\TATAMOTORS.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\05\BHAGERIA.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\05\TATAMOTORS.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\06\BHAGERIA.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\06\TATAMOTORS.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\07\IDEA.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\07\PNB.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\07\TATAMOTORS.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\07\YESBANK.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\08\IDEA.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\08\PNB.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\08\TATAMOTORS.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\08\YESBANK.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\09\IDEA.csv
Processing file: ..\..\DATA\BRONZE\StockData\2020\09\PNB.csv

### Final Processing and Export

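- Convert the `date` column to calendar dates.
- Round numerical values to 2 decimal places.
- Sort the DataFrame by stock name and date.
- Keep only the relevant columns (`date`, `stock_name`, `open`, `high`, `low`, `close`, `volume`).
- Save the processed data as a CSV file in the Silver layer.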


In [4]:
# Columns written to the Silver layer, in their final order
output_columns = ["date", "stock_name", "open", "high", "low", "close", "volume"]

# Transform in one chain:
#   1. parse "date" and keep only the calendar-date part (date objects, not strings)
#   2. round numeric values to 2 decimal places
#   3. order rows by stock name, then date
#   4. keep only the output columns
df = (
    df.assign(date=pd.to_datetime(df["date"]).dt.date)
    .round(2)
    .sort_values(by=["stock_name", "date"])
    .loc[:, output_columns]
)

# Write the Silver-layer CSV file.
# NOTE(review): index=None appears to behave like index=False (row index not
# written) — confirm against the pandas to_csv documentation.
df.to_csv(stockprice_silver_file_path, index=None)
print("SILVER Layer CSV file for Stock Price history successfully created at:")
print(stockprice_silver_file_path)

# Summarize the exported DataFrame (columns, non-null counts, dtypes)
df.info()

SILVER Layer CSV file for Stock Price history successfully created at:
..\..\DATA\SILVER\StockPrice\StockPrice_data.csv
<class 'pandas.core.frame.DataFrame'>
Index: 4119 entries, 17 to 1993
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        4119 non-null   object 
 1   stock_name  4119 non-null   object 
 2   open        4119 non-null   float64
 3   high        4119 non-null   float64
 4   low         4119 non-null   float64
 5   close       4119 non-null   float64
 6   volume      4119 non-null   int64  
dtypes: float64(4), int64(1), object(2)
memory usage: 257.4+ KB
