## BRONZE TO SILVER LAYER

### Silver Layer - Stock Price History


In [None]:
# Import standard-library, third-party, and project modules
import re

import pandas as pd

from StockETL import GlobalPath

In [None]:
# Run the common utility notebook to load the shared helper functions
%run ../COMMON/common_utility.ipynb

In [None]:
# Build the GlobalPath objects used by this notebook.
# NOTE(review): GlobalPath presumably resolves these relative to the project
# root - confirm against the StockETL package.

# Input: Bronze layer folder that is searched for stock data CSV files
stockdata_bronze_layer_path = GlobalPath(
    "DATA/BRONZE/StockData",
)

# Output: Silver layer CSV file the combined stock price data is written to
stockprice_silver_file_path = GlobalPath(
    "DATA/SILVER/StockPrice/StockPrice_data.csv",
)

# Schema: data contract JSON the DataFrame is aligned with before export
stockprice_silver_schema_file_path = GlobalPath(
    "CONFIG/DATA_CONTRACTS/SILVER/StockPrice.json",
)

### Data Processing

- Initialize an empty list to store DataFrames.
- Read and concatenate data from multiple CSV files.


In [None]:
# Pattern for the trailing "_YYYY_MM.csv" part of a Bronze file name.
# Compiled once because it is reused for every file, and anchored with "$"
# so that only the suffix (never a fragment in the middle) is stripped.
date_suffix_pattern = re.compile(r"_\d{4}_\d{2}\.csv$")

# Initialize an empty list to store individual stock price DataFrames
df_stock_price_list = []

# Generate file paths for available CSV files in the Bronze layer
# NOTE(review): the loop below reads `.name` on each entry, so this assumes
# check_files_availability yields pathlib-style path objects - confirm.
file_paths = check_files_availability(stockdata_bronze_layer_path, file_pattern="*.csv")

# Loop through the list of CSV files in the folder
for file_path in file_paths:
    print(f"Processing file => {file_path}")
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)
    # Tag every row with the stock symbol, i.e. the file name without its
    # "_YYYY_MM.csv" suffix
    df["symbol"] = date_suffix_pattern.sub("", file_path.name)
    # Append the DataFrame to the list of DataFrames
    df_stock_price_list.append(df)

# Fail with a descriptive message instead of pandas' generic
# "No objects to concatenate" error when nothing was read
if not df_stock_price_list:
    raise ValueError(
        f"No stock data CSV files found in => {stockdata_bronze_layer_path}"
    )

# Concatenate all individual DataFrames into one combined DataFrame
df = pd.concat(df_stock_price_list, ignore_index=True)
# Standardize column names by replacing punctuation
df = replace_punctuation_from_columns(df)
# Remove columns that contain only NA values
df = df.dropna(how="all", axis=1)

### Final Processing and Export

- Sort the DataFrame by stock name and date.
- Save the processed data as a CSV file in the Silver layer.

In [None]:
# Normalize the "date" column: parse the values as datetimes and keep only
# the calendar-date part (datetime.date objects, not strings)
df["date"] = pd.to_datetime(df["date"]).dt.date

# Align DataFrame with the data contract
# NOTE(review): align_with_datacontract is defined outside this cell; it
# presumably enforces the columns/dtypes (and possibly ordering) described in
# the schema file - confirm.
df = align_with_datacontract(df, stockprice_silver_schema_file_path)

# Save the result as a CSV file; index=False is the documented way to leave
# the row index out of the output
df.to_csv(stockprice_silver_file_path, index=False)

print("SILVER Layer CSV file for Stock Price history successfully created at =>")
print(stockprice_silver_file_path)
# Print a summary of the final DataFrame for debugging
df.info()