## SOURCE TO BRONZE LAYER

> This notebook reads the raw TradeHistory Excel files from the source layer, harmonizes them, and writes the result as CSV files to the bronze layer.


In [1]:
# Import pandas and the project's GlobalPath helper
import pandas as pd
from StockETL import GlobalPath

In [2]:
# Run the shared utility notebook so that the names it defines are available here
%run ../COMMON/common_utility.ipynb

In [3]:
# Resolve the project locations used by this notebook through GlobalPath.
# Bronze layer: destination of the CSV files written by the processing loop.
tradehistory_bronze_layer_path = GlobalPath("DATA/BRONZE/TradeHistory")
# Source layer: searched for the raw "trade_*.xlsx" workbooks.
tradehistory_source_layer_path = GlobalPath("DATA/SOURCE/TradeHistory")
# Data contract (JSON) handed to align_with_datacontract inside read_file.
tradehistory_bronze_schema_file_path = GlobalPath(
    "CONFIG/DATA_CONTRACTS/BRONZE/TradeHistory.json"
)

### Define a function to read and process an Excel file


In [4]:
def read_file(file_path: GlobalPath) -> pd.DataFrame:
    """
    Read one trade-history Excel workbook and return it harmonized.

    The workbook is loaded with all of its sheets and without a header row.
    Helper functions defined outside this cell then pick the sheet whose
    name matches "trade", locate the header row matching "date", clean up
    the column names and align the frame with the bronze data contract.

    This function does not write anything to disk; persisting the result
    is left to the caller.

    Args:
        file_path (GlobalPath): The path to the Excel file to be processed.

    Returns:
        pd.DataFrame: The harmonized data aligned with the data contract
        referenced by ``tradehistory_bronze_schema_file_path``.
    """
    # Log the start of processing for the file
    print(f"\nProcessing => {file_path}")

    # sheet_name=None loads every sheet (a mapping of sheet name -> frame),
    # header=None keeps the raw rows so the real header can be detected
    # later, and skipfooter=1 drops the last row of each sheet.
    sheets = pd.read_excel(
        file_path,
        engine="openpyxl",
        sheet_name=None,
        header=None,
        skipfooter=1,
    )

    # Find and select the correct sheetname containing "trade"
    df = find_correct_sheetname(sheets, sheet_name_regex="trade")

    # Find and set the correct headers matching "date"
    df = find_correct_headers(df, global_header_regex="date")

    # Replace punctuation from column names for consistency
    df = replace_punctuation_from_columns(df)

    # Fix duplicate column names by appending numerical suffixes
    df = fix_duplicate_column_names(df)

    # Drop rows where all elements are NaN
    df = df.dropna(how="all")

    # Align DataFrame with DataContract
    df = align_with_datacontract(df, tradehistory_bronze_schema_file_path)

    return df

In [5]:
# Generate file paths for available Excel files in the source layer
file_paths = check_files_availability(
    tradehistory_source_layer_path, file_pattern="trade_*.xlsx"
)

# Process each file path
for file_path in file_paths:
    df = read_file(file_path)

    # Swap only the trailing extension for ".csv". A plain
    # str.replace("xlsx", "csv") would also rewrite "xlsx" wherever else it
    # occurs in the file name.
    output_name = f"{file_path.name.rsplit('.', 1)[0]}.csv"

    # Save the result as a CSV file in the bronze layer path
    output_file = tradehistory_bronze_layer_path.joinpath(output_name)

    # index=False keeps the DataFrame index out of the CSV
    df.to_csv(output_file, index=False)

    # Log successful processing of the file
    print(f"Processed to => {output_file}")

Number of Files Detected => 5

Processing => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SOURCE/TradeHistory/trade_2021.xlsx
Sheet name => TRADE
DataContract loaded from => /home/runner/work/PortfolioTracker/PortfolioTracker/CONFIG/DATA_CONTRACTS/BRONZE/TradeHistory.json
Processed to => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2021.csv

Processing => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SOURCE/TradeHistory/trade_2324.xlsx
Sheet name => TRADE
DataContract loaded from => /home/runner/work/PortfolioTracker/PortfolioTracker/CONFIG/DATA_CONTRACTS/BRONZE/TradeHistory.json
Processed to => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2324.csv

Processing => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SOURCE/TradeHistory/trade_2425.xlsx


Sheet name => TRADE
DataContract loaded from => /home/runner/work/PortfolioTracker/PortfolioTracker/CONFIG/DATA_CONTRACTS/BRONZE/TradeHistory.json
Processed to => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2425.csv

Processing => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SOURCE/TradeHistory/trade_2223.xlsx
Sheet name => TRADE
DataContract loaded from => /home/runner/work/PortfolioTracker/PortfolioTracker/CONFIG/DATA_CONTRACTS/BRONZE/TradeHistory.json
Processed to => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2223.csv

Processing => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SOURCE/TradeHistory/trade_2122.xlsx
Sheet name => TRADE
DataContract loaded from => /home/runner/work/PortfolioTracker/PortfolioTracker/CONFIG/DATA_CONTRACTS/BRONZE/TradeHistory.json
Processed to => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2122.csv
