## SOURCE TO BRONZE LAYER

> This notebook reads the RAW files and performs data harmonization.


### Importing necessary files and packages


In [1]:
import pandas as pd
from common_utilities import (
    check_files_availability,
    find_correct_headers,
    find_correct_sheetname,
    fix_duplicate_column_names,
    global_path,
    replace_punctuation_from_columns,
)

### Reading & Validating the data from the files


In [2]:
def read_file(file_path):
    """Convert one trade-history Excel workbook into a bronze-layer CSV.

    The workbook is loaded without any header interpretation, narrowed down to
    the trade sheet, re-headed, has its column names cleaned, and is written
    next to the other bronze files under the same base name with a ``.csv``
    extension.

    Parameters
    ----------
    file_path : pathlib.Path
        Location of the source ``.xlsx`` workbook. Must be a ``Path`` (not a
        plain string) because the output name is derived from its
        ``name``/``suffix``.

    Returns
    -------
    None
        The result is written to disk; progress is reported via ``print``.
    """
    print(f"\nProccessing => {file_path}")

    # sheet_name=None loads every sheet (pandas returns a dict of frames),
    # header=None keeps the header row as ordinary data so it can be located
    # afterwards, and skipfooter=1 drops the last row of each sheet.
    df = pd.read_excel(
        file_path, engine="openpyxl", sheet_name=None, header=None, skipfooter=1
    )

    # The helpers below live in common_utilities; presumably they pick the
    # sheet whose name matches "trade" and the row containing "date" as the
    # header - confirm against their implementation.
    df = find_correct_sheetname(df, sheet_name_regex="trade")
    df = find_correct_headers(df, global_header_regex="date")
    df = replace_punctuation_from_columns(df)
    df = fix_duplicate_column_names(df)

    # Discard rows in which every cell is empty.
    df = df.dropna(how="all")

    # Save the result as a csv file.
    # Swap the extension with pathlib instead of str.replace: replace() would
    # also rewrite an "xlsx" occurring inside the file stem and would leave an
    # upper-case ".XLSX" extension untouched.
    output_file = global_path.tradehistory_bronze_layer_path.joinpath(
        file_path.with_suffix(".csv").name
    )
    # index=False is the documented boolean form; the row index is not written.
    df.to_csv(output_file, index=False)
    print(f"Proccessed => {output_file}")

In [3]:
# Generate file_paths
# Look in the trade-history source layer for workbooks named trade_*.xlsx.
# NOTE(review): check_files_availability presumably returns an iterable of
# pathlib.Path objects (read_file relies on `.name`) - confirm in
# common_utilities.
file_paths = check_files_availability(
    global_path.tradehistory_source_layer_path,
    file_pattern="trade_*.xlsx",
)

# Convert each workbook in turn; read_file writes the CSV itself and returns
# nothing, so there is no result to collect here.
for file_path in file_paths:
    read_file(file_path)

Number of Files Detected: 5

Proccessing => C:\Users\prashant.tripathi\Code\Upstox\DATA\SOURCE\TradeHistory\trade_2021.xlsx
Sheet name => TRADE
Proccessed => C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\TradeHistory\trade_2021.csv

Proccessing => C:\Users\prashant.tripathi\Code\Upstox\DATA\SOURCE\TradeHistory\trade_2122.xlsx
Sheet name => TRADE
Proccessed => C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\TradeHistory\trade_2122.csv

Proccessing => C:\Users\prashant.tripathi\Code\Upstox\DATA\SOURCE\TradeHistory\trade_2223.xlsx
Sheet name => TRADE
Proccessed => C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\TradeHistory\trade_2223.csv

Proccessing => C:\Users\prashant.tripathi\Code\Upstox\DATA\SOURCE\TradeHistory\trade_2324.xlsx
Sheet name => TRADE
Proccessed => C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\TradeHistory\trade_2324.csv

Proccessing => C:\Users\prashant.tripathi\Code\Upstox\DATA\SOURCE\TradeHistory\trade_2425.xlsx
Sheet name => TRADE
Proccessed => C:\Use