## BRONZE TO SILVER LAYER

### Bronze Layer - Ledger

In [1]:
# Import necessary libraries and utility functions
import pandas as pd
from common_utilities import (
    global_path,
    replace_punctuation_from_columns,
    logger
)

### Data Processing

- Initialize an empty list to store DataFrames.
- Read and concatenate data from multiple CSV files.
- Harmonize column names and drop columns that are entirely empty.
- Convert the `trade_date` and `settlement_date` columns to date values.

In [2]:
# Collect the Bronze-layer ledger CSV files. glob() makes no ordering
# guarantee, so sort the paths to keep the concatenation order (and hence
# the row order of the combined frame) the same on every run and platform.
file_paths = sorted(global_path.ledger_bronze_layer_path.glob("*.csv"))

# Read each CSV into its own DataFrame; a file that cannot be read is
# logged and skipped so that one bad file does not block the others.
df_ledger_list = []
for file_path in file_paths:
    try:
        logger.info(f"Processing file: {file_path}")
        df_ledger_list.append(pd.read_csv(file_path))
    except Exception as e:
        logger.error(f"Failed to read {file_path} due to error: {e}")

# pd.concat raises a bare "No objects to concatenate" ValueError on an empty
# list; fail with the same exception type but a message that names the folder.
if not df_ledger_list:
    raise ValueError(
        f"No readable ledger CSV files found in {global_path.ledger_bronze_layer_path}"
    )

# Concatenate all DataFrames into one
df_ledger = pd.concat(df_ledger_list, ignore_index=True)

# Harmonize column names
df_ledger = replace_punctuation_from_columns(df_ledger)

# Drop columns that are empty in the combined DataFrame
# (reassignment instead of inplace=True keeps the step easy to chain and re-run)
df_ledger = df_ledger.dropna(how="all", axis=1)

# Convert the date columns from "YYYY-MM-DD" strings to datetime.date objects
for date_column in ("trade_date", "settlement_date"):
    df_ledger[date_column] = pd.to_datetime(df_ledger[date_column], format="%Y-%m-%d").dt.date

2024-08-01T13:03:41Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\Ledger\ledger_2021.csv
2024-08-01T13:03:41Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\Ledger\ledger_2122.csv
2024-08-01T13:03:41Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\Ledger\ledger_2223.csv
2024-08-01T13:03:41Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\Ledger\ledger_2324.csv
2024-08-01T13:03:41Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\Ledger\ledger_2425.csv


### Final Processing and Export

- Sort the DataFrame by trade date, settlement date, exchange, and segment.
- Keep only the columns required in the Silver layer.
- Save the processed data as a CSV file in the Silver layer.

In [3]:
# Sort the combined ledger, keep the Silver-layer columns, and write the CSV.
# Any failure in this cell is logged rather than raised (best-effort export).
try:
    # Columns exported to the Silver layer, in output order
    silver_columns = [
        "wallet",
        "trade_date",
        "settlement_date",
        "exchange",
        "segment",
        "type",
        "narration",
        "debit",
        "credit",
        "closing_balance",
    ]

    # Sort the DataFrame by date and other relevant columns
    df_ledger = df_ledger.sort_values(by=["trade_date", "settlement_date", "exchange", "segment"])

    # Select relevant columns (raises KeyError if any expected column is missing)
    df_ledger = df_ledger[silver_columns]

    # Print the DataFrame summary (column dtypes and non-null counts)
    df_ledger.info()

    # Save the result as a CSV file without the row index
    df_ledger.to_csv(global_path.ledger_silver_file_path, index=False)
    # The message names the Ledger dataset, which is what this notebook writes
    logger.info("SILVER Layer CSV file for Ledger successfully created at:")
    logger.info(global_path.ledger_silver_file_path.resolve())
except Exception as e:
    logger.error(f"Failed to save SILVER Layer CSV file due to error: {e}")


2024-08-01T13:03:41Z - INFO - SILVER Layer CSV file for Bill Summary successfully created at:
2024-08-01T13:03:41Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\SILVER\Ledger\Ledger_data.csv


<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 0 to 99
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   wallet           100 non-null    object 
 1   trade_date       100 non-null    object 
 2   settlement_date  100 non-null    object 
 3   exchange         100 non-null    object 
 4   segment          100 non-null    object 
 5   type             100 non-null    object 
 6   narration        100 non-null    object 
 7   debit            59 non-null     float64
 8   credit           39 non-null     float64
 9   closing_balance  100 non-null    float64
dtypes: float64(3), object(7)
memory usage: 8.6+ KB
