## BRONZE TO SILVER LAYER

### Read & Validate the Data from the Files


In [1]:
# Import necessary libraries and utility functions
import pandas as pd

from PortfolioTracker.globalpath import global_path
from PortfolioTracker.logger import logger
from PortfolioTracker.utilities import (
    check_files_availability,
    replace_punctuation_from_columns,
)

### Function Definitions

- **concat_company_name**: Builds the company name for a trade row, appending the option marker (CE/PE), strike price and expiry for European Call/Put instruments.
- **read_file**: Reads and processes a CSV file from the Bronze layer.


In [2]:
# Function to apply the conditional concatenation
def concat_company_name(row: pd.Series) -> str:
    """
    Build the display name for a single trade row.

    Rows whose "instrument_type" is "European Call" or "European Put" get
    the option marker ("-CE-" or "-PE-"), the strike price and the expiry
    appended to the company name. Any other instrument type keeps the
    company name on its own. The result is stripped of surrounding
    whitespace and upper-cased.

    Parameters:
    row (pd.Series): A DataFrame row providing "instrument_type" and
        "company", plus "strike_price" and "expiry" for option rows.

    Returns:
    str: The upper-cased name, including the option suffix when applicable.
    """
    instrument = row["instrument_type"]
    if instrument == "European Call":
        marker = "-CE-"
    elif instrument == "European Put":
        marker = "-PE-"
    else:
        # Not an option: only the company name is used
        return str(row["company"]).strip().upper()

    # "expiry" is appended as-is (no str() cast), so it must already be text
    prefix = str(row["company"]) + marker + str(row["strike_price"]) + "-"
    return (prefix + row["expiry"]).strip().upper()

In [3]:
# Function to read and process a CSV file
def read_file(file_path):
    """
    Load one Bronze-layer trade CSV and prepare it for the Silver layer.

    Steps, in order: pass the frame through
    ``replace_punctuation_from_columns`` (presumably normalises the column
    names; confirm in PortfolioTracker.utilities), cast "trade_num" to int,
    build "datetime" from "date" and "trade_time", parse "expiry" into
    "expiry_date" and re-render "expiry" with the "%d%b%Y" format,
    upper-case "side", rewrite "company" via ``concat_company_name`` and
    drop every column that holds no values at all.

    Parameters:
    file_path (str): The path to the CSV file.

    Returns:
    pd.DataFrame: The processed DataFrame.
    """
    logger.info(f"Processing file: {file_path}")

    # Load the raw CSV and hand it to the project helper
    trades = replace_punctuation_from_columns(pd.read_csv(file_path))

    # Missing trade numbers become 0 so the column can be cast to int
    trade_numbers = trades["trade_num"].fillna(0)
    trades["trade_num"] = trade_numbers.astype(int)

    # Timestamp = date (minus any "00:00:00" text) + trade time,
    # with a missing trade time defaulting to midnight
    date_text = trades["date"].str.replace("00:00:00", "").str.strip()
    time_text = trades["trade_time"].fillna("00:00:00")
    trades["datetime"] = pd.to_datetime(
        date_text + " " + time_text,
        format="%Y-%m-%d %H:%M:%S",
    )

    # Keep the parsed expiry as a datetime column and overwrite the
    # original text column with its "%d%b%Y" rendering
    parsed_expiry = pd.to_datetime(trades["expiry"], format="%d-%m-%Y")
    trades["expiry_date"] = parsed_expiry
    trades["expiry"] = parsed_expiry.dt.strftime("%d%b%Y")

    # Normalise the trade side to stripped, upper-case text
    side_text = trades["side"].astype(str)
    trades["side"] = side_text.str.strip().str.upper()

    # Must run after "expiry" is reformatted: option names embed that text
    trades["company"] = trades.apply(concat_company_name, axis=1)

    # Drop columns that are entirely NA in this file
    trades = trades.dropna(axis=1, how="all")

    logger.info(f"Completed processing file: {file_path}")
    return trades

### Data Processing

- Generate file paths for available CSV files in the Bronze layer.
- Read and concatenate data from multiple files.


In [4]:
# Collect every Bronze-layer trade file, process each one with read_file and
# combine the results into a single DataFrame (df_TradeHistory).

# Generate file_paths
file_paths = check_files_availability(
    global_path.tradehistory_bronze_layer_path,
    file_pattern="trade_*.csv",
)

# Initialize an empty list to store DataFrames
dfs = []

# Loop through List of all CSV files in the folder
for file_path in file_paths:
    try:
        # Read and process the CSV file
        df = read_file(file_path)
    except Exception as e:
        # Best effort: a file that cannot be processed is logged and skipped
        logger.error(f"Failed to read {file_path} due to error: {e}")
    else:
        # Keep only files that actually produced rows
        if not df.empty:
            dfs.append(df)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that says where the data was expected instead.
if not dfs:
    raise ValueError(
        "No trade history data could be loaded from "
        f"{global_path.tradehistory_bronze_layer_path} "
        "(file pattern: trade_*.csv); cannot build the SILVER layer."
    )

# Concatenate all DataFrames into one
df_TradeHistory = pd.concat(dfs, ignore_index=True)

2024-08-19T00:39:56Z - INFO - Number of Files Detected: 5


2024-08-19T00:39:56Z - INFO - Processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2021.csv


2024-08-19T00:39:56Z - INFO - Completed processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2021.csv


2024-08-19T00:39:56Z - INFO - Processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2122.csv


2024-08-19T00:39:56Z - INFO - Completed processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2122.csv


2024-08-19T00:39:56Z - INFO - Processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2223.csv


2024-08-19T00:39:56Z - INFO - Completed processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2223.csv


2024-08-19T00:39:56Z - INFO - Processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2324.csv


2024-08-19T00:39:56Z - INFO - Completed processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2324.csv


2024-08-19T00:39:56Z - INFO - Processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2425.csv


2024-08-19T00:39:56Z - INFO - Completed processing file: /storage/emulated/0/PortfolioTracker/DATA/BRONZE/TradeHistory/trade_2425.csv


### Data Harmonization

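- Map scrip codes to symbols using the SILVER layer symbol data. Where no match is found, `stock_name` falls back to the company name and `symbol` falls back to the scrip code.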


In [5]:
# Load the SILVER-layer symbol table used to map scrip codes to symbols
df_Symbol = pd.read_csv(global_path.symbol_silver_file_path)

# Normalise the join key to stripped strings on both sides so the codes
# compare equal regardless of how each CSV was typed on load
df_Symbol["scrip_code"] = df_Symbol["scrip_code"].astype(str).str.strip()
df_TradeHistory["scrip_code"] = (
    df_TradeHistory["scrip_code"].astype(str).str.strip()
)

# Keep one symbol per scrip code: a left merge against a lookup with
# repeated keys would duplicate trade rows (and therefore amounts)
symbol_lookup = df_Symbol[["scrip_code", "symbol"]].drop_duplicates(
    subset="scrip_code"
)

# Attach the symbol to every trade; trades without a match keep NaN here
df_TradeHistory = df_TradeHistory.merge(
    symbol_lookup,
    on="scrip_code",
    how="left",
)

# stock_name: the matched symbol, else the company name built on load
df_TradeHistory["stock_name"] = df_TradeHistory["symbol"].combine_first(
    df_TradeHistory["company"]
)

# symbol: the matched symbol, else the (string) scrip code
df_TradeHistory["symbol"] = df_TradeHistory["symbol"].combine_first(
    df_TradeHistory["scrip_code"]
)

### Final Processing and Export

- Sort the DataFrame by trade datetime and stock name.
- Save the processed data as a CSV file in the Silver layer.


In [6]:
# Columns written to the SILVER layer, in output order
silver_columns = [
    "datetime",
    "exchange",
    "segment",
    "symbol",
    "stock_name",
    "side",
    "amount",
    "quantity",
    "price",
    "expiry_date",
]

# Order the trades by datetime, then stock name, and keep only the
# exported columns
df_TradeHistory = df_TradeHistory.sort_values(by=["datetime", "stock_name"])[
    silver_columns
]

# Write the CSV without the index, then log where it was saved
silver_file_path = global_path.tradehistory_silver_file_path
df_TradeHistory.to_csv(silver_file_path, index=None)
logger.info("SILVER Layer CSV file for trade history successfully created at:")
logger.info(silver_file_path.resolve())

# Print a summary (columns, non-null counts, dtypes) of the exported frame
df_TradeHistory.info()

2024-08-19T00:39:56Z - INFO - SILVER Layer CSV file for trade history successfully created at:


2024-08-19T00:39:56Z - INFO - /storage/emulated/0/PortfolioTracker/DATA/SILVER/TradeHistory/TradeHistory_data.csv


<class 'pandas.core.frame.DataFrame'>
Index: 233 entries, 17 to 42
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   datetime     233 non-null    datetime64[ns]
 1   exchange     233 non-null    object        
 2   segment      233 non-null    object        
 3   symbol       233 non-null    object        
 4   stock_name   233 non-null    object        
 5   side         233 non-null    object        
 6   amount       233 non-null    float64       
 7   quantity     233 non-null    float64       
 8   price        233 non-null    float64       
 9   expiry_date  185 non-null    datetime64[ns]
dtypes: datetime64[ns](2), float64(3), object(5)
memory usage: 20.0+ KB
