## BRONZE TO SILVER LAYER

### Bronze Layer - ScripCode


In [1]:
# Import necessary libraries and utility functions
from pathlib import Path

import pandas as pd

from PortfolioTracker.globalpath import GlobalPath
from PortfolioTracker.utilities import (
    check_files_availability,
    replace_punctuation_from_columns,
    replace_punctuation_from_string,
)

In [2]:
# Project path resolver used to build every location below
global_path = GlobalPath()

# Bronze input folder and Silver output file for the Symbol data set
symbol_bronze_layer_path = global_path.joinpath("DATA/BRONZE/Symbol")
symbol_silver_file_path = global_path.joinpath("DATA/SILVER/Symbol/Symbol_data.csv")

In [3]:
# Define a function to read and process a CSV file


def read_file(file_path: Path) -> pd.DataFrame:
    """
    Read one Bronze-layer Symbol CSV file and return it as a harmonized DataFrame.

    Processing steps, in order:
      1. Read the CSV, requesting ``str`` dtype for the ``scrip_code`` column.
      2. Normalize the column names with ``replace_punctuation_from_columns``.
      3. Drop rows that have no ``isin``.
      4. For rows whose ``instrument_type`` is "Equity", prefix ``scrip_code``
         with "IN".
      5. For rows whose ``instrument_type`` is "Mutual Fund", use ``isin`` as
         ``scrip_code`` and derive ``symbol`` from ``scrip_name`` (cleaned with
         ``replace_punctuation_from_string`` and upper-cased).
      6. Strip and upper-case ``scrip_code`` for every row.
      7. Drop columns that contain only NaN values.

    This function only reads and transforms; consolidating several files and
    writing the Silver-layer CSV is left to the caller.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The processed DataFrame for this single file.
    """
    # Log the reading of the file
    print(f"Processing file: {file_path}")

    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path, dtype={"scrip_code": str})

    # Harmonize the DataFrame by replacing punctuation from column names
    df = replace_punctuation_from_columns(df)

    # Drop rows where 'isin' is NaN or null
    df = df.dropna(subset=["isin"])

    # Row selectors, computed once: 'instrument_type' is never modified below,
    # so the masks stay valid for every assignment that follows.
    is_equity = df["instrument_type"] == "Equity"
    is_mutual_fund = df["instrument_type"] == "Mutual Fund"

    # Add the "IN" prefix to 'scrip_code' for equities. The explicit astype(str)
    # keeps the concatenation valid even when a value is not already a string.
    df.loc[is_equity, "scrip_code"] = "IN" + df.loc[
        is_equity, "scrip_code"
    ].astype(str)

    # Mutual funds are identified by their ISIN, so reuse it as 'scrip_code'
    df.loc[is_mutual_fund, "scrip_code"] = df.loc[is_mutual_fund, "isin"]

    # Mutual funds get their 'symbol' from the cleaned, upper-cased 'scrip_name'
    df.loc[is_mutual_fund, "symbol"] = (
        df.loc[is_mutual_fund, "scrip_name"]
        .apply(replace_punctuation_from_string)
        .str.upper()
    )

    # Normalize 'scrip_code' for all rows
    df["scrip_code"] = df["scrip_code"].astype(str).str.strip().str.upper()

    # Drop columns where all elements are NaN
    return df.dropna(how="all", axis=1)

### Process the Bronze layer CSV files to create a consolidated DataFrame


In [4]:
# Collect one processed DataFrame per Bronze-layer file
df_symbol_list = []

# Generate file paths for available CSV files in the Bronze layer
file_paths = check_files_availability(
    symbol_bronze_layer_path, file_pattern="*.csv"
)

# Loop through all CSV files in the Bronze layer folder. A failure in one
# file is logged and skipped so the remaining files are still processed.
for file_path in file_paths:
    try:
        df_symbol_list.append(read_file(file_path))
    except Exception as e:
        # Log any exceptions during file reading
        print(f"Failed to read {file_path} due to error: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that points at the folder instead.
if not df_symbol_list:
    raise ValueError(
        f"No Symbol CSV file could be processed from: {symbol_bronze_layer_path}"
    )

# Concatenate all DataFrames into one
df = pd.concat(df_symbol_list, ignore_index=True)

# Keep only the columns that make up the Silver-layer schema, in this order
df = df[["instrument_type", "isin", "symbol", "scrip_name", "scrip_code"]]

# Sort the DataFrame
df = df.sort_values(by=["instrument_type", "scrip_name"])

# Save the result as a CSV file in the Silver layer (without the index column)
df.to_csv(symbol_silver_file_path, index=False)
print(
    f"Successfully created SILVER Layer CSV file for Symbol at: {symbol_silver_file_path}"
)
# Log the DataFrame information
df.info()

Number of Files Detected: 1
Processing file: /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/BRONZE/Symbol/Symbol_data.csv
Successfully created SILVER Layer CSV file for Symbol at: /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SILVER/Symbol/Symbol_data.csv
<class 'pandas.core.frame.DataFrame'>
Index: 16913 entries, 15214 to 4213
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   instrument_type  16913 non-null  object
 1   isin             16913 non-null  object
 2   symbol           16913 non-null  object
 3   scrip_name       16913 non-null  object
 4   scrip_code       16913 non-null  object
dtypes: object(5)
memory usage: 792.8+ KB
