## BRONZE TO SILVER LAYER

### Bronze Layer - ScripCode


In [1]:
from pathlib import Path

# Import necessary libraries and utility functions
import pandas as pd
from common.utilities import (
    check_files_availability,
    global_path,
    logger,
    replace_punctuation_from_columns,
)

In [2]:
# Define a function to read and process a CSV file


def read_file(file_path: Path) -> pd.DataFrame:
    """
    Read a single Bronze layer CSV file and return it as a cleaned DataFrame.

    The file is loaded with ``pd.read_csv``, passed through
    ``replace_punctuation_from_columns`` and stripped of every column whose
    values are all NaN. Consolidating several files and writing the Silver
    layer output is the caller's responsibility; this function writes nothing.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The DataFrame built from the file, without its all-NaN columns.

    Raises:
        Whatever ``pd.read_csv`` or ``replace_punctuation_from_columns``
        raises; no exception is handled here.
    """
    # Log the reading of the file
    logger.info(f"Processing file: {file_path}")

    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)

    # Harmonize the column names via the shared project helper
    # NOTE(review): presumably only column labels are changed, not cell values
    # - confirm against common.utilities
    df = replace_punctuation_from_columns(df)

    # Drop columns where all elements are NaN (returns a new DataFrame rather
    # than mutating in place)
    return df.dropna(how="all", axis=1)

In [3]:
# Process the Bronze layer CSV files to create a consolidated DataFrame


# Initialize an empty list to store the DataFrames that were read successfully
df_symbol_list = []

# Generate file paths for the available CSV files in the bronze layer
file_paths = check_files_availability(
    global_path.symbol_bronze_layer_path, file_pattern="*.csv"
)

# Loop through all CSV files in the bronze layer folder; each file is read
# independently so that one unreadable file does not abort the whole run
for file_path in file_paths:
    try:
        df = read_file(file_path)
    except Exception as e:
        # Report the failure at ERROR level so it stands out from INFO lines
        logger.error(f"Failed to read {file_path} due to error: {e}")
    else:
        # Append the DataFrame to the list
        df_symbol_list.append(df)

# pd.concat raises an opaque "No objects to concatenate" on an empty list,
# so fail early with a message that names the actual problem
if not df_symbol_list:
    raise ValueError(
        "No Bronze layer CSV file could be read from "
        f"{global_path.symbol_bronze_layer_path}; "
        "the SILVER Layer CSV file for Symbol was not created."
    )

# Concatenate all DataFrames into one
df = pd.concat(df_symbol_list, ignore_index=True)

# Sort the DataFrame by 'scrip_code'
# NOTE(review): the captured df.info() output lists scrip_code as dtype
# object, so this is presumably a string sort rather than a numeric one -
# confirm that this ordering is intended
df = df.sort_values(by=["scrip_code"])

# Save the result as a CSV file in the silver layer, without the index column
df.to_csv(global_path.symbol_silver_file_path, index=False)
logger.info("Successfully created SILVER Layer CSV file for Symbol at:")
logger.info(global_path.symbol_silver_file_path.resolve())
# Print a summary of the DataFrame (df.info() writes to stdout, not the logger)
df.info()

2024-08-07T13:57:57Z - INFO - Number of Files Detected: 1
2024-08-07T13:57:57Z - INFO - Processing file: C:\Users\prashant.tripathi\Code\Upstox\DATA\BRONZE\Symbol\Symbol_data.csv


2024-08-07T13:57:57Z - INFO - Successfully created SILVER Layer CSV file for Symbol at:
2024-08-07T13:57:57Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\SILVER\Symbol\Symbol_data.csv


<class 'pandas.core.frame.DataFrame'>
Index: 4328 entries, 2 to 0
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   scrip_code       4328 non-null   object
 1   symbol           4328 non-null   object
 2   scrip_name       4328 non-null   object
 3   isin_no          4328 non-null   object
 4   instrument_type  4328 non-null   object
dtypes: object(5)
memory usage: 202.9+ KB
