## BRONZE TO SILVER LAYER

### Reading & Validating the Data from the Files


In [1]:
# Import necessary libraries and utility functions
import pandas as pd
from MyModules.globalpath import GlobalPath
from MyModules.utilities import replace_punctuation_from_columns

In [2]:
# Build the GlobalPath objects for the locations this notebook uses:
#   - the Bronze-layer folder holding the raw trade history CSV files (input)
#   - the Silver-layer symbol CSV (input for the scrip_code -> symbol lookup)
#   - the Silver-layer trade history CSV (output of this notebook)
# NOTE(review): GlobalPath is a project helper; presumably it resolves these
# relative paths against the project root — confirm.


tradehistory_bronze_layer_path = GlobalPath("DATA/BRONZE/TradeHistory")
symbol_silver_file_path = GlobalPath("DATA/SILVER/Symbol/Symbol_data.csv")
tradehistory_silver_file_path = GlobalPath(
    "DATA/SILVER/TradeHistory/TradeHistory_data.csv"
)

### Function Definitions

- **get_scrip_name**: Builds the scrip name from the company name, appending option details (CE/PE, strike price, expiry) based on instrument type.
- **read_file**: Reads and processes a CSV file from the Bronze layer.


In [3]:
# Build the scrip name for a single trade row


def get_scrip_name(row: pd.Series) -> str:
    """
    Build the scrip name for one row, based on its instrument type.

    For "European Call" / "European Put" rows the name is
    "<company>-CE-<strike_price>-<expiry>" / "<company>-PE-<strike_price>-<expiry>".
    For every other instrument type the name is just the company.
    The result is stripped of surrounding whitespace and upper-cased.

    Parameters:
    row (pd.Series): A row of DataFrame containing instrument data
        ('instrument_type', 'company' and, for options, 'strike_price'
        and 'expiry').

    Returns:
    str: The concatenated stock name.
    """
    option_tags = {"European Call": "CE", "European Put": "PE"}
    tag = option_tags.get(row["instrument_type"])

    # Anything that is not a European option is named after the company only
    if tag is None:
        return str(row["company"]).strip().upper()

    prefix = "-".join(
        (str(row["company"]), tag, str(row["strike_price"]))
    )
    # 'expiry' is concatenated as-is, so it must already be a string
    scrip_name = prefix + "-" + row["expiry"]
    return scrip_name.strip().upper()

In [4]:
# Load one Bronze-layer trade CSV and derive the Silver-layer columns


def read_file(file_path):
    """
    Load a single Bronze-layer CSV file and prepare it for the Silver layer.

    The frame is passed through replace_punctuation_from_columns, then:
    'trade_num' is cast to int (missing values become 0), a 'datetime'
    column is built from 'date' and 'trade_time', 'expiry' is parsed into
    'expiry_date' and re-formatted as a "%d%b%Y" string, 'side' is
    upper-cased, 'scrip_code' gets an "IN" prefix, 'scrip_name' is derived
    per row, and columns that are entirely NA are dropped.

    Parameters:
    file_path (str): The path to the CSV file.

    Returns:
    pd.DataFrame: The processed DataFrame.
    """
    print(f"Processing file: {file_path}")

    # Load the raw data and clean up the column names
    raw = pd.read_csv(file_path)
    df = replace_punctuation_from_columns(raw)

    # Missing trade numbers become 0 so the column can be cast to int
    trade_numbers = df["trade_num"].fillna(0)
    df["trade_num"] = trade_numbers.astype(int)

    # Combine the date (minus any midnight time part) with the trade time;
    # rows without a trade time default to midnight
    date_part = df["date"].str.replace("00:00:00", "").str.strip()
    time_part = df["trade_time"].fillna("00:00:00")
    df["datetime"] = pd.to_datetime(
        date_part + " " + time_part, format="%Y-%m-%d %H:%M:%S"
    )

    # Keep the parsed expiry as a date and rewrite 'expiry' as e.g. 25Jan2024
    expiry_dates = pd.to_datetime(df["expiry"], format="%d-%m-%Y")
    df["expiry_date"] = expiry_dates
    df["expiry"] = expiry_dates.dt.strftime("%d%b%Y")

    # Normalise 'side' to trimmed upper-case text
    side_text = df["side"].astype(str)
    df["side"] = side_text.str.strip().str.upper()

    # Prefix the trimmed, upper-cased 'scrip_code' with "IN"
    code_text = df["scrip_code"].astype(str).str.strip().str.upper()
    df["scrip_code"] = "IN" + code_text

    # Derive the scrip name row by row (uses the re-formatted 'expiry')
    df["scrip_name"] = df.apply(get_scrip_name, axis=1)

    # Drop columns that contain no data at all
    cleaned = df.dropna(axis=1, how="all")

    print(f"Completed processing file: {file_path}")
    return cleaned

### Data Processing

- Generate file paths for available CSV files in the Bronze layer.
- Read and concatenate data from multiple files.


In [5]:
# Collect the Bronze-layer trade files to process.
# NOTE(review): check_files_availability is not imported in any cell visible
# in this notebook — confirm where it is defined so Restart & Run All works.
file_paths = check_files_availability(
    tradehistory_bronze_layer_path,
    file_pattern="trade_*.csv",
)

# Initialize an empty list to store DataFrames
dfs = []

# Process each file; a file that fails is reported and skipped so the
# remaining files are still loaded
for file_path in file_paths:
    try:
        # Read the CSV file
        df = read_file(file_path)
        # Append the DataFrame to the list
        if not df.empty:
            dfs.append(df)
    except Exception as e:
        print(f"Failed to read {file_path} due to error: {e}")

# pd.concat raises an opaque "No objects to concatenate" ValueError on an
# empty list, so fail with an actionable message when nothing was loaded
if not dfs:
    raise ValueError(
        "No trade history data could be loaded from "
        f"{tradehistory_bronze_layer_path}; check the messages above."
    )

# Concatenate all DataFrames into one
df_TradeHistory = pd.concat(dfs, ignore_index=True)

Number of Files Detected: 5
Processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2021.csv
Completed processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2021.csv
Processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2122.csv
Completed processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2122.csv
Processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2223.csv
Completed processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2223.csv
Processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2324.csv
Completed processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2324.csv
Processing file: C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\BRONZE\TradeH

### Data Harmonization

- Look up the symbol for each scrip code by joining the trade history with the SILVER layer symbol data.


In [6]:
# Load the Silver-layer symbol data used for the scrip_code -> symbol lookup
df_Symbol = pd.read_csv(symbol_silver_file_path)

# Left join on 'scrip_code': every trade row is kept and gains a 'symbol'
# column (left empty where the scrip code has no match in the symbol data)
df_TradeHistory = pd.merge(
    df_TradeHistory,
    df_Symbol.loc[:, ["scrip_code", "symbol"]],
    on="scrip_code",
    how="left",
)

### Final Processing and Export

- Sort the DataFrame by datetime.
- Save the processed data as a CSV file in the Silver layer.


In [7]:
# Sort the trades chronologically
df_TradeHistory = df_TradeHistory.sort_values(by=["datetime"])

# Columns kept in the Silver-layer output
relevant_columns = [
    "datetime",
    "exchange",
    "segment",
    "symbol",
    "scrip_name",
    "side",
    "amount",
    "quantity",
    "price",
    "expiry_date",
]
# Report the columns that are about to be dropped by the selection below
print(
    "REMAINING COLUMNS :", set(df_TradeHistory.columns) - set(relevant_columns)
)
df_TradeHistory = df_TradeHistory[relevant_columns]

# Save the result as a CSV file; index=False is the documented way to leave
# the row index out of the file
df_TradeHistory.to_csv(tradehistory_silver_file_path, index=False)
print("SILVER Layer CSV file for trade history successfully created at:")
print(tradehistory_silver_file_path)
# Show a column / dtype / non-null summary of the exported frame
df_TradeHistory.info()

REMAINING COLUMNS : {'scrip_code', 'trade_time', 'expiry', 'trade_num', 'company', 'strike_price', 'instrument_type', 'date'}
SILVER Layer CSV file for trade history successfully created at:
C:\Users\prashant.tripathi\Code\PortfolioTracker\DATA\SILVER\TradeHistory\TradeHistory_data.csv
<class 'pandas.core.frame.DataFrame'>
Index: 254 entries, 17 to 41
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   datetime     254 non-null    datetime64[ns]
 1   exchange     254 non-null    object        
 2   segment      254 non-null    object        
 3   symbol       254 non-null    object        
 4   scrip_name   254 non-null    object        
 5   side         254 non-null    object        
 6   amount       254 non-null    float64       
 7   quantity     254 non-null    float64       
 8   price        254 non-null    float64       
 9   expiry_date  206 non-null    datetime64[ns]
dtypes: datetime64[ns](2)