## BRONZE TO SILVER LAYER

### Read & Validate the Data from the Files


In [140]:
# Import necessary libraries and utility functions
import pandas as pd

from ETLTools import GlobalPath, utils

In [141]:
# Locations used by this notebook, each wrapped in the project's GlobalPath:
#   - Bronze folder scanned for raw trade history CSV files (input)
#   - Silver symbol CSV used to look up company names and symbols (input)
#   - Silver trade history CSV written at the end of the notebook (output)
# NOTE(review): the recorded output prints these as absolute paths under the
# project directory, so GlobalPath presumably resolves the relative strings
# against the project root - confirm in ETLTools.
tradehistory_bronze_layer_path = GlobalPath("DATA/BRONZE/TradeHistory")
symbol_silver_file_path = GlobalPath("DATA/SILVER/Symbol/Symbol_data.csv")
tradehistory_silver_file_path = GlobalPath(
    "DATA/SILVER/TradeHistory/TradeHistory_data.csv"
)

### Function Definitions

- **get_scrip_name**: Builds the scrip name for a trade row based on its instrument type.
- **read_file**: Reads and processes a CSV file from the Bronze layer.


In [142]:
# Function to apply the conditional concatenation


def get_scrip_name(row: pd.Series) -> str:
    """
    Build the scrip name for one trade row.

    European options are named ``<company>-CE-<strike_price>-<expiry>``
    (calls) or ``<company>-PE-<strike_price>-<expiry>`` (puts); every other
    instrument type is named after the company alone.

    Parameters:
    row (pd.Series): A DataFrame row providing 'instrument_type' and
        'company', plus 'strike_price' and 'expiry' for option rows.

    Returns:
    str: The scrip name, stripped of surrounding whitespace and upper-cased.
    """
    option_infix = {"European Call": "-CE-", "European Put": "-PE-"}
    infix = option_infix.get(row["instrument_type"])

    name = str(row["company"])
    if infix is not None:
        # 'expiry' is concatenated as-is, so it must already be a string
        name = name + infix + str(row["strike_price"]) + "-" + row["expiry"]
    return name.strip().upper()

In [143]:
# Function to read and process a CSV file
def read_file(file_path: GlobalPath) -> pd.DataFrame:
    """
    Reads and processes a single trade history CSV file from the Bronze layer.

    Parameters:
    file_path (GlobalPath): The path to the CSV file.

    Returns:
    pd.DataFrame: The processed DataFrame with 'datetime', 'expiry_date' and
    'scrip_name' added and all-NA columns removed. A file without any data
    rows yields an empty DataFrame.
    """
    print(f"Processing file: {file_path}")

    # Read the CSV file and pass it through the project's column-name cleanup
    df = pd.read_csv(file_path)
    df = utils.replace_punctuation_from_columns(df)

    # A header-only file has nothing to transform. Returning early also
    # avoids the row-wise apply below: on an empty frame it returns a
    # DataFrame instead of a Series, which cannot be stored in one column.
    if df.empty:
        return df

    # Convert 'trade_num' to int (missing values become 0)
    df["trade_num"] = df["trade_num"].fillna(0).astype(int)

    # Build 'datetime' from the date (minus any midnight time stamp) and the
    # trade time, treating a missing trade time as midnight
    df["datetime"] = pd.to_datetime(
        df["date"].str.replace("00:00:00", "", regex=False).str.strip()
        + " "
        + df["trade_time"].fillna("00:00:00"),
        format="%Y-%m-%d %H:%M:%S",
    )

    # Parse 'expiry' (DD-MM-YYYY): 'expiry' becomes a compact string such as
    # 25Jan2024, 'expiry_date' an ISO date string ("" when missing)
    df["expiry_date"] = pd.to_datetime(df["expiry"], format="%d-%m-%Y")
    df["expiry"] = df["expiry_date"].dt.strftime("%d%b%Y")
    df["expiry_date"] = df["expiry_date"].dt.date.astype(str).replace("NaT", "")

    # Convert the 'side' column in df to uppercase
    df["side"] = df["side"].astype(str).str.strip().str.upper()

    # Add the "IN" prefix to 'scrip_code'
    df["scrip_code"] = (
        "IN" + df["scrip_code"].astype(str).str.strip().str.upper()
    )

    # Derive the scrip name row by row
    df["scrip_name"] = df.apply(get_scrip_name, axis=1)

    # Remove columns that contain no data at all
    df = df.dropna(axis=1, how="all")
    return df

### Data Processing

- Generate file paths for available CSV files in the Bronze layer.
- Read and concatenate data from multiple files.


In [144]:
# Locate the Bronze layer trade history files
file_paths = utils.check_files_availability(
    tradehistory_bronze_layer_path,
    file_pattern="trade_*.csv",
)

# One processed DataFrame per successfully read, non-empty file
dfs = []

for file_path in file_paths:
    try:
        df = read_file(file_path)
        # Files that yield no rows contribute nothing
        if not df.empty:
            dfs.append(df)
    except Exception as e:
        # Best effort: report the failing file and carry on with the rest
        print(f"Failed to read {file_path} due to error: {e}")

# pd.concat fails with an opaque "No objects to concatenate" on an empty
# list, so stop here with a message that points at the input folder
if not dfs:
    raise ValueError(
        "No trade history data could be loaded from "
        f"{tradehistory_bronze_layer_path}"
    )

# Concatenate all DataFrames into one
df_TradeHistory = pd.concat(dfs, ignore_index=True)

Number of Files Detected: 5
Processing file: D:\Study_Material\Anudip_foundation\Main_Projects\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2122.csv
Processing file: D:\Study_Material\Anudip_foundation\Main_Projects\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2223.csv
Processing file: D:\Study_Material\Anudip_foundation\Main_Projects\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2324.csv
Processing file: D:\Study_Material\Anudip_foundation\Main_Projects\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_2425.csv
Processing file: D:\Study_Material\Anudip_foundation\Main_Projects\PortfolioTracker\DATA\BRONZE\TradeHistory\trade_groww.csv


### Data Harmonization

- Replace scrip codes with company names using the SILVER layer symbol data.


In [145]:
# Load the Silver layer symbol data; its 'scrip_name' is exposed as
# 'company_name' so it does not clash with the trade history column
df_Symbol = pd.read_csv(symbol_silver_file_path)
df_Symbol = df_Symbol.rename(columns={"scrip_name": "company_name"})

# Left join on the scrip code so every trade row is kept
df_TradeHistory = df_TradeHistory.merge(
    df_Symbol[["scrip_code", "company_name", "symbol"]],
    on="scrip_code",
    how="left",
)

is_equity = df_TradeHistory["segment"] == "EQ"

# Equity rows without a symbol are silently discarded by the later groupby
# (rows with NaN keys are dropped by default), so make them visible here
missing_symbol_codes = (
    df_TradeHistory.loc[
        is_equity & df_TradeHistory["symbol"].isna(), "scrip_code"
    ]
    .drop_duplicates()
    .tolist()
)
if missing_symbol_codes:
    print("WARNING: no symbol found for scrip codes:", missing_symbol_codes)

# Equity rows take the company name from the symbol data as their scrip name
df_TradeHistory.loc[is_equity, "scrip_name"] = df_TradeHistory.loc[
    is_equity, "company_name"
]

### Final Processing and Export

- Aggregate matching trades, keep only the EQ segment, and sort the DataFrame by exchange, segment, symbol, scrip name, datetime and side.
- Save the processed data as a CSV file in the Silver layer.


In [146]:
# Collapse rows that share every identifying attribute into a single row
# whose 'quantity' is the total of the group
df_TradeHistory = (
    df_TradeHistory.groupby(
        by=[
            "datetime",
            "exchange",
            "segment",
            "symbol",
            "side",
            "price",
            "scrip_name",
            "expiry_date",
        ]
    )["quantity"]
    .sum()
    .reset_index()
)

# Add 'amount' (price * quantity), then round all numeric columns to
# 2 decimal places
df_TradeHistory = df_TradeHistory.assign(
    amount=df_TradeHistory["price"].mul(df_TradeHistory["quantity"])
).round(2)

### Filter out F&O trades (keep only the EQ segment)

In [147]:
# Keep only the rows whose segment is "EQ"; all other segments are dropped
df_TradeHistory = df_TradeHistory.loc[df_TradeHistory["segment"].eq("EQ")]

In [148]:
# Order rows by exchange, segment, symbol and scrip name, then by trade
# time and side
df_TradeHistory = df_TradeHistory.sort_values(
    by=["exchange", "segment", "symbol", "scrip_name", "datetime", "side"]
)

# Columns (and their order) written to the Silver layer file
relevant_columns = [
    "datetime",
    "exchange",
    "segment",
    "symbol",
    "scrip_name",
    "side",
    "amount",
    "quantity",
    "price",
    "expiry_date",
]
# Show which columns, if any, are about to be discarded
print(
    "REMAINING COLUMNS :", set(df_TradeHistory.columns) - set(relevant_columns)
)
df_TradeHistory = df_TradeHistory[relevant_columns]


# Save the result as a CSV file without the row index
# (index=False is the documented boolean form; None only worked by being falsy)
df_TradeHistory.to_csv(tradehistory_silver_file_path, index=False)
print("SILVER Layer CSV file for trade history successfully created at:")
print(tradehistory_silver_file_path)
# Summarise the exported DataFrame (columns, non-null counts, dtypes)
df_TradeHistory.info()

REMAINING COLUMNS : set()
SILVER Layer CSV file for trade history successfully created at:
D:\Study_Material\Anudip_foundation\Main_Projects\PortfolioTracker\DATA\SILVER\TradeHistory\TradeHistory_data.csv
<class 'pandas.core.frame.DataFrame'>
Index: 161 entries, 119 to 125
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   datetime     161 non-null    datetime64[ns]
 1   exchange     161 non-null    object        
 2   segment      161 non-null    object        
 3   symbol       161 non-null    object        
 4   scrip_name   161 non-null    object        
 5   side         161 non-null    object        
 6   amount       161 non-null    float64       
 7   quantity     161 non-null    int64         
 8   price        161 non-null    float64       
 9   expiry_date  161 non-null    object        
dtypes: datetime64[ns](1), float64(2), int64(1), object(6)
memory usage: 13.8+ KB
