## SILVER TO GOLD LAYER

### GOLD LAYER - PROCESS HOLDING RECORDS HISTORY


In [1]:
# Import necessary libraries and utility functions
import pandas as pd
from common_utilities import global_path, logger
from datetime import datetime

### Data Processing

- Load and filter trade history data from the Gold layer.


In [2]:
# Load the GOLD-layer trade history and reduce it to the rows and columns
# needed to build the holdings history. On success `df` contains only the
# EQ/MF segments, gains a `date` column, and is ordered by segment,
# stock_name and datetime.
try:
    # Load trade history from CSV into DataFrame
    df = pd.read_csv(global_path.tradehistory_gold_file_path)
    logger.info(
        f"Loaded GOLD Layer trade history data from: {global_path.tradehistory_gold_file_path}"
    )

    # Keep only the EQ/MF segments and the relevant columns in one step.
    # `.loc[...]` followed by `.copy()` yields an independent frame, so the
    # column assignments below never write into a slice of the raw data
    # (which would trigger pandas' SettingWithCopyWarning).
    df = df.loc[
        df["segment"].isin(["EQ", "MF"]),
        [
            "datetime",
            "segment",
            "stock_name",
            "avg_price",
            "holding_quantity",
            "holding_amount",
        ],
    ].copy()

    # Convert 'datetime' column to datetime objects
    df["datetime"] = pd.to_datetime(df["datetime"])

    # Add a 'date' column by extracting the date part from 'datetime'
    df["date"] = df["datetime"].dt.date

    # Sort DataFrame by 'segment', 'stock_name', and 'datetime'
    df = df.sort_values(by=["segment", "stock_name", "datetime"])
except Exception as e:
    logger.error(
        f"Failed to read GOLD Layer trade history data due to error: {e}"
    )
    # Re-raise after logging: the following cells depend on `df`, so
    # swallowing the error would only surface later as an unrelated
    # NameError (fresh kernel) or silently reuse a stale `df`.
    raise

2024-08-01T13:23:36Z - INFO - Loaded GOLD Layer trade history data from: C:\Users\prashant.tripathi\Code\Upstox\DATA\GOLD\TradeHistory\TradeHistory_data.csv


### Data Processing

- Process data to include all dates up to today.
- Merge with stock price data and calculate current values.
- Save the processed data to the Gold layer.


In [3]:
# Build the daily holdings history:
#   1. keep the last trade record of each day for every stock,
#   2. forward-fill that record over every calendar day up to today,
#   3. attach the closing price of each day and derive value / PnL columns.

# Get maximum 'datetime' for each 'date' and 'stock_name' combination
max_datetime_df = (
    df.groupby(["date", "stock_name"])["datetime"].max().reset_index()
)

# Retain only rows with maximum datetime for each 'date' and 'stock_name'.
# Several trades can share that maximum timestamp; keep only the last of
# them so each (date, stock_name) pair is unique. Without this, the
# per-stock `reindex` below raises on duplicate index labels.
df = df.merge(
    max_datetime_df, on=["date", "stock_name", "datetime"]
).drop_duplicates(subset=["date", "stock_name"], keep="last")

# Sort the DataFrame by 'segment', 'stock_name', and 'date'
df = df.sort_values(by=["segment", "stock_name", "date"]).reset_index(drop=True)

# The end of the date range is the same for every stock, so compute it once.
today = datetime.today().date()

# Process each stock name separately
result = []
for _, stock_data in df.groupby("stock_name", sort=False):
    # Set 'date' as index and reindex to include all dates up to today;
    # days without a trade inherit the most recent earlier record (ffill).
    stock_data = stock_data.set_index("date")
    date_range = pd.date_range(
        start=stock_data.index.min(),
        end=today,
        freq="D",
    )
    stock_data = stock_data.reindex(date_range, method="ffill")

    # Reset index to bring 'date' back as a column
    stock_data = stock_data.reset_index().rename(columns={"index": "date"})
    result.append(stock_data)

# Combine processed DataFrames into one
df = pd.concat(result, ignore_index=True)

# Load stock prices and merge with the main DataFrame
try:
    df_StockPrice = pd.read_csv(global_path.stockprice_silver_file_path)
    df_StockPrice = df_StockPrice[["date", "stock_name", "close"]]
    df_StockPrice["date"] = pd.to_datetime(df_StockPrice["date"])
    logger.info(
        f"Loaded SILVER Layer stock price data from: {global_path.stockprice_silver_file_path}"
    )
except Exception as e:
    logger.error(
        f"Failed to read SILVER Layer stock price data due to error: {e}"
    )
    # Re-raise after logging: the merge below needs `df_StockPrice`, so
    # continuing would fail with a NameError or use stale prices.
    raise

# Merge stock price data (left join: holdings rows without a price are kept)
df = pd.merge(df, df_StockPrice, on=["date", "stock_name"], how="left")

# Rename columns for clarity and calculate current value
df = df.rename(
    columns={
        "holding_amount": "investment",
        "holding_quantity": "quantity",
        "close": "ltp",
    }
)
df["current_value"] = df["ltp"] * df["quantity"]

# Calculate PnL and percentage
df["pnl_amount"] = df["current_value"] - df["investment"]
df["pnl_percentage"] = (df["pnl_amount"] / df["investment"]) * 100


# Drop rows whose investment or current value is exactly zero.
# NOTE: rows with a missing price (NaN 'ltp', hence NaN 'current_value')
# compare unequal to 0 and are therefore kept.
df = df[(df["investment"] != 0) & (df["current_value"] != 0)]

# Round the values to two decimal places
df = df.round(2)

# Final sorting by 'segment', 'stock_name', and 'date'
df = df.sort_values(by=["segment", "stock_name", "date"]).reset_index(drop=True)

2024-08-01T13:23:37Z - INFO - Loaded SILVER Layer stock price data from: C:\Users\prashant.tripathi\Code\Upstox\DATA\SILVER\StockPrice\StockPrice_data.csv


In [4]:
# Save the final DataFrame to a CSV file
try:
    # Fix the output schema: column selection and order of the Holdings file.
    df = df[
        [
            "date",
            "segment",
            "stock_name",
            "quantity",
            "avg_price",
            "investment",
            "ltp",
            "current_value",
            "pnl_amount",
            "pnl_percentage"
        ]
    ]
    # Make sure the target directory exists before writing.
    # Assumes `holdings_gold_file_path` is a pathlib.Path; it is already
    # used with `.resolve()` below.
    global_path.holdings_gold_file_path.parent.mkdir(parents=True, exist_ok=True)
    # `index=False` is the documented way to omit the row index.
    df.to_csv(global_path.holdings_gold_file_path, index=False)
    logger.info("GOLD Layer CSV file for Holdings successfully created at:")
    logger.info(global_path.holdings_gold_file_path.resolve())
    # Display a summary of the saved DataFrame (columns, dtypes, null counts)
    df.info()
except Exception as e:
    logger.error(f"Failed to save GOLD Layer CSV file due to error: {e}")
    # Re-raise after logging so a failed export is not mistaken for a
    # successful run.
    raise

2024-08-01T13:23:37Z - INFO - GOLD Layer CSV file for Holdings successfully created at:
2024-08-01T13:23:37Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\GOLD\Holdings\Holdings_data.csv


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5869 entries, 0 to 5868
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            5869 non-null   datetime64[ns]
 1   segment         5869 non-null   object        
 2   stock_name      5869 non-null   object        
 3   quantity        5869 non-null   float64       
 4   avg_price       5869 non-null   float64       
 5   investment      5869 non-null   float64       
 6   ltp             3759 non-null   float64       
 7   current_value   3759 non-null   float64       
 8   pnl_amount      3759 non-null   float64       
 9   pnl_percentage  3759 non-null   float64       
dtypes: datetime64[ns](1), float64(7), object(2)
memory usage: 458.6+ KB
