## SILVER TO GOLD LAYER

### GOLD LAYER - PROCESS HOLDING RECORDS HISTORY


In [1]:
# Import necessary libraries and utility functions
import pandas as pd
from common.utilities import global_path, logger

In [2]:
# Read the holdings CSV from the configured path and, in the same chained
# expression, parse its 'date' column into datetime values
df_holdings = pd.read_csv(global_path.holdings_gold_file_path).assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)

In [3]:
# Create a function to expand the date range for each stock


def expand_dates(stock_df, end_date=None):
    """
    Expands the rows of a single stock to one row per calendar day, from the
    stock's earliest date up to ``end_date``, and forward fills the days that
    have no record of their own.

    Parameters:
    stock_df (pd.DataFrame): DataFrame containing stock data for a single
        stock. Must contain a 'date' column of datetimes with at most one
        row per date.
    end_date (datetime-like, optional): Last date of the expanded range.
        Defaults to the current timestamp, i.e. the range runs up to today.

    Returns:
    pd.DataFrame: DataFrame with 'date' as the first column, one row per day
        in the expanded range, and forward-filled values. An empty input is
        returned as an empty copy.

    Raises:
    ValueError: If 'date' holds duplicate values, because reindexing cannot
        align duplicate labels.
    """
    # Nothing to expand: the minimum date would be NaT and date_range would fail
    if stock_df.empty:
        return stock_df.copy()

    # Find the minimum date for this stock
    min_date = stock_df["date"].min()

    # Create a daily range from the minimum date to the requested end (today by default)
    range_end = (
        pd.to_datetime("today") if end_date is None else pd.to_datetime(end_date)
    )
    date_range = pd.date_range(start=min_date, end=range_end)

    # Reindex onto the full range, then name the index explicitly so that
    # reset_index() yields a 'date' column without relying on the default
    # 'index' name (which would clash with a data column called 'index')
    expanded = (
        stock_df.set_index("date")
        .reindex(date_range)
        .rename_axis("date")
        .ffill()
        .reset_index()
    )

    return expanded


# Apply the expand_dates function to each stock group.
# The grouped result is indexed by (stock_name, row position). The positional
# level is dropped before reset_index() so that only 'stock_name' comes back
# as a column and no leftover 'level_1' helper column ends up in the frame.
df_holdings = (
    df_holdings.groupby("stock_name")
    .apply(expand_dates, include_groups=False)
    .droplevel(-1)
    .reset_index()
)

In [4]:
# Read the stock price CSV from the configured path
df_StockPrice = pd.read_csv(global_path.stockprice_silver_file_path)

# Parse 'date' into datetime values so it can be joined against holdings dates
df_StockPrice = df_StockPrice.assign(
    date=pd.to_datetime(df_StockPrice["date"])
)

logger.info(
    f"Loaded SILVER Layer stock price data from: {global_path.stockprice_silver_file_path}"
)

2024-08-07T14:00:17Z - INFO - Loaded SILVER Layer stock price data from: C:\Users\prashant.tripathi\Code\Upstox\DATA\SILVER\StockPrice\StockPrice_data.csv


In [5]:
# Merge the expanded holdings data with the stock price data.
# The left join keeps every holdings row; dates with no matching price row
# get NaN in the price columns.
df_holdings = pd.merge(
    df_holdings,
    df_StockPrice,
    on=["date", "stock_name"],
    how="left",
)

# Forward fill missing values within each stock only. The rows arrive grouped
# stock by stock, so a frame-wide ffill would carry the last price of one
# stock into the leading gap of the next stock.
fill_columns = [col for col in df_holdings.columns if col != "stock_name"]
df_holdings[fill_columns] = df_holdings.groupby("stock_name")[
    fill_columns
].ffill()

# Calculate the current value of holdings from the filled close price, so the
# value always matches the quantity on the same row. Rows of a stock that
# precede its first available price keep NaN here.
df_holdings["current_value"] = (
    df_holdings["close"] * df_holdings["holding_quantity"]
)

# Filter out rows with zero holding quantity
df_holdings = df_holdings[df_holdings["holding_quantity"] != 0]

# Round the numeric columns to two decimal places
df_holdings = df_holdings.round(2)

In [6]:
# Columns written to the output file, in their final order
output_columns = [
    "date",
    "segment",
    "stock_name",
    "holding_quantity",
    "holding_price_avg",
    "holding_amount",
    "close",
    "current_value",
]

# Keys that define the row order of the output
sort_keys = ["date", "segment", "stock_name"]

# Order the rows, renumber the index, then keep only the output columns
df_holdings = (
    df_holdings.sort_values(by=sort_keys)
    .reset_index(drop=True)
    .loc[:, output_columns]
)

# Write the processed DataFrame to the v2 holdings CSV, without the index
output_path = global_path.holdings_gold_file_path_v2
df_holdings.to_csv(output_path, index=False)

logger.info("GOLD Layer CSV file for Holdings successfully created at:")
logger.info(output_path.resolve())

# Show column dtypes and non-null counts of the saved DataFrame
df_holdings.info()

2024-08-07T14:00:18Z - INFO - GOLD Layer CSV file for Holdings successfully created at:
2024-08-07T14:00:18Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\GOLD\Holdings\Holdings_data_v2.csv


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5905 entries, 0 to 5904
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   date               5905 non-null   datetime64[ns]
 1   segment            5905 non-null   object        
 2   stock_name         5905 non-null   object        
 3   holding_quantity   5905 non-null   float64       
 4   holding_price_avg  5905 non-null   float64       
 5   holding_amount     5905 non-null   float64       
 6   close              5905 non-null   float64       
 7   current_value      5905 non-null   float64       
dtypes: datetime64[ns](1), float64(5), object(2)
memory usage: 369.2+ KB
