## SILVER TO GOLD LAYER

### Gold Layer - Profit/Loss and Holdings from Trade History


In [10]:
# Import necessary libraries and utility functions
import pandas as pd
from common.portfolio_calc import Portfolio
from common.utilities import global_path, logger

### Data Processing

- Read and sort trade history data.
- Apply portfolio trade logic.


In [11]:
# Load the SILVER layer trade history
df_trade_history = pd.read_csv(global_path.tradehistory_silver_file_path)

# Parse the 'datetime' column so sorting is chronological, not lexicographic
df_trade_history["datetime"] = pd.to_datetime(df_trade_history["datetime"])

# Order trades chronologically. The sort is explicitly stable: pandas' default
# (quicksort) may reorder rows that share the same timestamp, and the rows are
# replayed one by one in this order further down, so ties must keep the order
# in which they appear in the file.
df_trade_history = df_trade_history.sort_values(by="datetime", kind="stable")

logger.info(
    f"Read SILVER Layer trade history data from: {global_path.tradehistory_silver_file_path}"
)

2024-08-07T20:23:29Z - INFO - Read SILVER Layer trade history data from: C:\Users\prashant.tripathi\Code\Upstox\DATA\SILVER\TradeHistory\TradeHistory_data.csv


### Portfolio Logic Application

- Instantiate Portfolio and apply trade logic.
- Check for and handle expired stocks.


In [12]:
# Replay the trade history through a fresh Portfolio, one row at a time.
# Every value is stringified before being handed to Portfolio.trade().
portfolio = Portfolio()
trade_records = df_trade_history.astype(str).to_dict(orient="records")
for trade in trade_records:
    portfolio.trade(trade)

# Once all trades are applied, let the portfolio deal with expired stocks
# expired_stocks example : df_trade_history["stock_name"] == "NIFTY-PE-24650-18JUL2024"
portfolio.check_expired_stocks()

2024-08-07T20:23:29Z - INFO - NIFTY-PE-24650-18JUL2024 => 100.0 expired


### Final Processing and Export

- Select and sort relevant columns.
- Save the processed data as a CSV file in the Gold layer.


In [13]:
# Build the profit-and-loss table from the portfolio, rounded to two decimals
df_pnl = pd.DataFrame(portfolio.get_pnl()).round(2)

# Derive 'position' from 'close_side': 'SELL' maps to 'LONG', 'BUY' maps to
# 'SHORT', and any other value is passed through unchanged
position_by_close_side = {"SELL": "LONG", "BUY": "SHORT"}
df_pnl["position"] = df_pnl["close_side"].map(
    lambda side: position_by_close_side.get(side, side)
)

# Final column layout of the exported file
pnl_columns = [
    "exchange",
    "segment",
    "stock_name",
    "position",
    "quantity",
    "open_datetime",
    "open_side",
    "open_price",
    "open_amount",
    "close_datetime",
    "close_side",
    "close_price",
    "close_amount",
    "pnl_amount",
    "pnl_percentage",
]

# Order the rows, re-apply the two-decimal rounding, restart the index at 0
# and keep only the export columns
df_pnl = (
    df_pnl.sort_values(
        by=["segment", "stock_name", "close_datetime", "open_datetime"]
    )
    .round(2)
    .reset_index(drop=True)
    .loc[:, pnl_columns]
)

# Write the GOLD layer file (without the index column)
df_pnl.to_csv(global_path.profitloss_gold_file_path, index=False)

logger.info("GOLD Layer CSV file for ProfitLoss successfully created at:")
logger.info(global_path.profitloss_gold_file_path.resolve())

# Summarise the exported frame
df_pnl.info()

2024-08-07T20:23:29Z - INFO - GOLD Layer CSV file for ProfitLoss successfully created at:
2024-08-07T20:23:29Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\GOLD\ProfitLoss\ProfitLoss_data.csv


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   exchange        120 non-null    object        
 1   segment         120 non-null    object        
 2   stock_name      120 non-null    object        
 3   position        120 non-null    object        
 4   quantity        120 non-null    float64       
 5   open_datetime   120 non-null    datetime64[ns]
 6   open_side       120 non-null    object        
 7   open_price      120 non-null    float64       
 8   open_amount     120 non-null    float64       
 9   close_datetime  120 non-null    datetime64[ns]
 10  close_side      120 non-null    object        
 11  close_price     120 non-null    float64       
 12  close_amount    120 non-null    float64       
 13  pnl_amount      120 non-null    float64       
 14  pnl_percentage  120 non-null    float64       
dtypes: dat

In [14]:
# Holdings snapshots recorded by the portfolio
holdings_all = pd.DataFrame(portfolio.get_holdings())

# Keep only the 'EQ' (Equity) and 'MF' (Mutual Funds) segments, rounded to
# two decimal places
in_scope = holdings_all["segment"].isin(["EQ", "MF"])
df_holdings = holdings_all[in_scope].round(2)

# Calendar date of each snapshot
df_holdings = df_holdings.assign(date=df_holdings["datetime"].dt.date)

# For every (stock_name, date) pair keep the row with the latest datetime
latest_row_labels = df_holdings.groupby(["stock_name", "date"])["datetime"].idxmax()
df_holdings = df_holdings.loc[latest_row_labels].reset_index(drop=True)

# Order rows by segment, stock and date, then keep the export columns
holdings_columns = [
    "date",
    "segment",
    "exchange",
    "stock_name",
    "holding_quantity",
    "avg_price",
    "holding_amount",
]
df_holdings = df_holdings.sort_values(by=["segment", "stock_name", "date"]).loc[
    :, holdings_columns
]

# Write the GOLD layer file (without the index column)
df_holdings.to_csv(global_path.holdings_gold_file_path, index=False)

logger.info("GOLD Layer CSV file for Holding successfully created at:")
logger.info(global_path.holdings_gold_file_path.resolve())

# Summarise the exported frame
df_holdings.info()

2024-08-07T20:23:29Z - INFO - GOLD Layer CSV file for Holding successfully created at:
2024-08-07T20:23:29Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\GOLD\Holdings\Holdings_data.csv


<class 'pandas.core.frame.DataFrame'>
Index: 47 entries, 0 to 28
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   date              47 non-null     object 
 1   segment           47 non-null     object 
 2   exchange          47 non-null     object 
 3   stock_name        47 non-null     object 
 4   holding_quantity  47 non-null     float64
 5   avg_price         47 non-null     float64
 6   holding_amount    47 non-null     float64
dtypes: float64(3), object(4)
memory usage: 2.9+ KB


In [15]:
# Create a function to expand the date range for each stock
def expand_dates(stock_df, end_date=None):
    """
    Expand one stock's rows to a continuous daily series and forward fill gaps.

    The result has one row per calendar day from the earliest value in the
    "date" column up to ``end_date`` (inclusive). Days that had no row in the
    input take the values of the most recent earlier row.

    Parameters:
    stock_df (pd.DataFrame): DataFrame containing stock data for a single stock.
        Must have a "date" column with unique values; ``datetime.date``
        objects, Timestamps and date strings are all accepted.
    end_date (date-like, optional): Last day of the expanded range. Defaults
        to the current day, which was the previous fixed behaviour.

    Returns:
    pd.DataFrame: DataFrame with "date" (datetime64) as the first column,
        followed by the remaining input columns, forward-filled. The input
        frame is not modified.
    """
    # Convert "date" to Timestamps up front so the reindex below matches on
    # like-for-like values instead of relying on implicit date coercion
    stock_df = stock_df.assign(date=pd.to_datetime(stock_df["date"]))

    # Resolve the end of the range; drop any time-of-day component
    last_day = pd.Timestamp.today() if end_date is None else pd.Timestamp(end_date)
    date_range = pd.date_range(
        start=stock_df["date"].min(), end=last_day.normalize()
    )

    # Reindex onto the full daily range, carry values forward over the new
    # rows, and turn the index back into a regular "date" column
    return (
        stock_df.set_index("date")
        .reindex(date_range)
        .ffill()
        .rename_axis("date")
        .reset_index()
    )


# Run expand_dates once per stock, then flatten the resulting group index
# back into ordinary columns
holdings_by_stock = df_holdings.groupby("stock_name")
df_holdings_v2 = holdings_by_stock.apply(expand_dates, include_groups=False)
df_holdings_v2 = df_holdings_v2.reset_index()

In [16]:
# Read the SILVER layer stock prices and parse 'date' as datetime so it is
# comparable with the 'date' column of the expanded holdings
df_StockPrice = pd.read_csv(global_path.stockprice_silver_file_path).assign(
    date=lambda prices: pd.to_datetime(prices["date"])
)

logger.info(
    f"Loaded SILVER Layer stock price data from: {global_path.stockprice_silver_file_path}"
)

# Attach prices to the holdings rows; a left join keeps every holdings row
# even when no price exists for that (date, stock_name) pair
df_holdings_v2 = df_holdings_v2.merge(
    df_StockPrice,
    how="left",
    on=["date", "stock_name"],
)

2024-08-07T20:23:30Z - INFO - Loaded SILVER Layer stock price data from: C:\Users\prashant.tripathi\Code\Upstox\DATA\SILVER\StockPrice\StockPrice_data.csv


In [17]:
# Value each day's holdings at the open/high/low/close price of that day
col_names = ["open", "high", "low", "close"]

# Keep each stock's rows together and in chronological order so the per-stock
# forward fill below carries prices from earlier to later dates
df_holdings_v2 = df_holdings_v2.sort_values(by=["stock_name", "date"])

# Days without a quote (left-join misses) take the last known price of the
# SAME stock. A frame-wide ffill would let the first unquoted days of one
# stock inherit the previous stock's prices. Rows that precede a stock's first
# available quote therefore stay NaN instead of receiving an unrelated price.
df_holdings_v2[col_names] = df_holdings_v2.groupby("stock_name")[col_names].ffill()

# Compute the amounts AFTER filling prices so every row uses its own
# holding_quantity rather than an amount carried over from an earlier day
for col_name in col_names:
    df_holdings_v2[f"{col_name}_amount"] = (
        df_holdings_v2[col_name] * df_holdings_v2["holding_quantity"]
    )

df_holdings_v2 = df_holdings_v2.rename(
    columns={col_name: f"{col_name}_price" for col_name in col_names}
)

# Filter out rows with zero holding quantity
df_holdings_v2 = df_holdings_v2[df_holdings_v2["holding_quantity"] != 0]

# Round the numeric columns to two decimal places
df_holdings_v2 = df_holdings_v2.round(2)

# Sort the DataFrame for organized output
df_holdings_v2 = df_holdings_v2.sort_values(
    by=["date", "segment", "stock_name"]
).reset_index(drop=True)

# Select and order the columns for the final output
df_holdings_v2 = df_holdings_v2[
    [
        "date",
        "segment",
        "stock_name",
        "holding_quantity",
        "avg_price",
        "holding_amount",
        "open_price",
        "open_amount",
        "high_price",
        "high_amount",
        "low_price",
        "low_amount",
        "close_price",
        "close_amount",
    ]
]

# Save the final processed DataFrame to a new CSV file
df_holdings_v2.to_csv(global_path.holdings_gold_file_path_v2, index=None)

logger.info("GOLD Layer CSV file for Holdings successfully created at:")
logger.info(global_path.holdings_gold_file_path_v2.resolve())

# Display information about the final DataFrame
df_holdings_v2.info()

2024-08-07T20:23:30Z - INFO - GOLD Layer CSV file for Holdings successfully created at:
2024-08-07T20:23:30Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\GOLD\Holdings\Holdings_data_v2.csv


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5905 entries, 0 to 5904
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   date              5905 non-null   datetime64[ns]
 1   segment           5905 non-null   object        
 2   stock_name        5905 non-null   object        
 3   holding_quantity  5905 non-null   float64       
 4   avg_price         5905 non-null   float64       
 5   holding_amount    5905 non-null   float64       
 6   open_price        5905 non-null   float64       
 7   open_amount       5905 non-null   float64       
 8   high_price        5905 non-null   float64       
 9   high_amount       5905 non-null   float64       
 10  low_price         5905 non-null   float64       
 11  low_amount        5905 non-null   float64       
 12  close_price       5905 non-null   float64       
 13  close_amount      5905 non-null   float64       
dtypes: datetime64[ns](1), fl

In [18]:
# Amount columns that are totalled across all holdings for each day
amount_columns = [
    "holding_amount",
    "open_amount",
    "high_amount",
    "low_amount",
    "close_amount",
]

# One row per date: sum the amounts, round to two decimals, order by date
# and fix the output column layout
daily_totals = df_holdings_v2.groupby("date")[amount_columns].sum()
df_holdings_v3 = (
    daily_totals.reset_index()
    .round(2)
    .sort_values(by=["date"])
    .reset_index(drop=True)
    .loc[:, ["date", *amount_columns]]
)

# Write the GOLD layer JSON file: a list of records with ISO-formatted dates
df_holdings_v3.to_json(
    global_path.holdings_gold_file_path_v3,
    orient="records",
    date_format="iso",
    indent=4,
)

logger.info("GOLD Layer JSON file for Holdings successfully created at:")
logger.info(global_path.holdings_gold_file_path_v3.resolve())

# Summarise the exported frame
df_holdings_v3.info()

2024-08-07T20:23:30Z - INFO - GOLD Layer JSON file for Holdings successfully created at:
2024-08-07T20:23:30Z - INFO - C:\Users\prashant.tripathi\Code\Upstox\DATA\GOLD\Holdings\Holdings_data_v3.json


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1570 entries, 0 to 1569
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            1570 non-null   datetime64[ns]
 1   holding_amount  1570 non-null   float64       
 2   open_amount     1570 non-null   float64       
 3   high_amount     1570 non-null   float64       
 4   low_amount      1570 non-null   float64       
 5   close_amount    1570 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 73.7 KB
