## SILVER TO GOLD LAYER

### Gold Layer - Holding


In [1]:
# Importing Common Utility Function
import pandas as pd

from StockETL import GlobalPath, Portfolio

In [2]:
# Execute the shared utility notebook inside this kernel so that the names it
# defines become available here.
# NOTE(review): `align_with_datacontract`, used further down, is not imported
# anywhere in this notebook, so it presumably comes from this %run — confirm.
%run ../COMMON/common_utility.ipynb

# User whose data is processed; interpolated into the per-user file paths
# built in the next cell.
USERNAME = 'ptprashanttripathi'


In [3]:
# ---------------------------------------------------------------------------
# Input files read by this notebook (SILVER layer)
# ---------------------------------------------------------------------------
symbol_silver_file_path = GlobalPath("DATA/SILVER/Symbol/Symbol_data.csv")
stockprice_silver_file_path = GlobalPath("DATA/SILVER/StockPrice/StockPrice_data.csv")
tradehistory_silver_file_path = GlobalPath(
    f"DATA/SILVER/TradeHistory/{USERNAME}/TradeHistory_data.csv"
)

# ---------------------------------------------------------------------------
# Data contracts passed to align_with_datacontract
# ---------------------------------------------------------------------------
current_holding_gold_schema_file_path = GlobalPath(
    "CONFIG/DATA_CONTRACTS/GOLD/CurrentHolding.json"
)
holding_gold_schema_file_path = GlobalPath("CONFIG/DATA_CONTRACTS/GOLD/Holding.json")
holding_source_schema_file_path = GlobalPath(
    "CONFIG/DATA_CONTRACTS/SOURCE/Holding.json"
)

# ---------------------------------------------------------------------------
# Output files written by this notebook (per-user GOLD and SOURCE locations)
# ---------------------------------------------------------------------------
current_holding_records_file_path = GlobalPath(
    f"DATA/GOLD/Holding/{USERNAME}/CurrentHolding_data.csv"
)
holding_gold_file_path = GlobalPath(f"DATA/GOLD/Holding/{USERNAME}/Holding_data.csv")
holding_source_layer_path = GlobalPath(
    f"DATA/SOURCE/Holding/{USERNAME}/Holding_data.csv"
)

### Data Processing

- Read and sort trade history data.
- Apply portfolio trade logic.


In [4]:
# Load the SILVER trade history and prepare it for sequential replay:
#   1. keep only rows whose segment is 'EQ' (Equity) or 'MF' (Mutual Funds),
#   2. parse the 'datetime' column into real timestamps,
#   3. order the trades chronologically.
# The steps are chained so each one produces a fresh frame (no assignment onto
# a filtered slice) and re-running the cell always starts from the CSV.
df_trade_history = (
    pd.read_csv(tradehistory_silver_file_path)
    .loc[lambda trades: trades["segment"].isin(["EQ", "MF"])]
    .assign(datetime=lambda trades: pd.to_datetime(trades["datetime"]))
    # kind="stable": trades sharing the same timestamp keep their file order.
    # The rows are replayed one by one afterwards, so the relative order of
    # ties should not be left to the default, non-stable sort algorithm.
    .sort_values(by="datetime", kind="stable")
)

print(f"Read SILVER Layer trade history data from => {tradehistory_silver_file_path}")

Read SILVER Layer trade history data from => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SILVER/TradeHistory/ptprashanttripathi/TradeHistory_data.csv


### Portfolio Logic Application

- Instantiate Portfolio and apply trade logic.
- Handle expired stocks.


In [5]:
# Replay the trade history through a fresh Portfolio, one record at a time
portfolio = Portfolio()

# Each row becomes a plain dict keyed by column name
trade_records = df_trade_history.to_dict(orient="records")
for trade_record in trade_records:
    portfolio.trade(trade_record)

# Once every trade has been applied, let the portfolio process expired stocks
portfolio.check_expired_stocks()

In [6]:
# Build a DataFrame from the portfolio's current holdings
df_currentholding = pd.DataFrame(portfolio.get_current_holding())

# Align the DataFrame with the GOLD CurrentHolding data contract
df_currentholding = align_with_datacontract(
    df_currentholding, current_holding_gold_schema_file_path
)

# Save the final DataFrame to a CSV file.
# index=False is the documented way to omit the row index; the previous
# index=None only worked because None happens to be falsy.
df_currentholding.to_csv(current_holding_records_file_path, index=False)

print("GOLD Layer CSV file for Current Holdings successfully created at =>")
print(current_holding_records_file_path)

# Display summary information about the DataFrame
df_currentholding.info()

DataContract loaded from => /home/runner/work/PortfolioTracker/PortfolioTracker/CONFIG/DATA_CONTRACTS/GOLD/CurrentHolding.json
GOLD Layer CSV file for Current Holdings successfully created at =>
/home/runner/work/PortfolioTracker/PortfolioTracker/DATA/GOLD/Holding/ptprashanttripathi/CurrentHolding_data.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   scrip_name  26 non-null     string        
 1   symbol      26 non-null     string        
 2   exchange    26 non-null     string        
 3   segment     26 non-null     string        
 4   datetime    26 non-null     datetime64[ns]
 5   side        26 non-null     string        
 6   quantity    26 non-null     float64       
 7   price       26 non-null     float64       
 8   amount      26 non-null     float64       
dtypes: datetime64[ns](1), float64(3), string(5)
memory usage:

### Final Processing and Export

- Select and sort relevant columns.
- Save the processed data as a CSV file in the Gold layer.


In [7]:
# Materialise the portfolio's holding history as a DataFrame
df_holding = pd.DataFrame(portfolio.get_holding_history())

# Add the calendar date (time of day dropped) of every record
df_holding = df_holding.assign(date=df_holding["datetime"].dt.date)

# For each (scrip_name, date) pair, locate the row with the latest datetime
idx = df_holding.groupby(["scrip_name", "date"])["datetime"].idxmax()

# Keep only those rows, i.e. one record per scrip per day, renumbered from 0
df_holding = df_holding.loc[idx].reset_index(drop=True)

In [8]:
# Create a function to expand the date range for each stock


def expand_dates(stock_df, end_date=None):
    """
    Expand one stock's records to a continuous daily series.

    The frame is re-indexed on every calendar day from its earliest ``date``
    up to ``end_date`` and the gaps are forward-filled, so each day carries
    the most recent known values.

    Parameters:
    stock_df (pd.DataFrame): Data for a single stock. Must contain a ``date``
        column with unique values (``datetime.date``, ``Timestamp`` or
        anything ``pd.to_datetime`` accepts).
    end_date (optional): Last day of the expanded range. Defaults to today,
        which is the original behaviour.

    Returns:
    pd.DataFrame: One row per day, with ``date`` as the first column
        (dtype datetime64) followed by the forward-filled original columns.
    """
    # Resolve the end of the range; "today" stays the default
    range_end = pd.to_datetime("today") if end_date is None else pd.to_datetime(end_date)

    # Convert explicitly so the index built below is a real DatetimeIndex and
    # matching against the daily range does not rely on implicit coercion of
    # datetime.date objects.
    dated_df = stock_df.assign(date=pd.to_datetime(stock_df["date"]))

    # Daily calendar from the first record to the end of the range
    date_range = pd.date_range(start=dated_df["date"].min(), end=range_end)

    return (
        dated_df.set_index("date")
        .reindex(date_range)  # introduces all-NaN rows for the missing days
        .ffill()  # carry the last known values forward
        .rename_axis("date")  # name the index so reset_index yields 'date'
        .reset_index()
    )


# Expand every scrip's history on its own, then turn the resulting group
# index back into ordinary columns
holdings_by_scrip = df_holding.groupby("scrip_name")
df_holding = holdings_by_scrip.apply(expand_dates, include_groups=False).reset_index()

In [9]:
# Read the SILVER layer stock prices
df_stockprice = pd.read_csv(stockprice_silver_file_path)
print(f"Loaded SILVER Layer stock price data from => {stockprice_silver_file_path}")

# Parse 'date' into datetime so it is comparable with the holding dates
df_stockprice = df_stockprice.assign(date=pd.to_datetime(df_stockprice["date"]))

# Show the schema of the loaded prices
df_stockprice.info()

Loaded SILVER Layer stock price data from => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SILVER/StockPrice/StockPrice_data.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32160 entries, 0 to 32159
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    32160 non-null  datetime64[ns]
 1   symbol  32160 non-null  object        
 2   open    32160 non-null  float64       
 3   high    32160 non-null  float64       
 4   low     32160 non-null  float64       
 5   close   32160 non-null  float64       
 6   volume  29653 non-null  float64       
dtypes: datetime64[ns](1), float64(5), object(1)
memory usage: 1.7+ MB


In [10]:
# Attach the daily stock prices to the expanded holding data. A left join keeps
# every holding day, so days without a price row get NaN in the price columns.
df_holding = pd.merge(
    df_holding,
    df_stockprice,
    on=["date", "symbol"],
    how="left",
)

# Carry the last known price forward *within each scrip*.
# A frame-wide ffill() would let the first rows of one scrip inherit the last
# prices of the scrip stored just above it; grouping keeps the fill inside each
# scrip and leaves leading days without any price as NaN.
stockprice_value_cols = [
    column for column in df_stockprice.columns if column not in ("date", "symbol")
]
df_holding[stockprice_value_cols] = df_holding.groupby("scrip_name")[
    stockprice_value_cols
].ffill()

# Calculate the OHLC value of holding. This runs after the fill so that every
# amount is the filled price times that day's own holding quantity.
col_names = ["open", "high", "low", "close"]
for col_name in col_names:
    df_holding[f"{col_name}_price"] = df_holding[col_name]
    df_holding[f"{col_name}_amount"] = (
        df_holding[col_name] * df_holding["holding_quantity"]
    )

# Filter out rows with zero holding quantity
df_holding = df_holding[df_holding["holding_quantity"] != 0]

In [11]:
# Reset index to ensure it starts from 0
df_holding = df_holding.reset_index(drop=True)

# Align the DataFrame with the GOLD Holding data contract
df_holding = align_with_datacontract(df_holding, holding_gold_schema_file_path)

# Save the final processed DataFrame to a new CSV file.
# index=False is the documented way to omit the row index; the previous
# index=None only worked because None happens to be falsy.
df_holding.to_csv(holding_gold_file_path, index=False)

print(
    f"GOLD Layer CSV file for Holding successfully created at: {holding_gold_file_path}"
)

# Display summary information about the final DataFrame
df_holding.info()

DataContract loaded from => /home/runner/work/PortfolioTracker/PortfolioTracker/CONFIG/DATA_CONTRACTS/GOLD/Holding.json


GOLD Layer CSV file for Holding successfully created at: /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/GOLD/Holding/ptprashanttripathi/Holding_data.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6943 entries, 0 to 6942
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   date              6943 non-null   datetime64[ns]
 1   segment           6943 non-null   string        
 2   exchange          6943 non-null   string        
 3   symbol            6943 non-null   string        
 4   scrip_name        6943 non-null   string        
 5   holding_quantity  6943 non-null   float64       
 6   avg_price         6943 non-null   float64       
 7   holding_amount    6943 non-null   float64       
 8   open_price        6940 non-null   float64       
 9   open_amount       6940 non-null   float64       
 10  high_price        6940 non-null   float64       
 11  high_amount       6940 

In [12]:
# Build the per-symbol holding window (first and last held date plus ISIN)
# and write it to the SOURCE layer.

# Load the symbol reference data; it supplies the ISIN of each symbol
df_symbol = pd.read_csv(symbol_silver_file_path)
print(f"Loaded data from => {symbol_silver_file_path}")

# Calculate the min and max dates for each stock.
# NOTE: from here on df_holding holds this aggregate, not the daily rows.
df_holding = (
    df_holding.groupby(["segment", "exchange", "symbol"])
    .agg(min_date=("date", "min"), max_date=("date", "max"))
    .reset_index()
)

# Add the ISIN of every symbol; a left join keeps symbols that have no entry
# in the symbol table
df_holding = df_holding.merge(
    df_symbol[["symbol", "isin"]],
    on="symbol",
    how="left",
)

# Align the DataFrame with the SOURCE Holding data contract
df_holding = align_with_datacontract(df_holding, holding_source_schema_file_path)

# Save the final processed DataFrame to a new CSV file.
# index=False is the documented way to omit the row index.
df_holding.to_csv(holding_source_layer_path, index=False)

# The file is written under DATA/SOURCE, so the message names the SOURCE layer
# (it previously said "GOLD Layer").
print(
    f"SOURCE Layer CSV file for Holding successfully created at: {holding_source_layer_path}"
)

# Display summary information about the final DataFrame
df_holding.info()

Loaded data from => /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SILVER/Symbol/Symbol_data.csv
DataContract loaded from => /home/runner/work/PortfolioTracker/PortfolioTracker/CONFIG/DATA_CONTRACTS/SOURCE/Holding.json
GOLD Layer CSV file for Holding successfully created at: /home/runner/work/PortfolioTracker/PortfolioTracker/DATA/SOURCE/Holding/ptprashanttripathi/Holding_data.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   segment   21 non-null     string        
 1   exchange  21 non-null     string        
 2   symbol    21 non-null     string        
 3   min_date  21 non-null     datetime64[ns]
 4   max_date  21 non-null     datetime64[ns]
 5   isin      21 non-null     string        
dtypes: datetime64[ns](2), string(4)
memory usage: 1.1 KB
