## GOLD LAYER - TRADE HISTORY TO HOLDINGS


In [1]:
import pandas as pd
from common_utilities import global_path
import datetime

### GOLD Layer - Process Holdings History


In [2]:
# Build the working DataFrame in a single pipeline:
#   1. load the trade-history CSV,
#   2. keep only rows whose segment is "EQ" or "MF",
#   3. parse the 'datetime' column into real timestamps (in place, same column position),
#   4. append a 'date' column holding just the calendar-date part of each timestamp,
#   5. order the rows by segment, then stock name, then timestamp.
df = (
    pd.read_csv(global_path.tradehistory_gold_file_path)
    .loc[lambda trades: trades["segment"].isin(["EQ", "MF"])]
    .assign(datetime=lambda trades: pd.to_datetime(trades["datetime"]))
    .assign(date=lambda trades: trades["datetime"].dt.date)
    .sort_values(by=["segment", "stock_name", "datetime"])
)

In [3]:
# For every (date, stock_name) pair, look up the latest timestamp recorded on that day
latest_per_day = df.groupby(["date", "stock_name"], as_index=False)["datetime"].max()

# Inner-join back on all three keys: only the rows that carry that latest
# timestamp survive, and they keep all of their original columns
df = pd.merge(df, latest_per_day, on=["date", "stock_name", "datetime"])


In [4]:
# Expand each stock's per-day records into a continuous daily series that runs
# from the stock's first recorded date up to today, carrying the most recent
# known values forward across days without a record.

# Resolve "today" once so every stock is extended to the same end date
# (and the clock is not re-read on every iteration).
today = datetime.datetime.today().date()

# Initialize a list to store the processed per-stock DataFrames
result = []

# groupby hands over each stock's rows in a single pass instead of re-scanning
# the whole DataFrame with a boolean mask per stock; sort=False keeps the
# stocks in order of first appearance.
for stock_name, stock_data in df.groupby("stock_name", sort=False):
    # Use the 'date' column as the index so it can be reindexed to a full calendar
    stock_data = stock_data.set_index("date")

    # reindex() raises "cannot reindex on an axis with duplicate labels" when
    # several rows share one date (e.g. rows tied on the same timestamp).
    # Keep the last such row per date.
    # NOTE(review): assumes the last row in the current order is the one that
    # reflects the end-of-day position - confirm against the upstream file.
    stock_data = stock_data[~stock_data.index.duplicated(keep="last")]

    # Daily calendar from the stock's earliest date through today
    date_range = pd.date_range(start=stock_data.index.min(), end=today, freq="D")

    # Align to the full calendar, forward-filling days that have no record
    stock_data = stock_data.reindex(date_range, method="ffill")

    # Bring the calendar back as a regular 'date' column
    stock_data = stock_data.reset_index().rename(columns={"index": "date"})

    result.append(stock_data)

# Combine all stocks; pd.concat raises on an empty list, so fall back to an
# empty frame with the same columns when no rows survived the earlier filters
if result:
    df = pd.concat(result, ignore_index=True)
else:
    df = df.iloc[0:0].copy()

# Drop the days on which nothing is held
df = df[df["holding_quantity"] != 0]

# Sort the final DataFrame by 'segment', 'stock_name', and 'date'
df = df.sort_values(by=["segment", "stock_name", "date"])


In [5]:
# Columns written to the holdings file, in output order
holdings_columns = [
    "date",
    "exchange",
    "segment",
    "stock_name",
    "scrip_code",
    "holding_quantity",
    "avg_price",
    "holding_amount",
]
df = df[holdings_columns]

# Write the result to the holdings CSV, leaving the DataFrame index out of the file
holdings_path = global_path.holdings_gold_file_path
df.to_csv(holdings_path, index=None)

# Show a summary of the saved DataFrame (columns, dtypes, non-null counts)
df.info()

# Report where the file was written
print("GOLD Layer CSV file for Holdings successfully created at:")
print(holdings_path.resolve())

<class 'pandas.core.frame.DataFrame'>
Index: 5857 entries, 0 to 19359
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   date              5857 non-null   datetime64[ns]
 1   exchange          5857 non-null   object        
 2   segment           5857 non-null   object        
 3   stock_name        5857 non-null   object        
 4   scrip_code        5857 non-null   object        
 5   holding_quantity  5857 non-null   int64         
 6   avg_price         5857 non-null   float64       
 7   holding_amount    5857 non-null   float64       
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 411.8+ KB
GOLD Layer CSV file for Holdings successfully created at:
C:\Users\prashant.tripathi\Code\Upstox\DATA\GOLD\Holdings\Holdings_data.csv
