## BRONZE TO SILVER LAYER


In [1]:
import numpy as np
import pandas as pd
from global_paths import GlobalPaths

In [2]:
# Initialize TradeHistory Paths
TradeHistoryPath = GlobalPaths(source_name="DATA", object_name="TradeHistory")
TradeHistoryGoldPath = TradeHistoryPath.createLayer(layer_name="GOLD")
TradeHistoryGoldFile = TradeHistoryGoldPath.joinpath("TradeHistory_data.csv")

# Initialize Investment Paths
InvestmentPath = GlobalPaths(source_name="DATA", object_name="Investment")
InvestmentGoldPath = InvestmentPath.createLayer(layer_name="GOLD")
InvestmentGoldFile = InvestmentGoldPath.joinpath("Investment_data.csv")

### Silver Layer - Process Investment History

In [3]:
 # Read the CSV file
df_TradeHistory_GOLD = pd.read_csv(TradeHistoryGoldFile)

# Filter the DataFrame
df_TradeHistory_GOLD = df_TradeHistory_GOLD[df_TradeHistory_GOLD["segment"].isin(["MF", "EQ"])]

# Convert datetime to date string
df_TradeHistory_GOLD["date"] = pd.to_datetime(
    df_TradeHistory_GOLD["datetime"]
).dt.date

# Create a new DataFrame with an updated date range
date_range = pd.date_range(
    start=df_TradeHistory_GOLD["date"].min(), end=pd.to_datetime("today"), freq="D"
)    
df_Investment_GOLD = pd.DataFrame({"date": date_range.date})

for stock_name, group in df_TradeHistory_GOLD.groupby("stock_name"):
    df_Investment_GOLD = pd.merge(df_Investment_GOLD, group[["date", "avg_price"]], on="date", how="left").rename(
        columns={"avg_price": stock_name},
    )

# Setting date column as index
df_Investment_GOLD.set_index("date", inplace=True)

# Reindexing to fill the missing data with the last available data
df_Investment_GOLD = df_Investment_GOLD.ffill()
df_Investment_GOLD.replace(0.0, np.nan, inplace=True)

# Save the result as a csv file
df_Investment_GOLD.to_csv(InvestmentGoldFile, index=None)
df_Investment_GOLD.info()
print("Gold Layer csv file for Investment successfully created at:")
print(InvestmentGoldFile.resolve())

<class 'pandas.core.frame.DataFrame'>
Index: 1545 entries, 2020-04-21 to 2024-07-12
Data columns (total 19 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   BHAGERIA                             41 non-null     float64
 1   BPCL                                 369 non-null    float64
 2   GOLDBEES                             116 non-null    float64
 3   HERANBA                              5 non-null      float64
 4   IDEA                                 321 non-null    float64
 5   INFY                                 64 non-null     float64
 6   IRCTC                                39 non-null     float64
 7   KPITTECH                             30 non-null     float64
 8   LICI                                 89 non-null     float64
 9   MIRAE-ASSET-TAX-SAVER-DIRECT-GROWTH  50 non-null     float64
 10  NIFTYBEES                            126 non-null    float64
 11  PNB                 