## SILVER TO GOLD LAYER

### Gold Layer - Holdings Trends (aggregated from Holdings History)

In [1]:
## Import necessary libraries and utility functions
import pandas as pd
from ETLTools import GlobalPath

In [2]:
# Build the GlobalPath handles for the two GOLD-layer CSV files used in this
# notebook: the holdings-history file that is read and the file that is written.
# NOTE(review): "Trands" looks like a misspelling of "Trends"; the name is kept
# unchanged because other code may depend on this exact file name — confirm.
holdingshistory_gold_file_path = GlobalPath(
    "DATA/GOLD/Holdings/HoldingsHistory_data.csv"
)
holdingstrands_gold_file_path = GlobalPath("DATA/GOLD/Holdings/HoldingsTrands_data.csv")

In [3]:
# Read the holdings-history CSV from the GOLD layer and convert its "date"
# column to pandas datetimes so it can be grouped and sorted by date later on.
df_holdings = pd.read_csv(holdingshistory_gold_file_path).assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)
print(
    f"Loaded GOLD Layer holdings data from: {holdingshistory_gold_file_path.relative_path()}"
)

Loaded GOLD Layer holdings data from: DATA/GOLD/Holdings/HoldingsHistory_data.csv


In [4]:
# Columns whose values are totalled for every distinct date
amount_columns = [
    "holding_amount",
    "open_amount",
    "high_amount",
    "low_amount",
    "close_amount",
]

# Collapse the rows into one row per date by summing each amount column;
# as_index=False keeps "date" as a regular column instead of the index.
df_holdings_trands = df_holdings.groupby("date", as_index=False)[amount_columns].sum()

In [5]:
# Round the numeric columns to two decimal places, then strip the "_amount"
# text from every column label (labels without it, such as "date", are unchanged).
df_holdings_trands = df_holdings_trands.round(2).rename(
    columns=lambda label: label.replace("_amount", "")
)

In [6]:
# Column order of the exported file
output_columns = ["date", "open", "high", "low", "close", "holding"]

# Keep only the output columns in that order, sort the rows by date and
# renumber the index from 0.
df_holdings_trands = (
    df_holdings_trands.loc[:, output_columns]
    .sort_values("date")
    .reset_index(drop=True)
)

# Write the result to the GOLD-layer CSV file without the index column
df_holdings_trands.to_csv(holdingstrands_gold_file_path, index=False)
print(
    f"GOLD Layer CSV file for Holdings successfully created at: {holdingstrands_gold_file_path.relative_path()}"
)

# Display DataFrame information (columns, dtypes, non-null counts) for verification
df_holdings_trands.info()

GOLD Layer CSV file for Holdings successfully created at: DATA/GOLD/Holdings/HoldingsTrands_data.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1590 entries, 0 to 1589
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   date     1590 non-null   datetime64[ns]
 1   open     1590 non-null   float64       
 2   high     1590 non-null   float64       
 3   low      1590 non-null   float64       
 4   close    1590 non-null   float64       
 5   holding  1590 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 74.7 KB
