In [14]:
import pandas as pd
import numpy as np
import os
import pathlib
from common_utilities import replace_punctuation_from_columns,TradeHistory,stock_names_dict

In [15]:
## Folder and File path
bronze_path = "../DATA/BRONZE/"
silver_path = "../DATA/SILVER/"
gold_path   = "../DATA/GOLD/"

bronze_trade_history_file_path = bronze_path+"TRADE_HISTORY/"
bronze_stock_price_file_path = bronze_path+"STOCK_PRICE/"

silver_file_path = silver_path+'TradeHistory.csv'
gold_file_path = gold_path+'TradeHistory.csv'
gold_stock_price_file_path = gold_path+"StockPrice.csv"

In [16]:
## BRONZE TO SILVER

# Initialize an empty list to store DataFrames
dfs = []

# List all CSV files in the folder
bronze_csv_files = pathlib.Path(bronze_trade_history_file_path).glob('*.csv')

# Loop through the CSV files
for file_path in bronze_csv_files:
    # Read the CSV file 
    df = pd.read_csv(file_path)

    # Append the DataFrame to the list
    dfs.append(df)

# Concatenate all DataFrames into one
df_silver = pd.concat(dfs, ignore_index=True)

df_silver.columns = replace_punctuation_from_columns(df_silver.columns)
df_silver.dropna(how='all', axis=1, inplace=True) 
df_silver = df_silver[df_silver['segment'] == 'EQ']

df_silver['date'] = pd.to_datetime(df_silver['date'])
df_silver['scrip_code'] = df_silver['scrip_code'].astype('int64')
df_silver['expiry'] = pd.to_datetime(df_silver['expiry'])
df_silver['quantity'] = df_silver['quantity'].astype('int64')

# sort the dataframe by date
df_silver = df_silver.sort_values(by=["date","trade_time","company"])

# Save the result as a CSV file
df_silver.to_csv(silver_file_path, index=False)
df_silver.info()

<class 'pandas.core.frame.DataFrame'>
Index: 42 entries, 39 to 40
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   date             42 non-null     datetime64[ns]
 1   company          42 non-null     object        
 2   amount           42 non-null     float64       
 3   exchange         42 non-null     object        
 4   segment          42 non-null     object        
 5   scrip_code       42 non-null     int64         
 6   instrument_type  42 non-null     object        
 7   strike_price     42 non-null     object        
 8   expiry           0 non-null      datetime64[ns]
 9   trade_num        40 non-null     float64       
 10  trade_time       40 non-null     object        
 11  side             42 non-null     object        
 12  quantity         42 non-null     int64         
 13  price            42 non-null     float64       
dtypes: datetime64[ns](2), float64(3), int64(2), obje

  df_silver = pd.concat(dfs, ignore_index=True)


In [17]:
## SILVER TO GOLD

# read the csv file
df_silver = pd.read_csv(silver_file_path)

# replace scrip code to compnay name
df_silver["stock_name"] = df_silver["scrip_code"].replace(stock_names_dict)

# combine 'date' and 'trade_time', and create a datetime column with fallback to 'date' if 'trade_time' is missing
df_silver["datetime"] = pd.to_datetime(df_silver["date"] + " " + df_silver["trade_time"].fillna("00:00:00"))

# sort the dataframe by date
df_silver = df_silver.sort_values(by="datetime")

# rename the columns
df_silver = df_silver.rename(
    columns={
        "side": "trade_type",
        "quantity": "trade_quantity",
        "price": "trade_price",
    }
)

data_dict = list()
trade_history = dict()
for _, row in df_silver.iterrows():
    stock_name = row["stock_name"]

    if stock_name not in trade_history:
        trade_history[stock_name] = TradeHistory(stock_name)

    if row["trade_type"] == "Buy":
        row["buy_price"] = row["trade_price"]
        trade_history[stock_name].trade_price.append(row["trade_price"])
        trade_history[stock_name].trade_quantity.append(row["trade_quantity"])
    elif row["trade_type"] == "Sell":
        row["sell_price"] = row["trade_price"]
        row["buy_price"] = trade_history[stock_name].fifo_sell_calc(
            row["trade_quantity"]
        )
    else:
        raise Exception(f'{row["trade_type"]} was never excepected')
    
    row["holding_quantity"] = trade_history[stock_name].holding_quantity()
    row["avg_price"] = trade_history[stock_name].calc_avg_price()

    data_dict.append(row)

df_gold = pd.DataFrame(data_dict)
df_gold = df_gold.round(2)
df_gold = df_gold[
    [
        "datetime",
        "stock_name",
        "trade_type",
        "trade_quantity",
        "buy_price",
        "sell_price",
        "holding_quantity",
        "avg_price",
    ]
]
df_gold.to_csv(gold_file_path, index=False)
df_gold.info()

<class 'pandas.core.frame.DataFrame'>
Index: 42 entries, 0 to 41
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   datetime          42 non-null     datetime64[ns]
 1   stock_name        42 non-null     object        
 2   trade_type        42 non-null     object        
 3   trade_quantity    42 non-null     int64         
 4   buy_price         42 non-null     float64       
 5   sell_price        15 non-null     float64       
 6   holding_quantity  42 non-null     int64         
 7   avg_price         42 non-null     float64       
dtypes: datetime64[ns](1), float64(3), int64(2), object(2)
memory usage: 3.0+ KB


In [18]:
## GOLD STOCK_PRICE

# DATA SOURCE 'https://query1.finance.yahoo.com/v7/finance/download/{each}.NS?period1=1540857600&period2=1698624000&interval=1d&events=history&includeAdjustedClose=true'

# List all CSV files in the folder
csv_files = [f for f in os.listdir(bronze_stock_price_file_path) if f.endswith(".NS.csv")]

# Initialize an empty list to store DataFrames
df_gold_stock_price = None

# Loop through the CSV files
for file in csv_files:
    file_path = os.path.join(bronze_stock_price_file_path, file)
    stock_name = file.split(".")[0].upper().strip()
    # Read the csv file
    temp_df = pd.read_csv(file_path)
    temp_df = temp_df[["Date", "Close"]]
    temp_df = temp_df.rename(
        columns={
            "Date": "date",
            "Close": stock_name,
        }
    )
    # marge the DataFrame
    if df_gold_stock_price is not None:
        df_gold_stock_price = pd.merge(df_gold_stock_price, temp_df, on="date", how="outer")
    else:
        df_gold_stock_price = temp_df

df_gold_stock_price = df_gold_stock_price.round(2)
# Save the result as a CSV file
df_gold_stock_price.to_csv(gold_stock_price_file_path, index=False)
df_gold_stock_price.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1235 entries, 0 to 1234
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        1235 non-null   object 
 1   BHAGERIA    1235 non-null   float64
 2   NIFTYBEES   1235 non-null   float64
 3   SBIN        1235 non-null   float64
 4   PNB         1235 non-null   float64
 5   YESBANK     1235 non-null   float64
 6   GOLDBEES    1235 non-null   float64
 7   IDEA        1235 non-null   float64
 8   VOLTAS      1235 non-null   float64
 9   TATAMOTORS  1235 non-null   float64
 10  BPCL        1235 non-null   float64
 11  INFY        1235 non-null   float64
 12  TATACHEM    1235 non-null   float64
 13  HERANBA     655 non-null    float64
 14  TATAPOWER   1235 non-null   float64
 15  LICI        361 non-null    float64
dtypes: float64(15), object(1)
memory usage: 154.5+ KB


In [19]:
## GOLD INVESTED and HOLDING

# read the csv file
df_gold = pd.read_csv(gold_file_path)

# convert Datetime to Date string
df_gold["date"] = pd.to_datetime(df_gold["datetime"]).dt.date

golden_table_names = {"Invested": "avg_price", "Holdings": "holding_quantity"}

date_range = pd.date_range(
    start=df_gold["date"].min(), end=pd.to_datetime("today"), freq="D"
)

for file_name,table_name in golden_table_names.items():
    # Create a new DataFrame with an updated date range
    df_merged = pd.DataFrame({"date": date_range.date})
    grouped = df_gold.groupby("stock_name")
    for stock_name, group in grouped:
        df_merged = pd.merge(
            df_merged, group[["date", table_name]], on="date", how="left"
        ).rename(
            columns={table_name: stock_name},
        )

    # setting date column as index
    df_merged.set_index("date", inplace=True)
    # Reindexing to fill the missing data with the last available data
    df_merged = df_merged.ffill()
    df_merged.replace(0.0, np.nan, inplace=True)
    df_merged.to_csv(f"{gold_path}{file_name}.csv")