In [158]:
import pandas as pd

In [159]:
# Function to calculate fiscal year in "YYYY-YYYY" format
def get_fiscal_year_range(date, start_month=4):
    """Return the fiscal-year label ("YYYY-YYYY") that contains ``date``.

    A fiscal year runs from the first day of ``start_month`` to the day
    before the same month of the following calendar year. Dates on or after
    ``start_month`` belong to the fiscal year starting in their own calendar
    year; earlier dates belong to the one that started the previous year.

    Args:
        date: Any object exposing ``.year`` and ``.month`` (for example
            ``datetime.date``, ``datetime.datetime`` or ``pandas.Timestamp``).
            Missing values (``None`` / ``NaT``) are accepted.
        start_month: Month number (1-12) in which the fiscal year begins.
            Defaults to 4 (April), matching the original hard-coded value.

    Returns:
        A string such as ``"2023-2024"``, or ``None`` when ``date`` is missing.

    Raises:
        ValueError: If ``start_month`` is outside 1..12.
    """
    if not 1 <= start_month <= 12:
        raise ValueError(f"start_month must be in 1..12, got {start_month}")

    # A missing timestamp has no fiscal year; without this guard NaT would be
    # formatted as the meaningless label "nan-nan".
    if pd.isna(date):
        return None

    first_year = date.year if date.month >= start_month else date.year - 1
    return f"{first_year}-{first_year + 1}"

In [160]:
# Load the tax-report trades.
df_tax = pd.read_csv("../DATA/BRONZE/TaxReport/TaxReport_data.csv")

# Parse the close timestamps once and reuse the parsed series below.
tax_closed_at = pd.to_datetime(df_tax["close_datetime"])

# Apply the function to add a new column for fiscal year.
# NOTE(review): "fy" is neither a group key nor an aggregated column, so the
# groupby below discards it. Add it to the keys if it is meant to survive.
df_tax["fy"] = tax_closed_at.apply(get_fiscal_year_range)

# Drop the time of day so every trade closed on the same calendar day shares
# one grouping key.
df_tax["close_datetime"] = pd.to_datetime(tax_closed_at.dt.date)

# Strip the first and last "-"-separated parts of multi-part names, e.g.
# "NIFTY-PE-24650-18JUL2024" -> "PE-24650". Names with fewer than two hyphens
# are kept unchanged: a single-hyphen name has nothing between its first and
# last part and would otherwise collapse to an empty string.
df_tax["stock_name"] = df_tax["stock_name"].apply(
    lambda name: "-".join(name.split("-")[1:-1])
    if name.count("-") >= 2
    else name
)

# Total P&L per (day, segment, instrument), ordered by segment then name.
df_tax = (
    df_tax.groupby(["close_datetime", "segment", "stock_name"])
    .agg({"pnl_amount": "sum"})
    .sort_values(by=["segment", "stock_name"])
    .reset_index()
)
print(df_tax["pnl_amount"].sum())
df_tax

10519.95


Unnamed: 0,close_datetime,segment,stock_name,pnl_amount
0,2020-06-15,EQ,BHAGERIA,29.50
1,2022-08-10,EQ,BPCL,-5510.45
2,2021-06-04,EQ,GOLDBEES,0.42
3,2021-03-05,EQ,HERANBA,2316.00
4,2021-03-09,EQ,HERANBA,1628.00
...,...,...,...,...
59,2024-07-29,FO,PE-24900,-3085.00
60,2024-07-30,FO,PE-24900,40.00
61,2024-07-31,FO,PE-24950,-335.00
62,2024-08-01,FO,PE-24950,-868.75


In [161]:
# Path of the profit/loss CSV file
filepath = "../DATA/GOLD/ProfitLoss/ProfitLoss_data.csv"

# Read the CSV file into a DataFrame
df_pnl = pd.read_csv(filepath)

# Drop the time of day so every trade closed on the same calendar day shares
# one grouping key.
pnl_closed_at = pd.to_datetime(df_pnl["close_datetime"])
df_pnl["close_datetime"] = pd.to_datetime(pnl_closed_at.dt.date)

# Strip the first and last "-"-separated parts of multi-part names, e.g.
# "NIFTY-PE-24650-18JUL2024" -> "PE-24650". Names with fewer than two hyphens
# are kept unchanged: a single-hyphen name has nothing between its first and
# last part and would otherwise collapse to an empty string.
df_pnl["stock_name"] = df_pnl["stock_name"].apply(
    lambda name: "-".join(name.split("-")[1:-1])
    if name.count("-") >= 2
    else name
)

# Total P&L per (day, segment, instrument), ordered by segment then name.
df_pnl = (
    df_pnl.groupby(["close_datetime", "segment", "stock_name"])
    .agg({"pnl_amount": "sum"})
    .sort_values(by=["segment", "stock_name"])
    .reset_index()
)
print(df_pnl["pnl_amount"].sum())
df_pnl

13639.95


Unnamed: 0,close_datetime,segment,stock_name,pnl_amount
0,2020-06-15,EQ,BHAGERIA,29.50
1,2022-08-10,EQ,BPCL,-5510.45
2,2021-06-04,EQ,GOLDBEES,0.42
3,2021-03-05,EQ,HERANBA,2316.00
4,2021-03-09,EQ,HERANBA,1628.00
...,...,...,...,...
59,2024-07-29,FO,PE-24900,-3085.00
60,2024-07-30,FO,PE-24900,40.00
61,2024-07-31,FO,PE-24950,-335.00
62,2024-08-01,FO,PE-24950,-868.75


In [162]:
# Outer-merge the two daily P&L tables on (close_datetime, segment, stock_name)
# so that rows present in only one source are kept; their missing amount is NaN.
df_merged = pd.merge(
    df_pnl,
    df_tax,
    on=["close_datetime", "segment", "stock_name"],
    suffixes=("_pnl", "_tax"),
    how="outer",
)

# Compare the amounts with a tolerance instead of exact float equality: both
# sides are sums of floats, so equal totals can differ by rounding noise.
# NOTE(review): 0.005 assumes amounts are 2-decimal currency values, as the
# displayed outputs suggest — confirm against the source data.
PNL_TOLERANCE = 0.005
pnl_gap = (df_merged["pnl_amount_pnl"] - df_merged["pnl_amount_tax"]).abs()
# A NaN gap (row missing from one source) compares False, i.e. a mismatch.
df_merged["pnl_match"] = pnl_gap < PNL_TOLERANCE

# Keep only the rows where the two sources disagree.
df_merged = df_merged[~df_merged["pnl_match"]]

# "pnl_match" is constant after the filter, so order by name and day instead;
# fillna(0) shows an amount missing from one source as 0.
df_merged.sort_values(by=["stock_name", "close_datetime"]).fillna(0).reset_index(
    drop=True
)

Unnamed: 0,close_datetime,segment,stock_name,pnl_amount_pnl,pnl_amount_tax,pnl_match
0,2024-07-11,FO,CE-24400,2865.0,45.0,False
1,2024-07-18,FO,PE-24650,-371.25,-671.25,False


In [163]:
# df_trade_history["stock_name"] == "NIFTY-PE-24650-18JUL2024"

In [168]:
# df_pnl[df_pnl["stock_name"].str.contains("PE-24650")]

Unnamed: 0,close_datetime,segment,stock_name,pnl_amount
55,2024-07-15,FO,PE-24650,25.0
56,2024-07-18,FO,PE-24650,-371.25
57,2024-07-26,FO,PE-24650,-796.25


In [169]:
# df_tax[df_tax["stock_name"].str.contains("PE-24650")]

Unnamed: 0,close_datetime,segment,stock_name,pnl_amount
55,2024-07-15,FO,PE-24650,25.0
56,2024-07-18,FO,PE-24650,-671.25
57,2024-07-26,FO,PE-24650,-796.25


In [None]:
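# Rows where the two sources disagree (close_datetime, segment, stock_name),
# taken from the comparison output above:
# 2024-07-11	FO	CE-24400
# 2024-07-18	FO	PE-24650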