In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# Relative location of the data directory: two levels up, then "data".
DATA_DIR = Path("..") / ".." / "data"

In [3]:
# Column names as they appear in the inventory CSV headers.
# NOTE: "Portoflio" (sic) is the spelling used by the data files themselves;
# do not "correct" it here or the column lookup will fail.
PORTFOLIO_CODE, ASSET_CODE, QUANTITY = (
    "Portoflio Code",
    "Asset Code",
    "Quantity",
)

### Chargement des données

In [4]:
# Only the (portfolio, asset) keys and the quantity are loaded from each file.
usecols = [PORTFOLIO_CODE, ASSET_CODE, QUANTITY]

# Read both semicolon-separated inventory snapshots with identical options;
# snapshot 1 is the "before" state and snapshot 2 the "after" state used below.
portfolio_snapshot_1_df, portfolio_snapshot_2_df = (
    pd.read_csv(DATA_DIR / file_name, sep=";", usecols=usecols)
    for file_name in (
        "20241122_203007.ptf_inventories.csv",
        "20241125_200215.ptf_inventories.csv",
    )
)

### Agrégation des lignes multiples pour un même asset dans un même portefeuille

In [5]:
# Flag every row of snapshot 1 whose (portfolio, asset) pair occurs more than
# once; keep=False marks all occurrences, not just the repeats.
mask_duplicated = portfolio_snapshot_1_df.duplicated(
    subset=[PORTFOLIO_CODE, ASSET_CODE],
    keep=False,
)
# Display the duplicated rows.
portfolio_snapshot_1_df.loc[mask_duplicated]

Unnamed: 0,Portoflio Code,Asset Code,Quantity
47,111111,FutUCO22412,31.609809
62,111111,FutBCO22412,-2.370736
69,111111,FutBCO22412,11.063433
70,111111,FutUCO22412,42.673242
274,111111,ISINS7620S9Y,303.454162
275,111111,ISINS7620S9Y,1296.792395
371,111111,ISIN01Q91500,2245.876896
372,111111,ISIN01Q91500,6513.991291
597,222222,FutBCO22412,-1.58049
602,222222,FutPIN22412,2.370736


In [6]:
# Collapse each snapshot to one row per (portfolio, asset) pair by summing the
# remaining columns; as_index=False keeps the keys as regular columns.
aggregated_p1, aggregated_p2 = (
    snapshot_df.groupby([PORTFOLIO_CODE, ASSET_CODE], as_index=False).sum()
    for snapshot_df in (portfolio_snapshot_1_df, portfolio_snapshot_2_df)
)


### Jointure (full outer) avant / après

In [7]:
# Full outer join on (portfolio, asset): pairs present in only one snapshot are
# kept, with NaN on the missing side. Overlapping non-key columns receive the
# "_before" (snapshot 1) and "_after" (snapshot 2) suffixes.
merged_df = aggregated_p1.merge(
    aggregated_p2,
    on=[PORTFOLIO_CODE, ASSET_CODE],
    how="outer",
    suffixes=("_before", "_after"),
)

### Remplissage des valeurs manquantes à 0 (si un asset n'était pas présent, alors sa quantité valait 0)

In [8]:
# A pair missing from one snapshot has NaN on that side after the outer join;
# treat it as a zero quantity.
quantity_columns = ["Quantity_before", "Quantity_after"]
merged_df[quantity_columns] = merged_df[quantity_columns].fillna(0)

### Calcul de la variation

In [9]:
# Variation = quantity after minus quantity before, per (portfolio, asset) row.
merged_df["quantity_variation"] = merged_df["Quantity_after"].sub(
    merged_df["Quantity_before"]
)

### Filtre sur les variations non nulles

In [10]:
# Keep only the rows whose quantity actually changed.
# NOTE(review): this is an exact comparison on floats; if summed quantities can
# differ by rounding noise between snapshots, a tolerance may be needed - confirm.
mask = merged_df["quantity_variation"].ne(0)
variation_df = merged_df.loc[mask]

In [11]:
# Number of (portfolio, asset) rows with a non-zero variation.
variation_df.shape[0]

63

### Complément : filtre sur les apparitions ou disparitions d'assets

In [12]:
# Count the varying rows where one side is exactly zero, i.e. the quantity was
# zero before or is zero after (which includes pairs filled with 0 because they
# were absent from one snapshot).
len(
    variation_df.loc[
        variation_df["Quantity_before"].eq(0.0)
        | variation_df["Quantity_after"].eq(0.0)
    ]
)

63

In [15]:
# Export the non-zero variations to <DATA_DIR>/output/variations.csv.
OUTPUTDIR = Path(DATA_DIR, "output")
# to_csv does not create missing directories, so make sure the target exists
# first (no-op when it is already there).
OUTPUTDIR.mkdir(parents=True, exist_ok=True)
# The DataFrame index is still written as the first (unnamed) CSV column.
variation_df.to_csv(OUTPUTDIR / "variations.csv")