In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Location of the positions extract. Kept in one named constant so the notebook
# can be pointed at another copy of the file by editing a single line.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# run elsewhere until it is changed.
POSITIONS_CSV = r"/Users/jordan/Documents/BarclaysStage/portfolio_analysis/positions.csv"

# The first CSV column is used as the row index.
data = pd.read_csv(POSITIONS_CSV, index_col=0)

In [5]:
# Display the loaded positions frame as the cell output.
data

Unnamed: 0_level_0,name,ticker,country,currency,sector,industry,sub_industry,beta,avg_daily_volume,side,posn_shares,cost_basis_local,market_price_local
stock_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,equity_1,AGI,DEU,EUR,Information Technology,Software & Services,Systems Software,1.735624,10043,SHORT,-4810,145.19,147.06
2,equity_2,PWH,RUS,USD,Consumer Staples,Food Beverage & Tobacco,Meat Poultry & Fish,0.675354,640830,LONG,38342,112.48,100.28
3,equity_3,NDE,DEU,USD,Industrials,Capital Goods,Heavy Electrical Equipment,0.699394,36094,SHORT,-19881,43.36,41.56
4,equity_4,ABB,USA,USD,Financials,Insurance,Reinsurance,2.215079,135786553,SHORT,-100194,24.79,26.38
5,equity_5,WRF,RUS,USD,Utilities,Utilities,Electric Utilities,1.259133,26094,LONG,3963,149.90,149.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2996,equity_2996,HSH,CAN,CAD,Information Technology,Technology Hardware & Equipment,Electronic Manufacturing Services,1.723244,3193,SHORT,-232,238.13,249.60
2997,equity_2997,QLG,BEL,EUR,Utilities,Utilities,Gas Utilities,0.977303,291651784,LONG,77866,11.07,10.13
2998,equity_2998,YOV,DEU,USD,Utilities,Utilities,Gas Utilities,1.186587,1955,LONG,3743,141.71,160.19
2999,equity_2999,YFG,HKG,CNY,Consumer Discretionary,Retailing,Internet Retail,0.799109,11685566,LONG,49551,225.50,235.19


In [21]:
# List the column labels of the positions frame.
data.columns

Index(['name', 'ticker', 'country', 'currency', 'sector', 'industry',
       'sub_industry', 'beta', 'avg_daily_volume', 'side', 'posn_shares',
       'cost_basis_local', 'market_price_local'],
      dtype='object')

In [18]:
# Per-column overview of the positions frame: the dtype and the number of
# missing values of every column ...
summary = data.dtypes.to_frame(name="dtype")
summary["missing"] = data.isna().sum()

# ... joined with the transposed describe() statistics, one row per column.
desc_data = summary.join(data.describe(include="all").transpose(), how="left")

In [19]:
# Display the per-column summary (dtype, missing count, describe() statistics).
desc_data

Unnamed: 0,dtype,missing,count,unique,top,freq,mean,std,min,25%,50%,75%,max
name,object,0,3000.0,3000.0,equity_1,1.0,,,,,,,
ticker,object,0,3000.0,3000.0,AGI,1.0,,,,,,,
country,object,0,3000.0,18.0,HKG,294.0,,,,,,,
currency,object,2,2998.0,10.0,EUR,1082.0,,,,,,,
sector,object,0,3000.0,10.0,Consumer Discretionary,655.0,,,,,,,
industry,object,0,3000.0,24.0,Materials,293.0,,,,,,,
sub_industry,object,0,3000.0,165.0,Consumer Finance,37.0,,,,,,,
beta,float64,0,3000.0,,,,1.247901,0.689492,0.110735,0.750708,1.065386,1.547872,7.205475
avg_daily_volume,int64,0,3000.0,,,,34939573.259333,234277134.632115,0.0,11355.25,122590.5,3320203.5,7626954068.0
side,object,0,3000.0,2.0,SHORT,1532.0,,,,,,,


In [22]:
def enrich_data(data, fx_map):
    """Return a copy of ``data`` with exposure, liquidity and P&L columns added.

    Parameters
    ----------
    data : pandas.DataFrame
        Positions table. Must provide the columns ``currency``, ``posn_shares``,
        ``market_price_local``, ``cost_basis_local``, ``beta`` and
        ``avg_daily_volume``. It is not modified.
    fx_map : dict, Series or callable
        Passed to ``Series.map`` to turn each ``currency`` value into the
        multiplier applied to local-currency amounts.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with these extra columns:

        * ``fx_to_base`` - mapped FX multiplier; 1.0 when the currency is
          missing or absent from ``fx_map``.
        * ``notional_local`` - ``posn_shares * market_price_local`` (carries the
          sign of ``posn_shares``).
        * ``notional_base`` - ``notional_local * fx_to_base``.
        * ``abs_notional_base`` - absolute value of ``notional_base``.
        * ``beta_notional`` - ``beta * notional_base``.
        * ``pct_adv`` - ``|posn_shares| / avg_daily_volume``; NaN when the
          volume is not strictly positive.
        * ``unrealised_pnl_pct`` - ``market_price_local / cost_basis_local - 1``;
          NaN when the cost basis is zero.
        * ``unrealised_pnl`` - ``posn_shares * (market_price_local -
          cost_basis_local) * fx_to_base``.
    """
    df = data.copy()

    shares = df["posn_shares"]
    # The positions table has no "price" column; the current price is
    # "market_price_local".
    price = df["market_price_local"]
    cost = df["cost_basis_local"]
    volume = df["avg_daily_volume"]

    # Missing or unmapped currencies fall back to a multiplier of 1.0.
    df["fx_to_base"] = df["currency"].map(fx_map).fillna(1.0)

    df["notional_local"] = shares * price
    df["notional_base"] = df["notional_local"] * df["fx_to_base"]
    df["abs_notional_base"] = df["notional_base"].abs()

    df["beta_notional"] = df["beta"] * df["notional_base"]

    # Masking the denominator first yields NaN (not inf) for zero volume.
    df["pct_adv"] = shares.abs() / volume.where(volume > 0)

    # Return on cost is undefined for a zero cost basis, hence the mask.
    df["unrealised_pnl_pct"] = price / cost.where(cost != 0) - 1.0

    # P&L is the price move times the signed share count. Scaling the
    # return-on-cost by the *market* notional would overstate it by a factor of
    # price / cost, so it is computed directly from the price difference.
    df["unrealised_pnl"] = shares * (price - cost) * df["fx_to_base"]

    return df

In [None]:
def compute_portfolio_metric(df):
    """Compute book-level exposure, beta, liquidity and concentration figures.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``notional_base``, ``abs_notional_base``,
        ``beta_notional`` and ``pct_adv``.

    Returns
    -------
    dict
        ``gross``            sum of ``abs_notional_base``
        ``net``              sum of ``notional_base``
        ``long_pct``         positive notional as a share of gross
        ``short_pct``        absolute negative notional as a share of gross
        ``net_beta``         sum of ``beta_notional``
        ``beta_drift``       ``|net_beta|`` as a share of gross
        ``liq_gross_10pct``  share of gross held in rows with ``pct_adv > 0.10``
        ``top10_weight``     share of gross in the 10 largest absolute notionals

        All shares are 0.0 when gross is zero (empty or all-flat book).
    """
    gross = float(df["abs_notional_base"].sum())
    net = float(df["notional_base"].sum())

    # Same guard as group_summary: with a zero gross every numerator below is
    # zero as well, so dividing by 1.0 yields 0.0 instead of ZeroDivisionError.
    denom = gross or 1.0

    is_long = df["notional_base"] > 0
    is_short = df["notional_base"] < 0
    long_gross = float(df.loc[is_long, "notional_base"].sum())
    short_gross = float(df.loc[is_short, "notional_base"].abs().sum())
    long_pct = long_gross / denom
    short_pct = short_gross / denom

    net_beta = float(df["beta_notional"].sum())  # TODO: consider another name
    beta_drift = abs(net_beta) / denom  # TODO: consider another name

    # Share of gross sitting in positions larger than 10% of ADV.
    # Rows with NaN pct_adv compare False and are therefore excluded.
    liq_gross_share = float(df.loc[df["pct_adv"] > 0.10, "abs_notional_base"].sum()) / denom

    # Top-10 concentration.
    top10_weight = float(df["abs_notional_base"].nlargest(10).sum()) / denom

    return dict(
        gross=gross, net=net,
        long_pct=long_pct, short_pct=short_pct,
        net_beta=net_beta, beta_drift=beta_drift,
        liq_gross_10pct=liq_gross_share,
        top10_weight=top10_weight
    )


def group_summary(df, by):
    """Aggregate exposures per group and rank the groups by share of gross.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``notional_base``, ``abs_notional_base``,
        ``beta_notional`` and ``ticker`` in addition to the grouping key(s).
    by : label or list of labels
        Forwarded to ``DataFrame.groupby``; rows whose key is NaN form their
        own group.

    Returns
    -------
    pandas.DataFrame
        One row per group with ``net``, ``gross``, ``beta_dollars``, ``names``
        (distinct tickers) and ``pct_gross``, sorted by ``pct_gross``
        descending.
    """
    # A zero total is replaced by 1.0 so the share computation cannot divide by zero.
    portfolio_gross = df["abs_notional_base"].sum()
    if not portfolio_gross:
        portfolio_gross = 1.0

    grouped = df.groupby(by, dropna=False)
    per_group = grouped.agg(
        net=("notional_base", "sum"),
        gross=("abs_notional_base", "sum"),
        beta_dollars=("beta_notional", "sum"),
        names=("ticker", "nunique"),
    ).reset_index()

    per_group["pct_gross"] = per_group["gross"] / portfolio_gross
    return per_group.sort_values("pct_gross", ascending=False)


def beta_buckets(df, edges=(-9, -0.5, 0, 0.5, 9)):
    """Bucket positions by beta and report each bucket's share of gross.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``beta``, ``ticker`` and ``abs_notional_base``. It is not
        modified.
    edges : sequence of numbers
        Increasing bin edges. Consecutive edges ``a, b`` define the half-open
        bucket ``[a, b)``; the outer edges are wide so that extreme betas are
        still caught.

    Returns
    -------
    pandas.DataFrame
        One row per bucket (empty buckets included) with ``beta_bin``,
        ``gross``, ``names`` (distinct tickers) and ``pct_gross``, sorted by
        ``pct_gross`` descending. Betas that are NaN or fall outside ``edges``
        are collected in a NaN bucket.
    """
    bin_edges = list(edges)
    labels = [f"[{lo},{hi})" for lo, hi in zip(bin_edges[:-1], bin_edges[1:])]

    data_bucket = df.copy()
    # right=False makes the bins left-closed / right-open, which is what the
    # "[a,b)" labels advertise (pd.cut defaults to right-closed bins).
    data_bucket["beta_bin"] = pd.cut(
        data_bucket["beta"], bins=bin_edges, labels=labels, right=False
    )

    # observed=False keeps buckets with no positions in the output; it is
    # spelled out so the result does not depend on the pandas default.
    data_group = (
        data_bucket.groupby("beta_bin", dropna=False, observed=False)
        .agg(
            gross=("abs_notional_base", "sum"),
            names=("ticker", "nunique"),
        )
        .reset_index()
    )

    # A zero total is replaced by 1.0 so the share computation cannot divide by zero.
    total_gross = df["abs_notional_base"].sum() or 1.0
    data_group["pct_gross"] = data_group["gross"] / total_gross

    return data_group.sort_values("pct_gross", ascending=False)