In [10]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from src.classes_etf import IShareETF, AmundiETF
from src.functions import *
import tkinter as tk
from tkinter import messagebox
from collections import defaultdict

filepath =r"C:\Users\alexi\Documents\FINANCE\ETF_PORTFOLIO_ANALYZER\etf_data" #put the excel files downloaded from iShare in the data folder and begin
if not os.path.exists("analysis_data"):
    os.makedirs("analysis_data")
%load_ext autoreload
%autoreload 2

ETF_list = []

# Group Amundi files by ISIN
amundi_files = defaultdict(dict)

# Step 1: Sort all files in the folder
for x in os.listdir(filepath):
    full_path = os.path.join(filepath, x)

    # iShares: handled one file per ETF
    if "ishare" in x.lower() and x.endswith(".xlsx"):
        etf = IShareETF(full_path)
        ETF_list.append(etf)

    # Amundi: collect both NAV and Holdings (Titoli)
    elif "amundi" in x.lower() and x.endswith(".csv"):
        fund_name, isin = parse_amundi_filename(x)
        if "nav" in x.lower():
            amundi_files[isin]['nav'] = full_path
        elif "titoli" in x.lower():
            amundi_files[isin]['titoli'] = full_path


# Step 2: Instantiate AmundiETF only when both files are present
for isin, files in amundi_files.items():
    if 'nav' in files and 'titoli' in files:
        etf = AmundiETF(nav_path=files['nav'], titoli_path=files['titoli'])
        ETF_list.append(etf)
    else:
        print(f"[!] Skipping Amundi ETF with ISIN {isin}: incomplete files ({files})")


# Dictionary to hold user input
quotas_dict = {}

def submit():
    try:
        for etf, entry in entries:
            fund_name = etf.metadata.get('fund_name', 'Unnamed ETF')
            value = int(entry.get()) if entry.get().strip() else 0
            quotas_dict[fund_name] = value
        messagebox.showinfo("Success", "Quotas saved!")
        root.destroy()
    except ValueError:
        messagebox.showerror("Error", "Please enter valid integers only.")

# GUI setup
root = tk.Tk()
root.title("Enter ETF Quotas")

entries = []
for i, etf in enumerate(ETF_list):
    fund_name = etf.metadata.get('fund_name', f'ETF_{i}')
    tk.Label(root, text=f"{fund_name}:").grid(row=i, column=0, sticky='w')
    entry = tk.Entry(root)
    entry.grid(row=i, column=1)
    entries.append((etf, entry))

submit_btn = tk.Button(root, text="Submit", command=submit)
submit_btn.grid(row=len(ETF_list), column=0, columnspan=2, pady=10)

root.mainloop()

# Print the result
print("Final quotas:", quotas_dict)

def compute_geographic_exposure(ETF_list, quotas_dict, plot=True, top_n=None):
    """
    Computes the geographic exposure of a portfolio based on ETF holdings and quotas.

    Parameters:
        ETF_list (list): List of ETF objects, each with `.metadata` and `.holdings`.
        quotas_dict (dict): Dictionary with ETF fund names as keys and quota counts as values.
        plot (bool): Whether to plot a pie chart of the exposure.
        top_n (int or None): If set, shows only top N locations and groups others into "Other".

    Returns:
        pd.Series: Geographic exposure percentages indexed by location.
    """
    portfolio_dfs = []

    for etf in ETF_list:
        fund_name = etf.metadata['fund_name']
        if fund_name not in quotas_dict:
            continue

        quota = quotas_dict[fund_name]
        price = etf.metadata['curr_value']
        df = etf.holdings.copy()

        total_value = quota * price

        df['Weight'] = pd.to_numeric(df['Weight'], errors='coerce')
        df['€ Exposure'] = df['Weight'] / 100 * total_value

        # Filter out rows with invalid or negative exposure
        df = df[df['€ Exposure'].notna() & (df['€ Exposure'] >= 0)]

        df['Location'] = df['Location'].fillna('Unknown')
        portfolio_dfs.append(df[['Location', '€ Exposure']])

    if not portfolio_dfs:
        raise ValueError("No ETF data matched the provided quotas_dict keys.")

    # Combine and group
    full_portfolio = pd.concat(portfolio_dfs)
    exposure = full_portfolio.groupby('Location')['€ Exposure'].sum()
    total_investment = exposure.sum()
    exposure_percent = (exposure / total_investment * 100).sort_values(ascending=False)

    # Optional: reduce to top N
    if top_n is not None and top_n < len(exposure_percent):
        others = exposure_percent.iloc[top_n:].sum()
        exposure_percent = exposure_percent.iloc[:top_n].copy()
        exposure_percent['Other'] = others

    if plot:
        import matplotlib.pyplot as plt

        labels = exposure_percent.index
        sizes = exposure_percent.values

        # Define explode only for the 'Other' slice to make it stand out
        explode = [0.05 if label == 'Other' else 0 for label in labels]

        fig, ax = plt.subplots(figsize=(9, 9))
        wedges, texts, autotexts = ax.pie(
            sizes,
            labels=labels,
            autopct='%1.1f%%',
            startangle=140,
            explode=explode,
            shadow=True,
            textprops={'fontsize': 10},
            wedgeprops={'edgecolor': 'white'}
        )

        ax.set_title('Geographic Exposure of Portfolio', fontsize=14, weight='bold')
        plt.tight_layout()
        plt.show()


    return exposure_percent.round(2)

exposure_percent=compute_geographic_exposure(ETF_list, quotas_dict, plot=True, top_n=None)
exposure_percent.to_csv("analysis_data/geographic_exposure.csv")


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


KeyError: 'Weight'