In [1]:
import os
import sys

# Make the project's src directory importable from this notebook.
# "../src" is resolved against the current working directory. The membership
# check keeps a re-run of this cell from appending the same entry again.
_SRC_DIR = os.path.abspath("../src")
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)

In [2]:
import pandas as pd
import polars as pl

# Relative path of the CSV file read by the loaders below.
SOURCE = "../data/sampled_df.csv"

# Columns requested from SOURCE, in the order they are handed to the readers.
# Names the original author left switched off (not loaded):
#   Bi_Multi, Category, Finance_t, Aid_t, CurrencyCode, Commitment_National,
#   Disbursement_National, USD_GrantEquiv, Geography, LDCflag, LDCflagName,
#   SDGfocus, Keywords, Gender, Environment, DIG, Trade, RMNCH, DRR,
#   Nutrition, Disability, FTC, PBA, InvestmentProject, AssocFinance
COLS = (
    # Year plus donor / recipient identifiers
    ["Year", "DEDonorcode", "DonorName", "DERecipientcode", "RecipientName"]
    # Flow code and name
    + ["FlowCode", "FlowName"]
    # USD amount columns
    + ["USD_Commitment", "USD_Disbursement", "USD_Received"]
    # `_Defl` counterparts of the USD amount columns
    + ["USD_Commitment_Defl", "USD_Disbursement_Defl", "USD_Received_Defl"]
    # Marker columns
    + ["Biodiversity", "ClimateMitigation", "ClimateAdaptation", "Desertification"]
    # `climate_*` columns
    + ["climate_relevance", "climate_class_number", "climate_class"]
    # Remaining category / labelling columns; `DonorType` is the column the
    # donor-type filter compares against.
    + ["meta_category", "labelled_bilateral", "DonorType"]
)

### pandas

In [3]:
# Load only the COLS subset of the CSV into a pandas DataFrame.
df = pd.read_csv(filepath_or_buffer=SOURCE, usecols=COLS)

### polars

In [4]:
# Load the same COLS subset with polars; this rebinds `df`.
df = pl.read_csv(source=SOURCE, columns=COLS)

In [None]:
from typing import Literal


def read_data(
    selected_type: Literal["donors", "recipients"],
    source: str,
    columns: list,
    donor_type: Literal["bilateral", "multilateral", "all"],
) -> pl.DataFrame:
    """Read the CSV at `source` and return it shaped for donors or recipients.

    Both selector arguments are checked before the file is opened, so a
    mistyped value fails immediately instead of after the CSV has been read
    (or, for `donor_type`, instead of silently returning unfiltered rows).

    Args:
        selected_type (Literal["donors", "recipients"]): Which side of the
            flow to keep; the other side's code and name columns are dropped.
        source (str): The path to the source file.
        columns (list): The columns to read from the source. The reshaping
            step references the donor and recipient code/name columns and the
            donor-type filter reads `DonorType`, so these are expected to be
            among `columns`.
        donor_type (Literal["bilateral", "multilateral", "all"]): Which
            donors to keep; "all" applies no filter.

    Returns:
        pl.DataFrame: The reshaped and filtered data.

    Raises:
        ValueError: If `selected_type` or `donor_type` is not one of the
            accepted values.
    """
    # Fail fast: validate the cheap arguments before doing any file I/O.
    if selected_type not in ("donors", "recipients"):
        raise ValueError(
            "Invalid selected type. Please select either 'donors' or 'recipients'."
        )
    if donor_type not in ("bilateral", "multilateral", "all"):
        raise ValueError(
            "Invalid donor type. Please select 'bilateral', 'multilateral' or 'all'."
        )

    df = pl.read_csv(source=source, columns=columns)
    data = reshape_by_type(df, selected_type)
    return filter_data_by_donor_type(data, donor_type)


def reshape_by_type(df: pl.DataFrame, selected_type: str) -> pl.DataFrame:
    """Keep one side of the flow and give its code column a shorter name.

    For "donors" the recipient code/name columns are dropped and
    `DEDonorcode` becomes `DonorCode`; for "recipients" the donor code/name
    columns are dropped and `DERecipientcode` becomes `RecipientCode`.

    Raises:
        ValueError: If `selected_type` is neither "donors" nor "recipients".
    """
    # Guard clause: reject anything but the two supported values up front.
    if selected_type not in ("donors", "recipients"):
        raise ValueError(
            "Invalid selected type. Please select either 'donors' or 'recipients'."
        )

    if selected_type == "donors":
        unwanted = ["DERecipientcode", "RecipientName"]
        renames = {"DEDonorcode": "DonorCode"}
    else:
        unwanted = ["DEDonorcode", "DonorName"]
        renames = {"DERecipientcode": "RecipientCode"}

    trimmed = df.drop(unwanted)
    return trimmed.rename(renames)


def filter_data_by_donor_type(df: pl.DataFrame, donor_type: str) -> pl.DataFrame:
    """Filter the data based on the donor type.

    Args:
        df (pl.DataFrame): The data to filter; its `DonorType` column is read
            unless `donor_type` is "all".
        donor_type (str): "bilateral" keeps rows whose `DonorType` equals
            "Donor Country", "multilateral" keeps rows whose `DonorType`
            equals "Multilateral Donor", and "all" returns `df` unchanged.

    Returns:
        pl.DataFrame: The filtered data (or `df` itself for "all").

    Raises:
        ValueError: If `donor_type` is not one of the three accepted values.
            Previously such a value fell through and returned every row,
            which hid typos such as "Bilateral".
    """
    if donor_type == "all":
        return df  # No filtering needed if donor_type is "all"

    if donor_type == "bilateral":
        label = "Donor Country"
    elif donor_type == "multilateral":
        label = "Multilateral Donor"
    else:
        raise ValueError(
            "Invalid donor type. Please select 'bilateral', 'multilateral' or 'all'."
        )

    return df.filter(df["DonorType"] == label)


# Donor-side view with no donor-type filter; as the last expression of the
# cell, the resulting frame is what the notebook displays.
read_data(
    selected_type="donors",
    source=SOURCE,
    columns=COLS,
    donor_type="all",
)