In [19]:
# Enable autoreload
%load_ext autoreload
%autoreload 2

# Processing each task
from numpy import ndarray
from pandas import DataFrame, read_csv
from matplotlib.pyplot import savefig, show, figure
from pathlib import Path
import sys

# Add the project directory to sys.path
project_dir = Path.cwd().parent.parent  # Adjust as needed to point to your project root
sys.path.append(str(project_dir))

# Import the module
from utils.dslabs_functions import plot_multibar_chart, CLASS_EVAL_METRICS, run_NB, run_KNN

# e stands for economic and s stands for secuity
# c stands for classification and f stands for forecast
path = "../../../data/"
classification_datasets = [
    "c_e_class_financial_distress.csv",
    "c_s_class_ny_arrests.csv",
]
forcasting_datasets = [
    "f_e_forecast_gdp_europe.csv",
    "f_s_forecast_ny_arrests.csv",
]
classification_targets = {
    "e": "CLASS",
    "s": "LAW_CAT_CD",
}

forecasting_targets = {
    "e": "GDP",
    "s": "Manhattan",
}




def evaluate_approach(
    train: DataFrame, test: DataFrame, target: str = "class", metric: str = "accuracy"
) -> dict[str, list]:
    trnY = train.pop(target).values
    trnX: ndarray = train.values
    tstY = test.pop(target).values
    tstX: ndarray = test.values
    eval: dict[str, list] = {}

    eval_NB: dict[str, float] = run_NB(trnX, trnY, tstX, tstY, metric=metric)
    eval_KNN: dict[str, float] = run_KNN(trnX, trnY, tstX, tstY, metric=metric)
    if eval_NB != {} and eval_KNN != {}:
        for met in CLASS_EVAL_METRICS:
            eval[met] = [eval_NB[met], eval_KNN[met]]
    return eval


# target = "stroke"
# file_tag = "stroke"
# train: DataFrame = read_csv("data/stroke_train.csv")
# test: DataFrame = read_csv("data/stroke_test.csv")

def show_answers(file_tag: str, train: DataFrame, test: DataFrame, target: str = "class"):
    figure()
    eval: dict[str, list] = evaluate_approach(train, test, target=target, metric="recall")
    plot_multibar_chart(
        ["NB", "KNN"], eval, title=f"{file_tag} evaluation", percentage=True
    )
    savefig(f"images/{file_tag}_eval.png")
    show()
    print(eval)
    print("\n\n\n")

# for each forcasting
for file in forcasting_datasets:
    file_tag = file.split(".")[0]
    print("Forcasting: ", file_tag)
    data = read_csv(path + file)
    
    # use sklearn to split the data
    from sklearn.model_selection import train_test_split
    train, test = train_test_split(data, test_size=0.2)
    
    type_key = file_tag.split("_")[1][0]
    target = forecasting_targets[type_key]
    print("target: ", target)
    show_answers(file_tag, train, test, target=target)
    


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Forcasting:  f_e_forecast_gdp_europe
target:  GDP


ValueError: Unknown label type: (array([  359.029,   390.887,   427.015,   470.415,   521.328,   567.732,
         615.438,   661.332,   687.385,   754.985,   854.807,   966.895,
        1157.617,  1469.889,  1652.067,  1925.962,  1994.811,  2259.061,
        2768.458,  3160.727,  3286.231,  3414.524,  3859.035,  4333.946,
        5362.16 ,  5981.271,  7808.204,  7859.432,  8291.509,  8563.868,
        8906.062,  9003.878,  9583.143,  9617.733,  9816.362, 14433.131,
       16546.782, 17009.515, 17102.541, 17316.911, 17338.846, 17793.676,
       18053.021, 18374.653, 18669.133, 19137.007]),)

<Figure size 640x480 with 0 Axes>