In [189]:
%load_ext autoreload
%load_ext nb_black
%autoreload 2
import warnings

warnings.filterwarnings("ignore")
import sys
import os

from src.dataset_loader import *
from src.utils import *
from src.ICA import *
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
import itertools as it
from tqdm import tqdm

# NOTE(review): `seed` is not referenced by any cell visible in this notebook;
# the estimators created below are built without an explicit random_state.
seed = 42
# NOTE(review): this path is appended only after the `src.*` imports above have
# already executed, so it cannot affect how those imports were resolved.
sys.path.append("../../")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [190]:
# Ad-hoc single run: build the dataset and graph, then fit one IDA classifier
# wrapping a default AdaBoost model. The positional arguments 0.4 and
# "betweenness" occupy the slots that experiment() calls `uncover_rate` and
# `utility`.
data, G = create_dataset()
model = AdaBoostClassifier()
cls = IDA_classfier(G, 0.4, "betweenness")
# The return value of fit() is kept in `x` but not used by any visible cell.
x = cls.fit(model, stop_iterations=100)

<IPython.core.display.Javascript object>

In [191]:
# Parameter grid for the experiments: run_experiments() evaluates every
# combination of the values listed under these three keys.
experiment_params = {
    # Classifiers are stored by class name (plain strings); get_model() turns
    # a name back into a fresh estimator instance.
    "cls model": [
        GaussianNB.__name__,
        RandomForestClassifier.__name__,
        KNeighborsClassifier.__name__,
        AdaBoostClassifier.__name__,
        DecisionTreeClassifier.__name__,
    ],
    # Passed as the second positional argument of IDA_classfier.
    "uncover_rate": [0.2, 0.4, 0.6],
    # Passed as the third positional argument of IDA_classfier.
    "utility": ["betweenness", "degree", "closeness"],
}

<IPython.core.display.Javascript object>

In [198]:
def get_model(model_name):
    """Create a fresh, unfitted classifier from its class name.

    Parameters
    ----------
    model_name : str
        ``__name__`` of one of the supported estimator classes:
        ``GaussianNB``, ``DecisionTreeClassifier``, ``RandomForestClassifier``,
        ``KNeighborsClassifier`` or ``AdaBoostClassifier``.

    Returns
    -------
    object
        A new estimator instance. All estimators use their default
        constructor arguments, except AdaBoost, which is built on a
        ``DecisionTreeClassifier(max_depth=4)`` base estimator.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    # Dictionary lookup compares strings by value. The previous `is` checks
    # compared object identity and only worked when both strings happened to
    # be the same interned object.
    factories = {
        GaussianNB.__name__: GaussianNB,
        DecisionTreeClassifier.__name__: DecisionTreeClassifier,
        RandomForestClassifier.__name__: RandomForestClassifier,
        KNeighborsClassifier.__name__: KNeighborsClassifier,
        # Wrapped in a lambda so the base tree is only built when requested.
        AdaBoostClassifier.__name__: lambda: AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=4)
        ),
    }

    factory = factories.get(model_name)
    if factory is None:
        # Fail loudly instead of hitting an UnboundLocalError on `return`.
        raise ValueError(
            "Unknown model name {!r}; expected one of {}".format(
                model_name, sorted(factories)
            )
        )
    return factory()

<IPython.core.display.Javascript object>

In [199]:
def experiment(model_name, uncover_rate, utility):
    """Fit one IDA classifier configuration and return its reports.

    Parameters
    ----------
    model_name : str
        Estimator class name, resolved through ``get_model``.
    uncover_rate
        Forwarded as the second positional argument of ``IDA_classfier``.
    utility
        Forwarded as the third positional argument of ``IDA_classfier``.

    Returns
    -------
    Whatever ``raport_fast`` returns for the fitted classifier.
    """
    estimator = get_model(model_name)
    # Only the graph is needed here; the first value of create_dataset() is
    # discarded.
    _, graph = create_dataset()

    classifier = IDA_classfier(graph, uncover_rate, utility)
    classifier.fit(estimator, stop_iterations=100)

    # Entries 1-4 of the classifier's bootstrapped data are read by position.
    # Presumably they are the train/test partitions and their labels (the
    # first two expose an `.index`) -- TODO confirm against IDA_classfier.
    bootstrapped = classifier._bootstrapped_data
    train_part, test_part = bootstrapped[1], bootstrapped[2]
    train_labels, test_labels = bootstrapped[3], bootstrapped[4]

    return raport_fast(
        classifier, train_part.index, test_part.index, train_labels, test_labels
    )

<IPython.core.display.Javascript object>

In [200]:
# One-off run with AdaBoost and the "closeness" utility; the uncover rate 0.1
# is not one of the values listed in experiment_params.
raport = experiment(AdaBoostClassifier.__name__, 0.1, "closeness")

<IPython.core.display.Javascript object>

In [201]:
def run_experiments(output_path="results.csv"):
    """Run every combination in ``experiment_params`` and save test metrics.

    For each (classifier name, uncover rate, utility) combination this calls
    ``experiment`` and keeps three values from the test report: accuracy,
    macro-averaged F1 and weighted-averaged F1. One row per combination is
    written to a CSV file.

    Parameters
    ----------
    output_path : str, optional
        Destination of the CSV file. Defaults to ``"results.csv"``, the path
        that was previously hard-coded.

    Returns
    -------
    None
    """
    # Look the grids up by key rather than relying on the alphabetical order
    # of the dictionary keys lining up with positional unpacking.
    grid = it.product(
        experiment_params["cls model"],
        experiment_params["uncover_rate"],
        experiment_params["utility"],
    )

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    # Materialise the product so tqdm knows the total and can show progress.
    for model_name, uncover_rate, utility in tqdm(list(grid)):
        # experiment() yields a (train report, test report) pair; only the
        # test report is recorded.
        _, raport_test = experiment(model_name, uncover_rate, utility)
        records.append(
            {
                "cls": model_name,
                "uncover_rate": uncover_rate,
                "utility": utility,
                "accuracy": raport_test["accuracy"],
                "fscore macro": raport_test["macro avg"]["f1-score"],
                "fscore weighted avg": raport_test["weighted avg"]["f1-score"],
            }
        )

    # Explicit column order keeps the CSV layout deterministic and still
    # writes a header row when the grid is empty. Consumers should address
    # columns by name, since DataFrame.append chose the order implicitly.
    columns = [
        "cls",
        "uncover_rate",
        "utility",
        "accuracy",
        "fscore macro",
        "fscore weighted avg",
    ]
    test_results = pd.DataFrame(records, columns=columns)
    test_results.to_csv(output_path, index=False)

<IPython.core.display.Javascript object>

In [202]:
# Run the full grid (5 classifiers x 3 uncover rates x 3 utilities = 45 runs)
# and write the collected test metrics to results.csv.
run_experiments()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 45/45 [01:15<00:00,  1.67s/it]


<IPython.core.display.Javascript object>