In [29]:
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split
import os
from sklearn.tree import DecisionTreeClassifier

from utils.resources_monitor import monitor_tic, monitor_toc
from utils.energy_simulator import simulate_energy_consumption
from utils.xai_methods import *

# Reproducibility: a single seed is applied to NumPy's global RNG and reused
# by every seeded component configured further down in this cell.
global_random_seed = 42
np.random.seed(global_random_seed)


# Dataset selection. Layout of the list:
#   [0] name of the folder under ../datasets
#   [1] a list of column names (not read anywhere in this cell)
#   [2] name of the label column
dataset_setup = [
    '5G_Slicing',
    [],
    'Slice Type (Output)',
]
# Alternative dataset:
# dataset_setup = ['QOE_prediction_ICC2018', ['RebufferingRatio', 'AvgVideoBitRate', 'AvgVideoQualityVariation'], 'StallLabel']

# Classifier under evaluation (seeded so repeated runs build the same tree).
model = DecisionTreeClassifier(random_state=global_random_seed)
# Alternative model:
# model = KNeighborsClassifier()

# Explanation method whose cost is measured; it is later called as
# xai_method(x_test, y_test, model).
xai_method = permutation_importance_explanation
# Alternative explainer:
# xai_method = shap_explanation

# loading dataset
# The dataset is the first .csv file found in ../datasets/<dataset name>.
dataset_name = dataset_setup[0]
dataset_folder = f"../datasets/{dataset_name}"
# os.listdir() returns entries in arbitrary order, so the candidates are sorted
# to make the selected file reproducible when the folder holds several CSVs.
csv_files = sorted(file for file in os.listdir(dataset_folder) if file.endswith('.csv'))
if not csv_files:
    # Fail with an explicit message instead of an opaque IndexError.
    raise FileNotFoundError(f"No .csv file found in {dataset_folder}")
df = pd.read_csv(f"{dataset_folder}/{csv_files[0]}", low_memory=False)

# dataset preprocessing
print("Preprocessing ...")
from sklearn.preprocessing import LabelEncoder

# Label-encode every column whose dtype is neither integer nor float.
# A single encoder object is re-fitted per column, so once the loop is done
# `le` only remembers the classes of the last column it encoded.
le = LabelEncoder()
for column in df.columns:
    if df[column].dtype.kind in ('i', 'f'):
        continue  # already numeric, nothing to encode
    print(f" - Encoding column {column}")
    # Values are stringified first, so the encoder always sees plain strings.
    df[column] = le.fit_transform(df[column].astype(str))

# Any NaN still present is replaced by the mean of its column.
if df.isnull().values.any():
    print(" - Replacing missing values by mean")
    df = df.fillna(df.mean())



# Separate the target column from the feature matrix.
# NOTE(review): dataset_setup[1] is never read in this cell — confirm whether
# those columns were meant to be dropped from (or selected as) the features.
label_column = dataset_setup[2]
y = df[label_column]
X = df.drop(columns=label_column)

# Hold out 10% of the rows for testing; the split is stratified on the label
# and seeded so it is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=global_random_seed,
    stratify=y,
)

# training
# Fit the model on the training split while the resource monitor is running.
print("Training ...")
monitor_tic()
model.fit(x_train, y_train)
# The other two monitor_toc() call sites in this cell unpack the result as
# (elapsed_time, cpu_percent), and the printed summary matches that order;
# the training measurement previously had the two names swapped.
tr_action_elapsed_time, tr_action_cpu_percent = monitor_toc()

# Inference on the held-out split, wrapped by the resource monitor.
print("Testing ...")
monitor_tic()
y_pred = model.predict(x_test)
inf_action_elapsed_time, inf_action_cpu_percent = monitor_toc()

# Turn the measured time / CPU usage into a simulated energy figure.
inf_energy = simulate_energy_consumption(
    inf_action_elapsed_time,
    inf_action_cpu_percent,
)

# Matthews correlation coefficient of the predictions (stored in `mcc`;
# it is not printed anywhere in this cell).
mcc = matthews_corrcoef(y_test, y_pred)

# Run the selected XAI method on the test split under the resource monitor.
print("Explaining test dataset ...")
monitor_tic()
te_f_relevance = xai_method(x_test, y_test, model)
te_xai_action_elapsed_time, te_xai_action_cpu_percent = monitor_toc()

# Simulated energy for the explanation step, from the same two measurements.
xai_energy = simulate_energy_consumption(
    te_xai_action_elapsed_time,
    te_xai_action_cpu_percent,
)

# Report the inference and XAI measurements, one line per figure.
summary_lines = [
    "Summary: ",
    f"Inference time (CPU%): {inf_action_elapsed_time} ({inf_action_cpu_percent})",
    f"Inference energy: {inf_energy} J",
    f"XAI time (CPU%): {te_xai_action_elapsed_time} ({te_xai_action_cpu_percent})",
    f"XAI energy: {xai_energy} J",
]
for line in summary_lines:
    print(line)


Preprocessing ...
 - Encoding column Use CaseType (Input 1)
 - Encoding column LTE/5G UE Category (Input 2)
 - Encoding column Technology Supported (Input 3)
 - Encoding column Day (Input4)
 - Encoding column Packet Delay Budget (Latency)
 - Encoding column Slice Type (Output)
Training ...
Testing ...
Explaining test dataset ...
Summary: 
Inference time (CPU%): 0.004562977934256196 (20.0 %)
Inference energy: 5.247081543346937 J
XAI time (CPU%): 0.6660907100886106 (17.190625 %)
XAI energy: 757.2379227464678 J
