In [1]:
import os
import time

import fcalc
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

In [2]:
# Load the Iris table. Column names are supplied explicitly via `names`,
# so pandas treats every line of the file as data rather than as a header.
df = pd.read_csv(
    'data_sets/iris.data',
    names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'],
)

# Replace the species labels with integer codes. `le` remains fitted, so the
# original labels can be recovered later with `le.inverse_transform`.
le = LabelEncoder()
df["species"] = le.fit_transform(df["species"].values)

In [3]:
def model_test_CV(X, y, cat_cols=None, method="standard", alpha=0.,
                  randomize=False, num_iters=10, subsample_size=1e-2,
                  kde_bandwidth=1.0, kde_kernel='gaussian', kde_leaf_size=40,
                  kde_classwise=False, scale_density=True, n_splits=5, seed=42):
    """Cross-validate ``fcalc.classifier.PatternClassifier`` with shuffled k-fold.

    For each fold a classifier is constructed from the training rows, its
    ``predict`` call on the held-out rows is timed, and accuracy and
    macro-averaged F1 are computed from ``pat_cls.predictions``.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample. pandas objects are converted to
        NumPy arrays so that the positional fold indices select rows.
    y : array-like
        Labels aligned with the rows of ``X``. Converted like ``X``.
    cat_cols
        Forwarded to the classifier as its ``categorical`` argument.
    method, alpha, randomize, num_iters, subsample_size, kde_bandwidth,
    kde_kernel, kde_leaf_size, kde_classwise, scale_density
        Forwarded unchanged to ``PatternClassifier``.
    n_splits : int
        Number of folds.
    seed : int
        ``random_state`` of the shuffled ``KFold``. It is not passed to the
        classifier, so it fixes only the fold assignment.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Accuracy"``, ``"F1 score"`` and ``"time (s)"``; one row per
        fold (index ``1..n_splits``) followed by a ``"mean"`` row. Per-fold
        values are rounded to 4 decimals and the ``"mean"`` row averages those
        rounded values.
    """
    # KFold yields positional indices; indexing a DataFrame/Series with them
    # via [] would be label-based, so work on plain arrays instead.
    if isinstance(X, (pd.DataFrame, pd.Series)):
        X = X.to_numpy()
    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.to_numpy()

    kf = KFold(n_splits=n_splits, random_state=seed, shuffle=True)
    Accuracy = []
    F1_score = []
    exec_time = []
    for train_index, test_index in tqdm(kf.split(X), total=kf.get_n_splits(), desc="k-fold"):
        pat_cls = fcalc.classifier.PatternClassifier(X[train_index], y[train_index],
                                                     categorical=cat_cols, method=method,
                                                     alpha=alpha, randomize=randomize, num_iters=num_iters,
                                                     subsample_size=subsample_size, kde_bandwidth=kde_bandwidth,
                                                     kde_kernel=kde_kernel, kde_leaf_size=kde_leaf_size,
                                                     kde_classwise=kde_classwise, scale_density=scale_density)
        # Only the predict call is timed. perf_counter is monotonic and
        # high-resolution, unlike the wall clock returned by time.time().
        start = time.perf_counter()
        pat_cls.predict(X[test_index])
        elapsed = time.perf_counter() - start

        Accuracy.append(round(accuracy_score(y[test_index], pat_cls.predictions), 4))
        F1_score.append(round(f1_score(y[test_index], pat_cls.predictions, average='macro'), 4))
        exec_time.append(round(elapsed, 4))

    # Summary row: each mean is computed before being appended to its own list.
    Accuracy.append(np.mean(Accuracy))
    F1_score.append(np.mean(F1_score))
    exec_time.append(np.mean(exec_time))
    return pd.DataFrame(zip(Accuracy, F1_score, exec_time),
                        columns=["Accuracy", "F1 score", "time (s)"],
                        index=[x + 1 for x in range(kf.get_n_splits())] + ["mean"])

In [5]:
# Grid search over the randomized classifier's two settings. X_train/y_train
# hold the complete data set; the train/test splitting is done per fold
# inside model_test_CV.
X_train = df.drop('species', axis=1).values
y_train = df['species'].values
n_iters = np.arange(10,51,10)
s_size = np.linspace(1e-2,5e-2,num=5,endpoint=True)

# Keep only the "mean" row of every cross-validation report. The loops nest in
# the same order that MultiIndex.from_product enumerates (n_iters, s_size), so
# the collected rows line up with the index built below.
result = []
for n in n_iters:
    for s in s_size:
        res = model_test_CV(X_train,y_train,randomize=True,num_iters=n,subsample_size=s)
        result.append(res.loc["mean"].values)
result=pd.DataFrame(result,columns=["Accuracy","F1 score", "time (sec.)"],
                    index=pd.MultiIndex.from_product([n_iters, s_size], names=["Number of iterations","Subsample size"]))
display(result)

# The output directory defaults to the original location but can be redirected
# with the FCALC_RESULTS_DIR environment variable, so the cell also runs on
# machines without that drive. The directory is created if it is missing, so
# the write cannot fail on a missing folder after the whole grid has run.
results_dir = os.environ.get("FCALC_RESULTS_DIR", "D:/University/masters thesis/csv results")
os.makedirs(results_dir, exist_ok=True)
result.to_csv(os.path.join(results_dir, "iris_random_res_multiindex.csv"), index=True)

k-fold: 100%|██████████| 5/5 [00:00<00:00, 20.49it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 23.26it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 23.47it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 19.61it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 23.47it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 11.52it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 12.19it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 12.22it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 12.35it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00, 12.22it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00,  8.37it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00,  8.42it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00,  8.36it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00,  8.29it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00,  7.69it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00,  6.39it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00,  6.37it/s]
k-fold: 100%|██████████| 5/5 [00:00<00:00,  6.34it/s]
k-fold: 100%|██████████| 5/5

Number of iterations,Subsample size,Accuracy,F1 score,time (sec.)
10,0.01,0.82668,0.6674,0.0464
10,0.02,0.82668,0.6674,0.0404
10,0.03,0.9,0.72664,0.0402
10,0.04,0.9,0.72664,0.0486
10,0.05,0.88,0.67626,0.0402
20,0.01,0.86,0.67932,0.085
20,0.02,0.86,0.67932,0.0792
20,0.03,0.91334,0.86668,0.0792
20,0.04,0.91334,0.86668,0.0784
20,0.05,0.90666,0.73686,0.0792
