In [1]:
import pandas as pd

from pathlib import Path
from itertools import chain

In [2]:
def get_results(path):
    """
    Collect the per-subject accuracy summary stored in ``path / "logs"``.

    Every regular file in the ``logs`` directory is treated as one subject's
    log. The subject id is the integer found in the second ``_``-separated
    token of the file stem (e.g. ``subj_7.log`` -> 7; the exact naming scheme
    is assumed, confirm against the code that writes the logs).

    Three lines of each log are parsed:

    * first line: the text after the first ``(`` (minus the final character)
      is read as the ``before`` rate;
    * second-to-last line: the text after the last ``(`` is expected to look
      like ``<float>), <int>)`` and yields ``worst_rate`` / ``worst_step``;
    * last line: same format, yields ``best_rate`` / ``best_step``.

    Parameters
    ----------
    path : pathlib.Path
        Run directory that contains a ``logs`` sub-directory.

    Returns
    -------
    pandas.DataFrame
        One row per subject id (sorted ascending) with the columns
        ``before``, ``worst_rate``, ``worst_step``, ``best_rate`` and
        ``best_step``, converted with ``convert_dtypes()``.

    Raises
    ------
    IndexError, ValueError
        If a file name or a log line does not follow the expected format.
    """
    column_names = (
        "before",
        "worst_rate",
        "worst_step",
        "best_rate",
        "best_step",
    )
    results = {}
    for subj_log in (path / "logs").iterdir():
        # A sub-directory can never be read as a log; skipping it keeps one
        # stray folder from aborting the whole load.
        if not subj_log.is_file():
            continue
        subj_id = int(subj_log.stem.split("_")[1])
        with open(subj_log) as f:
            lines = f.readlines()
        # The file is closed here; parsing only needs the lines in memory.
        rate_before = float(lines[0].strip().split("(")[1][:-1])
        row = [rate_before]
        # Order matters: second-to-last line is "worst", last line is "best".
        for line in lines[-2:]:
            parts = line.strip().split("(")[-1][:-1].split("), ")
            row.append(float(parts[0]))
            row.append(int(parts[1]))
        results[subj_id] = dict(zip(column_names, row))
    return pd.DataFrame(results).transpose().convert_dtypes().sort_index()

In [3]:
def get_with_delta(path):
    """
    Load the per-subject results of a run and add the accuracy gain.

    The table returned by ``get_results(path)`` is extended with a
    ``delta_best_before`` column (``best_rate - before``) and its columns are
    reordered so that the gain comes first.

    Parameters
    ----------
    path : pathlib.Path
        Run directory, forwarded unchanged to ``get_results``.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``delta_best_before``, ``before``, ``best_rate``,
        ``best_step``, ``worst_rate``, ``worst_step``.
    """
    results = get_results(path)
    gain = (results.best_rate - results.before).rename("delta_best_before")
    # Index-on-index inner join attaches the gain to the matching subject row.
    with_gain = results.join(gain, how="inner")
    column_order = [
        "delta_best_before",
        "before",
        "best_rate",
        "best_step",
        "worst_rate",
        "worst_step",
    ]
    return with_gain[column_order]

In [4]:
# Per-subject results (with gain column) for the "NetUpDownCoder4Max60" run.
df_max = get_with_delta(Path("NetUpDownCoder4Max60_ASGD_lr_1e-06"))

In [5]:
# Per-subject results (with gain column) for the "NetUpDownCoder4Avg60" run.
df_avg = get_with_delta(Path("NetUpDownCoder4Avg60_ASGD_lr_1e-06"))

In [6]:
# Per-subject results (with gain column) for the "NetUpDownCoder4Conv60" run.
df_conv = get_with_delta(Path("NetUpDownCoder4Conv60_ASGD_lr_1e-06"))

In [7]:
# "Max60" run stored under the "derivative" directory.
df_max_der = get_with_delta(
    Path("derivative") / "NetUpDownCoder4Max60_ASGD_lr_1e-06"
)

In [8]:
# "Avg60" run stored under the "derivative" directory.
df_avg_der = get_with_delta(
    Path("derivative") / "NetUpDownCoder4Avg60_ASGD_lr_1e-06"
)

In [9]:
# "Conv60" run stored under the "derivative" directory.
df_conv_der = get_with_delta(
    Path("derivative") / "NetUpDownCoder4Conv60_ASGD_lr_1e-06"
)

In [10]:
# Best accuracy of every subject, one column per run; `keys` supplies the
# column labels for the concatenated "best_rate" series.
best_rate_no_skip = pd.concat(
    [
        df_avg_der["best_rate"],
        df_max_der["best_rate"],
        df_conv_der["best_rate"],
        df_avg["best_rate"],
        df_max["best_rate"],
        df_conv["best_rate"],
    ],
    axis=1,
    keys=["avg_der", "max_der", "conv_der", "avg", "max", "conv"],
)
# idxmax reports the FIRST column that holds the row maximum, so ties
# (e.g. every run at 1.0) are attributed to the left-most column.
no_skip_best_type = best_rate_no_skip.idxmax(axis="columns").rename("best")
best_rate_no_skip.join(no_skip_best_type, how="inner")

Unnamed: 0,avg_der,max_der,conv_der,avg,max,conv,best
2,1.0,0.9697,0.9545,1.0,0.9848,0.9848,avg_der
3,1.0,1.0,1.0,1.0,1.0,1.0,avg_der
4,1.0,1.0,1.0,1.0,1.0,0.973,avg_der
5,1.0,1.0,1.0,1.0,1.0,1.0,avg_der
6,0.988,0.988,0.988,1.0,1.0,1.0,avg
7,1.0,1.0,1.0,0.9881,1.0,1.0,avg_der
8,1.0,1.0,1.0,1.0,1.0,1.0,avg_der
9,0.9318,0.8295,0.8295,0.7727,0.8068,0.7841,avg_der
10,0.9587,0.9504,0.9669,1.0,0.9504,1.0,avg
11,1.0,1.0,1.0,1.0,1.0,1.0,avg_der


In [11]:
# Summary statistics of the best accuracy per run, across subjects.
best_rate_no_skip.describe()

Unnamed: 0,avg_der,max_der,conv_der,avg,max,conv
count,15.0,15.0,15.0,15.0,15.0,15.0
mean,0.9919,0.980507,0.981913,0.98136,0.982133,0.97948
std,0.019851,0.044555,0.044382,0.058194,0.050235,0.055017
min,0.9318,0.8295,0.8295,0.7727,0.8068,0.7841
25%,1.0,0.979,0.9889,0.99405,0.995,0.9874
50%,1.0,1.0,1.0,1.0,1.0,1.0
75%,1.0,1.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0


In [12]:
# Show the full per-subject table of the "derivative" Avg60 run.
df_avg_der

Unnamed: 0,delta_best_before,before,best_rate,best_step,worst_rate,worst_step
2,0.1515,0.8485,1.0,13,0.8636,11
3,0.2143,0.7857,1.0,8,0.8,0
4,0.0,1.0,1.0,0,1.0,0
5,0.4217,0.5783,1.0,0,1.0,0
6,0.0121,0.9759,0.988,2,0.8193,12
7,0.0,1.0,1.0,0,0.9286,27
8,0.0889,0.9111,1.0,13,0.9889,0
9,0.125,0.8068,0.9318,2,0.3864,27
10,0.0827,0.876,0.9587,25,0.6033,12
11,0.0,1.0,1.0,0,1.0,0


# Skip users

In [5]:
# "Max60" run stored under the "skip_users" directory.
df_max_skip = get_with_delta(
    Path("skip_users") / "NetUpDownCoder4Max60_ASGD_lr_1e-06"
)

In [6]:
# "Avg60" run stored under the "skip_users" directory.
df_avg_skip = get_with_delta(
    Path("skip_users") / "NetUpDownCoder4Avg60_ASGD_lr_1e-06"
)

In [7]:
# "Conv60" run stored under the "skip_users" directory.
df_conv_skip = get_with_delta(
    Path("skip_users") / "NetUpDownCoder4Conv60_ASGD_lr_1e-06"
)

In [8]:
# "Avg60" run stored under "derivative/skip_users".
df_avg_der_skip = get_with_delta(
    Path("derivative") / "skip_users" / "NetUpDownCoder4Avg60_ASGD_lr_1e-06"
)

In [9]:
# "Max60" run stored under "derivative/skip_users".
df_max_der_skip = get_with_delta(
    Path("derivative") / "skip_users" / "NetUpDownCoder4Max60_ASGD_lr_1e-06"
)

In [10]:
# "Conv60" run stored under "derivative/skip_users".
df_conv_der_skip = get_with_delta(
    Path("derivative") / "skip_users" / "NetUpDownCoder4Conv60_ASGD_lr_1e-06"
)

In [11]:
# Best accuracy of every subject for the "skip_users" runs, one column per
# run; `keys` supplies the column labels for the concatenated series.
best_rate_skip = pd.concat(
    [
        df_avg_der_skip["best_rate"],
        df_max_der_skip["best_rate"],
        df_conv_der_skip["best_rate"],
        df_avg_skip["best_rate"],
        df_max_skip["best_rate"],
        df_conv_skip["best_rate"],
    ],
    axis=1,
    keys=["avg_der", "max_der", "conv_der", "avg", "max", "conv"],
)
# idxmax reports the FIRST column that holds the row maximum, so ties
# are attributed to the left-most column.
skip_best_type = best_rate_skip.idxmax(axis="columns").rename("best")
best_rate_skip.join(skip_best_type, how="inner")

Unnamed: 0,avg_der,max_der,conv_der,avg,max,conv,best
2,0.9545,0.9545,1.0,0.9545,0.9848,1.0,conv_der
3,1.0,1.0,1.0,1.0,1.0,1.0,avg_der
4,1.0,0.9865,1.0,1.0,1.0,1.0,avg_der
5,1.0,1.0,1.0,1.0,1.0,1.0,avg_der
6,1.0,0.9518,1.0,0.988,0.988,1.0,avg_der
7,1.0,1.0,1.0,1.0,1.0,0.9881,avg_der
8,1.0,1.0,1.0,1.0,1.0,1.0,avg_der
9,0.9091,0.8977,0.7614,0.8409,0.8295,0.8523,avg_der
10,0.9669,0.9256,0.9669,0.9835,0.9421,1.0,conv
11,1.0,1.0,1.0,1.0,1.0,1.0,avg_der


In [51]:
# Export the table with human-readable column labels.
# `set_axis` returns a relabelled copy, so `best_rate_skip` keeps its short
# column names and the other cells that use it give the same result no matter
# how often, or in which order, this cell is run.
best_rate_skip.set_axis(
    [
        "Avg, Derivative",
        "Max, Derivative",
        "Conv, Derivative",
        "Avg, Default",
        "Max, Default",
        "Conv, Default",
    ],
    axis="columns",
).round(3).to_csv()

',"Avg, Derivative","Max, Derivative","Conv, Derivative","Avg, Default","Max, Default","Conv, Default"\n2,0.954,0.954,1.0,0.954,0.985,1.0\n3,1.0,1.0,1.0,1.0,1.0,1.0\n4,1.0,0.986,1.0,1.0,1.0,1.0\n5,1.0,1.0,1.0,1.0,1.0,1.0\n6,1.0,0.952,1.0,0.988,0.988,1.0\n7,1.0,1.0,1.0,1.0,1.0,0.988\n8,1.0,1.0,1.0,1.0,1.0,1.0\n9,0.909,0.898,0.761,0.841,0.83,0.852\n10,0.967,0.926,0.967,0.984,0.942,1.0\n11,1.0,1.0,1.0,1.0,1.0,1.0\n13,1.0,0.991,1.0,1.0,1.0,1.0\n14,1.0,1.0,1.0,1.0,1.0,1.0\n15,1.0,1.0,1.0,1.0,1.0,0.98\n16,1.0,1.0,1.0,1.0,1.0,1.0\n17,1.0,1.0,0.99,1.0,0.98,1.0\n'

In [13]:
# Summary statistics of the best accuracy per run, expressed in percent.
best_rate_skip.mul(100).describe().round(2)

Unnamed: 0,avg_der,max_der,conv_der,avg,max,conv
count,15.0,15.0,15.0,15.0,15.0,15.0
mean,98.87,98.05,98.12,98.45,98.16,98.8
std,2.61,3.27,6.14,4.16,4.48,3.8
min,90.91,89.77,76.14,84.09,82.95,85.23
25%,100.0,97.05,100.0,99.4,98.64,100.0
50%,100.0,100.0,100.0,100.0,100.0,100.0
75%,100.0,100.0,100.0,100.0,100.0,100.0
max,100.0,100.0,100.0,100.0,100.0,100.0


In [56]:
# CSV text of the before/best/gain columns, rounded to three decimals.
df_avg_der_skip.loc[
    :, ["before", "best_rate", "delta_best_before"]
].round(3).to_csv()

',before,best_rate,delta_best_before\n2,0.682,0.954,0.273\n3,0.743,1.0,0.257\n4,1.0,1.0,0.0\n5,0.566,1.0,0.434\n6,1.0,1.0,0.0\n7,0.607,1.0,0.393\n8,0.9,1.0,0.1\n9,0.546,0.909,0.364\n10,0.43,0.967,0.537\n11,0.973,1.0,0.027\n13,1.0,1.0,0.0\n14,0.972,1.0,0.027\n15,0.918,1.0,0.082\n16,1.0,1.0,0.0\n17,0.76,1.0,0.24\n'

# Old runs

In [21]:
# Per-subject results for the "NetUpDownCoder3_3" run with lr 1e-05.
df_05 = get_with_delta(Path("NetUpDownCoder3_3_ASGD_lr_1e-05"))

In [22]:
# Show the per-subject table of the lr 1e-05 run.
df_05

Unnamed: 0,delta_best_before,before,best_rate,best_step,worst_rate,worst_step
2,0.3088,0.6618,0.9706,28,0.6618,0
3,0.3889,0.6111,1.0,8,0.8333,0
4,0.171,0.7632,0.9342,23,0.8947,0
5,0.2326,0.7674,1.0,0,1.0,0
6,0.1764,0.7765,0.9529,17,0.9059,2
7,0.2758,0.6897,0.9655,2,0.8851,4
8,0.2365,0.7527,0.9892,0,0.9677,2
9,0.1538,0.6044,0.7582,0,0.6264,6
10,0.2661,0.6694,0.9355,12,0.8548,0
11,0.2321,0.7679,1.0,4,0.9732,0


In [23]:
# Per-subject results for the "NetUpDownCoder3_3" run with lr 1e-06.
df_06 = get_with_delta(Path("NetUpDownCoder3_3_ASGD_lr_1e-06"))

In [24]:
# Show the per-subject table of the lr 1e-06 run.
df_06

Unnamed: 0,delta_best_before,before,best_rate,best_step,worst_rate,worst_step
2,0.1912,0.7206,0.9118,41,0.7353,0
3,0.4167,0.5694,0.9861,39,0.8333,7
4,0.3026,0.6842,0.9868,26,0.9079,6
5,0.1512,0.8488,1.0,0,1.0,0
6,0.1412,0.7882,0.9294,2,0.8706,29
7,0.2988,0.6782,0.977,18,0.8851,5
8,0.2796,0.6989,0.9785,5,0.9247,2
9,0.0329,0.7253,0.7582,17,0.5604,9
10,0.2662,0.6935,0.9597,11,0.8065,16
11,0.2411,0.75,0.9911,0,0.9911,0


In [25]:
# lr 1e-05 run stored under the "skip_users" directory.
df_05_skipped = get_with_delta(
    Path("skip_users", "NetUpDownCoder3_3_ASGD_lr_1e-05")
)

In [26]:
# Show the per-subject table of the "skip_users" lr 1e-05 run.
df_05_skipped

Unnamed: 0,delta_best_before,before,best_rate,best_step,worst_rate,worst_step
2,0.2206,0.75,0.9706,2,0.8824,8
3,0.4306,0.5694,1.0,0,0.9583,7
4,0.2237,0.7105,0.9342,0,0.8947,1
5,0.2791,0.7209,1.0,0,1.0,0
6,0.1765,0.7882,0.9647,1,0.8824,12
7,0.2873,0.6782,0.9655,2,0.908,22
8,0.2043,0.7849,0.9892,13,0.9677,1
9,0.0989,0.6703,0.7692,1,0.4505,39
10,0.2016,0.7661,0.9677,23,0.5565,31
11,0.1964,0.8036,1.0,0,0.9554,22


In [27]:
# lr 1e-06 run stored under the "skip_users" directory.
df_06_skipped = get_with_delta(
    Path("skip_users", "NetUpDownCoder3_3_ASGD_lr_1e-06")
)

In [28]:
# Show the per-subject table of the "skip_users" lr 1e-06 run.
df_06_skipped

Unnamed: 0,delta_best_before,before,best_rate,best_step,worst_rate,worst_step
2,0.2059,0.7059,0.9118,3,0.8824,10
3,0.4444,0.5417,0.9861,3,0.9167,43
4,0.2763,0.7105,0.9868,0,0.8816,22
5,0.1628,0.8372,1.0,0,1.0,0
6,0.1883,0.7529,0.9412,19,0.8706,29
7,0.2873,0.6897,0.977,12,0.9425,14
8,0.1936,0.7849,0.9785,0,0.9785,0
9,0.0329,0.7473,0.7802,19,0.5495,39
10,0.2258,0.7339,0.9597,6,0.8629,34
11,0.1429,0.8482,0.9911,0,0.9732,21


In [29]:
# lr 1e-06 run stored under "normalised/skip_users/after_personalised".
df_06_skipped_norm = get_with_delta(
    Path(
        "normalised",
        "skip_users",
        "after_personalised",
        "NetUpDownCoder3_3_ASGD_lr_1e-06",
    )
)

In [30]:
# Show the per-subject table of the normalised "skip_users" lr 1e-06 run.
df_06_skipped_norm

Unnamed: 0,delta_best_before,before,best_rate,best_step,worst_rate,worst_step
2,0.2647,0.6618,0.9265,49,0.6176,13
3,0.4583,0.5417,1.0,2,0.5417,0
4,0.4211,0.5789,1.0,0,0.8816,31
5,0.4302,0.5698,1.0,9,0.5698,0
6,0.2824,0.6235,0.9059,45,0.6235,0
7,0.3678,0.6207,0.9885,45,0.8966,34
8,0.4194,0.5806,1.0,1,0.9677,23
9,0.0,0.6484,0.6484,0,0.6484,0
10,0.3146,0.6048,0.9194,43,0.3952,42
11,0.4911,0.5089,1.0,2,0.5089,0


In [31]:
# lr 1e-06 run stored under "normalised/common/after_personalised".
df_06_norm = get_with_delta(
    Path(
        "normalised",
        "common",
        "after_personalised",
        "NetUpDownCoder3_3_ASGD_lr_1e-06",
    )
)

In [32]:
# Show the per-subject table of the normalised "common" lr 1e-06 run.
df_06_norm

Unnamed: 0,delta_best_before,before,best_rate,best_step,worst_rate,worst_step
2,0.2353,0.6618,0.8971,46,0.3529,18
3,0.4583,0.5417,1.0,2,0.5417,0
4,0.4211,0.5789,1.0,4,0.5789,0
5,0.4302,0.5698,1.0,5,0.5698,0
6,0.2941,0.6235,0.9176,37,0.6235,0
7,0.3563,0.6207,0.977,19,0.6207,0
8,0.4194,0.5806,1.0,6,0.5806,0
9,0.1758,0.6484,0.8242,22,0.5934,7
10,0.2742,0.6048,0.879,49,0.4839,28
11,0.4911,0.5089,1.0,2,0.5089,0


In [33]:
# Summary statistics of the rate columns for the lr 1e-05 run.
df_05.loc[
    :, ["before", "worst_rate", "best_rate", "delta_best_before"]
].describe()

Unnamed: 0,before,worst_rate,best_rate,delta_best_before
count,15.0,15.0,15.0,15.0
mean,0.692313,0.876587,0.953453,0.26114
std,0.068218,0.113881,0.064385,0.075106
min,0.5755,0.6264,0.7582,0.1538
25%,0.6451,0.84405,0.9442,0.2267
50%,0.6897,0.8947,0.9706,0.24
75%,0.75795,0.97045,0.9955,0.29465
max,0.7765,1.0,1.0,0.4245


In [34]:
# Summary statistics of the rate columns for the lr 1e-06 run.
df_06.loc[
    :, ["before", "worst_rate", "best_rate", "delta_best_before"]
].describe()

Unnamed: 0,before,worst_rate,best_rate,delta_best_before
count,15.0,15.0,15.0,15.0
mean,0.681187,0.87224,0.956807,0.27562
std,0.080232,0.113207,0.061544,0.108665
min,0.5694,0.5604,0.7582,0.0329
25%,0.6148,0.82665,0.94485,0.21615
50%,0.6842,0.8932,0.9785,0.2988
75%,0.72295,0.94575,0.98895,0.3502
max,0.8488,1.0,1.0,0.4167


In [35]:
# Summary statistics of the rate columns for the "skip_users" lr 1e-05 run.
df_05_skipped.loc[
    :, ["before", "worst_rate", "best_rate", "delta_best_before"]
].describe()

Unnamed: 0,before,worst_rate,best_rate,delta_best_before
count,15.0,15.0,15.0,15.0
mean,0.714227,0.87008,0.959687,0.24546
std,0.071281,0.158685,0.060369,0.091216
min,0.5694,0.4505,0.7692,0.0989
25%,0.673,0.8824,0.9651,0.18645
50%,0.7209,0.908,0.9709,0.2206
75%,0.7755,0.963,0.9955,0.3013
max,0.8058,1.0,1.0,0.4306


In [36]:
# Summary statistics of the rate columns for the "skip_users" lr 1e-06 run.
df_06_skipped.loc[
    :, ["before", "worst_rate", "best_rate", "delta_best_before"]
].describe()

Unnamed: 0,before,worst_rate,best_rate,delta_best_before
count,15.0,15.0,15.0,15.0
mean,0.729047,0.898307,0.957807,0.22876
std,0.082267,0.108298,0.056388,0.094343
min,0.5417,0.5495,0.7802,0.0329
25%,0.6978,0.8761,0.95045,0.17905
50%,0.7339,0.9167,0.977,0.2136
75%,0.7689,0.9686,0.98895,0.2818
max,0.8482,1.0,1.0,0.4444


In [37]:
# Summary statistics of the rate columns for the normalised "skip_users" run.
df_06_skipped_norm.loc[
    :, ["before", "worst_rate", "best_rate", "delta_best_before"]
].describe()

Unnamed: 0,before,worst_rate,best_rate,delta_best_before
count,15.0,15.0,15.0,15.0
mean,0.574907,0.710653,0.954887,0.37998
std,0.060248,0.210772,0.091078,0.133898
min,0.4717,0.3952,0.6484,0.0
25%,0.5253,0.55575,0.94555,0.3273
50%,0.5806,0.6235,1.0,0.4194
75%,0.6221,0.9258,1.0,0.4747
max,0.6618,0.9906,1.0,0.5283


## Compare normalised and non-normalised runs

In [38]:
# Per-subject difference (normalised minus non-normalised) of the initial
# and best rates.
df_06_norm.sub(df_06).loc[:, ["before", "best_rate"]]

Unnamed: 0,before,best_rate
2,-0.0588,-0.0147
3,-0.0277,0.0139
4,-0.1053,0.0132
5,-0.279,0.0
6,-0.1647,-0.0118
7,-0.0575,0.0
8,-0.1183,0.0215
9,-0.0769,0.066
10,-0.0887,-0.0807
11,-0.2411,0.0089


In [39]:
# Summary statistics of the normalised-minus-non-normalised differences.
df_06_norm.sub(df_06).loc[:, ["before", "best_rate"]].describe()

Unnamed: 0,before,best_rate
count,15.0,15.0
mean,-0.10628,0.004933
std,0.078906,0.030998
min,-0.279,-0.0807
25%,-0.1267,0.0
50%,-0.0971,0.0089
75%,-0.0648,0.01355
max,0.04,0.066


In [40]:
# Per-subject difference (normalised minus non-normalised) of the initial
# and best rates for the "skip_users" runs.
df_06_skipped_norm.sub(df_06_skipped).loc[:, ["before", "best_rate"]]

Unnamed: 0,before,best_rate
2,-0.0441,0.0147
3,0.0,0.0139
4,-0.1316,0.0132
5,-0.2674,0.0
6,-0.1294,-0.0353
7,-0.069,0.0115
8,-0.2043,0.0215
9,-0.0989,-0.1318
10,-0.1291,-0.0403
11,-0.3393,0.0089


In [41]:
# Summary statistics of the "skip_users" normalised-minus-non-normalised
# differences.
df_06_skipped_norm.sub(df_06_skipped).loc[
    :, ["before", "best_rate"]
].describe()

Unnamed: 0,before,best_rate
count,15.0,15.0
mean,-0.15414,-0.00292
std,0.116497,0.042641
min,-0.3585,-0.1318
25%,-0.2437,-0.00445
50%,-0.1294,0.0089
75%,-0.06105,0.0143
max,0.0,0.05
