In [None]:
import json
import os
from collections import Counter
from itertools import permutations

import numpy as np
import pandas as pd

# Helpers

In [None]:
# Tells if a list is ordered
def is_ordered(l):
    return all(l[i] <= l[i+1] for i in range(len(l)-1))

# Gives 1 if list is in decreasing order, -1 if increasing, 0 otherwise
def check_order(l):
    return is_ordered(l[::-1]) - is_ordered(l)

# Gives 1 if all lists are in decreasing order, 1 if all are increasing, 0 otherwise
def bulk_check_order(ls):
    vals = set([check_order(l) for l in ls])
    return 0 if len(vals) > 1 else vals.pop()

# Separate ranking in groups where the corresponding value is the same
def break_down(values, ranking):
    res = []
    mem = None

    for v, r in zip(values, ranking):
        if mem != v:
            res.append(list())
            mem = v

        res[-1].append(r)

    return [tuple(l) for l in res]

def filter_reasonings(reasonings, discard):
    filtered = []
    
    for r in reasonings:
        # Get the labels of this result
        labels = [elem[0] for elem in r]
        catch = False
        
        for dis in discard:
            # Set catch to True if all elements in the current discarding list are in the labels of this result
            catch |= all(elem in labels for elem in dis)
            
        # If this result did not match with any discarding list, keep it
        if not catch:
            filtered.append(r)
            
    return filtered

# Generate all possible reasonings leading to a given ranking based on a list of values for given properties
def sequences(ranking, elems, discard):
    results = list()
    
    for perm in permutations(range(len(elems))):
        curr_res = list()
        keys = list(elems.keys())
        curr = [ranking]
        
        for i in perm:
            e = elems[keys[i]]
            
            # Sort current components according to the ranking
            outputs = [tuple(e[r] for r in c) for c in curr]
            ordered = bulk_check_order(outputs)
            
            if ordered:
                curr_res.append((keys[i], ordered))
                breakdowns = [break_down(o, c) for o, c in zip(outputs, curr)]
                curr = [x for x in [item for sublist in breakdowns for item in sublist] if len(x) > 1]
                
                if len(curr) == 0:
                    break
            else:
                break
                
        if len(curr) == 0 and len(curr_res):
            results.append(tuple(curr_res))
            
    results = set(results)
    results = filter_reasonings(results, discard)
    
    return results if len(results) else None

# Configuration

In [None]:
widths =         [2.5,   4,   5,   3]
concentrations = [600, 500, 400, 750]
colors =         [  1,   0,   1,   0]
shades =         [  1,   0,   0,   1]

elems = {
    "widths": widths,
    "concentrations": concentrations,
    "colors": colors,
    "shades": shades,
    "w_c_product": [w * c for w, c in zip(widths, concentrations)],
    "w_c_ratio": [w / c for w, c in zip(widths, concentrations)]
}

discard = [
    ("widths", "w_c_ratio"),
    ("widths", "w_c_product"),
    ("concentrations", "w_c_ratio"),
    ("concentrations", "w_c_product"),
    ("w_c_ratio", "w_c_product")
]

# Data

In [None]:
data_path = ["data"]

good_rank = tuple([np.int64(x) for x in (2, 0, 3, 1)])

df = pd.read_pickle(os.path.join(*data_path, "post_test.pkl"))
df

# Processing

In [None]:
ans = df[6]["ranks"]
valid_ans = ans[~ans.isnull()]
valid_ans

In [None]:
rank_choices = pd.DataFrame()
rank_choices = valid_ans.apply(pd.Series)
rank_choices["year"] = df[0]["year"]
rank_choices.groupby("year")

elem_counts = []

for k1, g1 in rank_choices[[0, 1, 2, 3]].groupby([0, 1, 2, 3]):
    elem_counts.append((len(g1), k1))
    print("\t", k1, len(g1), "<-------" if k1 == good_rank else "")
    
elem_counts = {v: k for k, v in elem_counts}

In [None]:
import pprint

strats = dict()

rank_df = {}
strats_df = {}

j = 0

for i, p in enumerate(permutations(range(4))):
    strats[p] = sequences(p, elems, discard)
    #print(p)
    try:
        pass
        #print(elem_counts[p], end=" / ")
    except Exception as e:
        pass
    
    ls = 0
    try:
        ls = len(strats[p])
        #print(len(strats[p]))
    except Exception:
        #print(0)
        pass
    
    if strats[p] is None:
        rank_df[i] = {
            "perm": p,
            "count": elem_counts[p],
            "ls": ls,
            "strats": strats[p]
        }
        continue
    
    for s in strats[p]:
        strats_df[j] = {
            "strat": s,
            "count": elem_counts[p],
            "ls": ls,
            "perm": p,
        }
        
        j += 1
        
    rank_df[i] = {
        "perm": p,
        "count": elem_counts[p],
        "ls": ls,
        "strats": strats[p]
    }
    
# A dict of row dicts yields one column per row number; transposing gives one
# row per record instead.
rank_df = pd.DataFrame(rank_df).T

strats_df = pd.DataFrame(strats_df).T
# Names of the properties a strategy uses, in the order it applies them
strats_df["elems"] = strats_df["strat"].apply(lambda s: tuple(x[0] for x in s))
# Nest the existing columns under a top-level "main" group (two-level columns)
strats_df = pd.concat({"main": strats_df}, axis=1)

# For every property, add three per-strategy features:
#   pres - 1 when the strategy uses the property, 0 otherwise
#   pos  - index of the property inside the strategy, NaN when it is absent
#   dir  - second field of the matching strategy step (the direction stored
#          by `sequences`), 0 when the property is absent
for e in elems.keys():
    strats_df[("pres", e)] = strats_df["main"]["elems"].apply(lambda x: int(e in x))
    strats_df[("pos", e)] = strats_df.apply(lambda x: x["main"]["elems"].index(e) if x["pres"][e] else np.nan, axis=1)
    strats_df[("dir", e)] = strats_df.apply(lambda x: x["main"]["strat"][int(x["pos"][e])][1] if x["pres"][e] else 0, axis=1)

# Drop the helper columns; of the "main" group only "perm" is kept
strats_df = strats_df.drop([("main", x) for x in ["count", "elems", "ls", "strat"]], axis=1)

# Sort the columns on their first level so each group's columns sit together
strats_df = strats_df.sort_index(level=0, axis=1)
# NOTE(review): `gbs` is not referenced again in the visible cells.
gbs = strats_df.groupby(("main", "perm"))

temp = []

for p in rank_df["perm"]:
    val = strats_df[strats_df["main"]["perm"] == p].drop(("main", "perm"), axis=1).mean(axis=0).to_dict()
    val[("main", "perm")] = p
    val[("main", "count")] = elem_counts[p]
    val[("main", "strats")] = len(strats[p]) if strats[p] is not None else 0
    temp.append(val)

rd = pd.DataFrame(temp, columns=pd.MultiIndex.from_tuples(temp[0].keys()))

orig_rd = rd.copy()

# Carve `rd` into mutually exclusive buckets. Every step selects rows from what
# is left of `rd` and then removes them, so the order of the steps matters.

# Mean presence of "colors" is missing (presumably permutations without any
# strategy row, whose averages come out as NaN; TODO confirm)
sol_no_strat = rd[rd["pres"]["colors"].isnull()]
rd = rd[~rd.index.isin(sol_no_strat.index)]

# No strategy uses "colors"; only the "pres" and "main" column groups are kept
sol_no_color = rd[rd["pres"]["colors"] == 0][["pres", "main"]]
rd = rd[~rd.index.isin(sol_no_color.index)]

# "colors" used by some but not all strategies, and "concentrations" by at least one
# NOTE(review): the name says "width" but the test is on "concentrations" - confirm.
sol_width_color = rd[(rd["pres"]["colors"] < 1) & (rd["pres"]["concentrations"] > 0)]
rd = rd[~rd.index.isin(sol_width_color.index)]

# Mean position of "colors" is exactly 1
sol_prod_color = rd[rd["pos"]["colors"] == 1]
rd = rd[~rd.index.isin(sol_prod_color.index)]

# Mean position of "colors" is any other value above 0
sol_color_width = rd[rd["pos"]["colors"] > 0]
rd = rd[~rd.index.isin(sol_color_width.index)]

# "w_c_product" never used and mean direction of "colors" negative
sol_color_shade_inv_bad = rd[(rd["pres"]["w_c_product"] == 0) & (rd["dir"]["colors"] < 0)]
rd = rd[~rd.index.isin(sol_color_shade_inv_bad.index)]

# "w_c_product" never used and mean direction of "concentrations" negative
sol_color_shade_inv = rd[(rd["pres"]["w_c_product"] == 0) & (rd["dir"]["concentrations"] < 0)]
rd = rd[~rd.index.isin(sol_color_shade_inv.index)]

# Remaining rows where "w_c_product" is never used
sol_color_shade = rd[rd["pres"]["w_c_product"] == 0]
rd = rd[~rd.index.isin(sol_color_shade.index)]

# Mean direction of "colors" negative
sol_good_inv = rd[rd["dir"]["colors"] < 0]
rd = rd[~rd.index.isin(sol_good_inv.index)]

# Whatever is left after all the previous buckets were removed
sol_good = rd
# Bare expression: it is only rendered when it is the last statement of a cell
sol_good

sols = [
    sol_no_strat,
    sol_no_color,
    sol_width_color,
    sol_prod_color,
    sol_color_width,
    sol_color_shade_inv_bad,
    sol_color_shade_inv,
    sol_color_shade,
    sol_good_inv,
    sol_good
]

gr_bad = [
    sol_no_strat,
    sol_no_color,
    sol_width_color,
    sol_prod_color,
]

gr_mid = [
    sol_color_width,
    sol_color_shade_inv_bad,
    sol_color_shade_inv,
    sol_color_shade,
]

gr_good = [
    sol_good_inv,
    sol_good
]

[s["main"]["count"].sum(axis=0) for s in sols]
temp = pd.concat(gr_mid, axis=0)
df_mid_inv = temp[temp["dir"]["colors"] < 0]
df_mid_ok = temp[temp["dir"]["colors"] > 0]
df_mid = temp

df_good = temp = pd.concat(gr_good, axis=0)
df_bad = temp = pd.concat(gr_bad, axis=0)
df_bad


all_dfs = {
    "mid_ok": df_mid_ok,
    "mid_inv": df_mid_inv,
    "good": df_good,
    "bad": df_bad
}

res = dict()

for k, v in all_dfs.items():
    for p in v["main"]["perm"]:
        new_p = "".join([str(x) for x in p])
        res[new_p] = k

with open("rank_to_group_4.json", "w") as f:
    json.dump(res, f, indent=4)

for k, v in res.items():
    if "mid" in v:
        res[k] = "mid"

with open("rank_to_group_3.json", "w") as f:
    json.dump(res, f, indent=4)

In [None]:
sum([s["main"]["count"].sum() for s in sols])

In [None]:
mp = strats_df["main"]["perm"]
strats_df = strats_df.drop(("main", "perm"), axis=1)
strats_df[("main", "perm")] = mp
strats_df

In [None]:
rank_df.loc[14]["strats"]

In [None]:
# Whole rank_df record (perm, count, ls, strats) stored under row label 14
rank_df.loc[14]

In [None]:
# Show the property table the strategies were built from
elems