In [1]:
import numpy as np
import scipy.sparse as sp
import pandas as pd
import numba as nb

In [2]:
argl = [i.rstrip("\n") for i in open("proj1_parameter-file.txt", "r")]
argl

['MIS(10) = 0.43',
 'MIS(20) = 0.30',
 'MIS(30) = 0.30',
 'MIS(40) = 0.40',
 'MIS(50) = 0.40',
 'MIS(60) = 0.30',
 'MIS(70) = 0.20',
 'MIS(80) = 0.20',
 'MIS(90) = 0.20',
 'MIS(100) = 0.10',
 'MIS(120) = 0.20',
 'MIS(140) = 0.15',
 'SDC = 0.1',
 'cannot_be_together: {20, 40}, {70, 80}',
 'must-have: 20 or 40 or 50']

In [3]:
ms = pd.Series(dict([(int(j[0][4:-1]), float(j[1])) for j in [i.split(" = ") for i in argl if i.startswith("MIS")]])).sort_values().reset_index()
ms

Unnamed: 0,index,0
0,100,0.1
1,140,0.15
2,70,0.2
3,80,0.2
4,90,0.2
5,120,0.2
6,20,0.3
7,30,0.3
8,60,0.3
9,40,0.4


In [4]:
id_dict = ms["index"].to_dict()
id_dict_inv = {val: key for key, val in id_dict.items()}
id_dict, id_dict_inv

({0: 100,
  1: 140,
  2: 70,
  3: 80,
  4: 90,
  5: 120,
  6: 20,
  7: 30,
  8: 60,
  9: 40,
  10: 50,
  11: 10},
 {10: 11,
  20: 6,
  30: 7,
  40: 9,
  50: 10,
  60: 8,
  70: 2,
  80: 3,
  90: 4,
  100: 0,
  120: 5,
  140: 1})

In [5]:
ms_dict = ms[0].to_dict()
ms_dict

{0: 0.10000000000000001,
 1: 0.14999999999999999,
 2: 0.20000000000000001,
 3: 0.20000000000000001,
 4: 0.20000000000000001,
 5: 0.20000000000000001,
 6: 0.29999999999999999,
 7: 0.29999999999999999,
 8: 0.29999999999999999,
 9: 0.40000000000000002,
 10: 0.40000000000000002,
 11: 0.42999999999999999}

In [6]:
s = pd.read_csv("proj1_input-data.txt", header = None, sep = "\t",squeeze = True)
s

0                   {20, 30, 80, 70, 50, 90}
1                           {20, 10, 80, 70}
2                               {10, 20, 80}
3                               {20, 30, 80}
4                                   {20, 80}
5    {20, 30, 80, 70, 50, 90, 100, 120, 140}
Name: 0, dtype: object

In [7]:
da = s.str[1:-1].str.get_dummies(sep = ", ").rename(columns = lambda x: id_dict_inv[int(x)]).reindex(columns = ms.index, fill_value = 0).values
da

array([[0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0],
       [0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0]])

In [8]:
def mis(L):
    op = [min([ms_dict[j] for j in i]) for i in L]
    return(op)

In [9]:
def sup(L):
    op = np.vstack(da[:, i].all(axis = 1) for i in L).mean(axis = 1)
    return(op)

In [10]:
Li = (da.mean(axis = 0) > ms[0].values).argmax()
L = np.array([ms.loc[Li:, 0].index[da[:, Li:].mean(axis = 0) > ms[0][Li]]]).T
Li, L

(0, array([[ 0],
        [ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [10],
        [11]]))

In [11]:
Lsup = sup(L)
Lsup

array([ 0.16666667,  0.16666667,  0.5       ,  1.        ,  0.33333333,
        0.16666667,  1.        ,  0.5       ,  0.33333333,  0.33333333])

In [12]:
Lmis = mis(L)
Lmis

[0.10000000000000001,
 0.14999999999999999,
 0.20000000000000001,
 0.20000000000000001,
 0.20000000000000001,
 0.20000000000000001,
 0.29999999999999999,
 0.29999999999999999,
 0.40000000000000002,
 0.42999999999999999]

In [13]:
F = [L[Lsup > Lmis]]
F

[array([[0],
        [1],
        [2],
        [3],
        [4],
        [6],
        [7]])]

In [18]:
pd.DataFrame({"MIS": Lmis, "Support": Lsup}, index = ms.loc[L.T[0], "index"])

Unnamed: 0_level_0,MIS,Support
index,Unnamed: 1_level_1,Unnamed: 2_level_1
100,0.1,0.166667
140,0.15,0.166667
70,0.2,0.5
80,0.2,1.0
90,0.2,0.333333
120,0.2,0.166667
20,0.3,1.0
30,0.3,0.5
50,0.4,0.333333
10,0.43,0.333333


In [26]:
def sup_mis(x_sup, x_mis):
    x_sup_t = x_sup[:, np.newaxis]
    op = sp.coo_matrix(np.triu((x_sup_t >= x_mis).T & (np.abs(x_sup_t - x_sup) < phi), 1)).nonzero()
    return(op)

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
        2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7],
       [1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 3, 4, 6, 7, 8,
        9, 4, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 7, 8, 9, 8, 9]], dtype=int32)

In [19]:
phi = 1
def sup_mis(x_sup, x_mis):
    x_sup_t = x_sup[:, np.newaxis]
    op = sp.coo_matrix(np.triu((x_sup_t >= x_mis).T & (np.abs(x_sup_t - x_sup) < phi), 1)).nonzero()
    return(op)
def l2_candidate_gen(L):
    Lflat = L.T[0]
    C = , 1))
    C = np.array([Lflat[i] for i in sup_mis(Lsup, Lmis)]).T
    C = C[np.in1d(C[:,0], F[0].T[0])]
    return(C)

In [20]:
C = l2_candidate_gen(L)
C

array([[ 0,  1],
       [ 0,  2],
       [ 0,  3],
       [ 0,  4],
       [ 0,  5],
       [ 0,  6],
       [ 0,  7],
       [ 0, 10],
       [ 0, 11],
       [ 1,  2],
       [ 1,  3],
       [ 1,  4],
       [ 1,  5],
       [ 1,  6],
       [ 1,  7],
       [ 1, 10],
       [ 1, 11],
       [ 2,  3],
       [ 2,  4],
       [ 2,  6],
       [ 2,  7],
       [ 2, 10],
       [ 2, 11],
       [ 3,  4],
       [ 3,  6],
       [ 3,  7],
       [ 3, 10],
       [ 3, 11],
       [ 4,  6],
       [ 4,  7],
       [ 4, 10],
       [ 4, 11],
       [ 6,  7],
       [ 6, 10],
       [ 6, 11],
       [ 7, 10],
       [ 7, 11]])

In [21]:
F.append(C[sup(C) >= mis(C)])
F[-1]

array([[ 0,  1],
       [ 0,  2],
       [ 0,  3],
       [ 0,  4],
       [ 0,  5],
       [ 0,  6],
       [ 0,  7],
       [ 0, 10],
       [ 1,  2],
       [ 1,  3],
       [ 1,  4],
       [ 1,  5],
       [ 1,  6],
       [ 1,  7],
       [ 1, 10],
       [ 2,  3],
       [ 2,  4],
       [ 2,  6],
       [ 2,  7],
       [ 2, 10],
       [ 3,  4],
       [ 3,  6],
       [ 3,  7],
       [ 3, 10],
       [ 3, 11],
       [ 4,  6],
       [ 4,  7],
       [ 4, 10],
       [ 6,  7],
       [ 6, 10],
       [ 6, 11],
       [ 7, 10]])

In [23]:
Lbase = F[-1][0, :-1]
Ldiff = F[-1][:8, -1]

In [24]:
Lbase

array([0])

In [25]:
sup_mis(sup(L), mis(L))

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [False, False,  True,  True,  True, False,  True,  True,  True,
         True],
       [False, False,  True,  True,  True, False,  True,  True,  True,
         True],
       [False, False,  True,  True,  True, False,  True,  True,  True,
         True],
       [False, False,  True,  True,  True, False,  True,  True,  True,
         True],
       [False, False,  True,  True,  True, False,  True,  True,  True,
         True],
       [False, False,  True,  True,  True, False,  True,  True,  True,
         True],
       [False, False,  True,  True, False, False,  True,  True, False,
        False],
       [False, False,  True,  True, False, False,  True,  True, False,
        False]], dtype=bool)

In [22]:
def pair_gen(Lbase, Ldiff):
    

SyntaxError: unexpected EOF while parsing (<ipython-input-22-8593fa236048>, line 2)

In [14]:
def ms_candidate_gen(L, phi = 1):
    

SyntaxError: unexpected EOF while parsing (<ipython-input-14-d6d9bdbf8167>, line 2)