In [1]:
import copy
import itertools
import matplotlib.pyplot as plt
import seaborn as sns
import autograd.numpy as np
from sklearn import datasets, preprocessing
import pandas as pd
from pymanopt.solvers import TrustRegions
from manopt_dr.core import gen_ldr
from manopt_dr.predefined_func_generator import *
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from scipy.spatial import ConvexHull, convex_hull_plot_2d
from scipy.interpolate import interp1d

In [2]:
# Generate data

dataset = datasets.load_wine()

X = dataset.data
y = dataset.target
n_samples, n_features = X.shape
n_components = 2
X = preprocessing.scale(X)


In [3]:
# generalized cPCA

GCPCA = gen_ldr(gen_cost_gcpca, gen_default_proj)
label_set = [0,1,2]


def return_partition(my_list):
    filtered=[]
    for l in range(1,len(my_list)):
        for c in itertools.combinations(my_list,l):
            filtered.append(c)
    return filtered

partitions = return_partition(label_set)

def print_groups(groups):
    s = ""
    for g in range(len(groups)):
        if g == len(groups) - 1:
            s += str(groups[g])
        else:
            s += str(groups[g]) + ","
    return s

def print_groups_index(index):
    groups =  partitions[index]
    s = ""
    for g in range(len(groups)):
        if g == len(groups) - 1:
            s += str(groups[g])
        else:
            s += str(groups[g]) + ","
    return s

for p in partitions:
    print(print_groups(p))
    
def encircle(x,y, ax=None, **kw):
    if not ax: ax=plt.gca()
    p = np.c_[x,y]
    hull = ConvexHull(p)
    x = p[hull.vertices, 0]
    y = p[hull.vertices, 1]
    fx = interp1d(range(len(x)), x, kind='cubic')
    fy = interp1d(range(len(y)), y, kind='cubic')
    # generate new x-, y-coords
    n_interp_points = 1000
    x_new = fx(np.linspace(0, len(x) - 1, n_interp_points))
    y_new = fy(np.linspace(0, len(y) - 1, n_interp_points))
    #poly = plt.Polygon(p[hull.vertices, :],**kw)
    print(hull.vertices)
    #print(x)
    #print(y)
    #print(x.shape, y.shape)
    #print(x_new)
    #print(y_new)
    #print(x_new.shape, y_new.shape)
    vertices = np.ones(2000).reshape(1000,2)
    vertices[:,0] = x_new
    vertices[:,1] = y_new
    #vertices = np.concatenate((x_new,y_new)).reshape(1000,2)
    print(vertices)
    print(vertices.shape)
    poly = plt.Polygon(vertices, **kw)
    ax.add_patch(poly)

    
def Trials(g1,g2):    

#    print(par[0])
#    print(par[1])
    y_tg = copy.deepcopy(y)
    y_tg = np.asarray(y_tg)

    for i in g1:
        y_tg[y_tg == i] = -1

    y_bg = copy.deepcopy(y)
    y_bg = np.asarray(y_bg)

    for i in g2:
        y_bg[y_bg == i] = -1

    gcpca = GCPCA(n_components=n_components).fit(X, y_tg, y_bg)
    Z = gcpca.transform(X)
    cost = gcpca.get_final_cost()
    contrastiveness = 1 / cost

    # Plot
    
    #print(Z)
    #if index1 == index2:
    #    return contrastiveness
    #print(print_groups(index1))
    #print(print_groups(index2))
    plt.figure(figsize=(10, 10))

    tg_x = [Z[i][0] for i in range(len(y)) if y[i] in g1]
    tg_y = [Z[i][1] for i in range(len(y)) if y[i] in g1]
    bg_x = [Z[i][0] for i in range(len(y)) if y[i] in g2]
    bg_y = [Z[i][1] for i in range(len(y)) if y[i] in g2]
    
    # The target has the maximum variance
    tg_x,tg_y,bg_x, bg_y = bg_x, bg_y, tg_x, tg_y
    #for i in range(len(Z)):
    #    if y[i] in g1:
    #        plt.plot(Z[i][0], Z[i][1],  'ro')
    #    else:
    #        plt.plot(Z[i][0], Z[i][1],  'ko')
    #tg = np.array([[tg_x[i],tg_y[i]] for i in range(len(tg_x))])
    #bg = np.array([[bg_x[i],bg_y[i]] for i in range(len(bg_x))])
    #hull_tg = ConvexHull(tg)
    #hull_bg = ConvexHull(bg)
    
    plt.scatter(tg_x, tg_y, marker='^',label = "target", color='r')
    plt.scatter(bg_x, bg_y, marker='o',label = "background", color='black')
    #print(hull_tg.vertices)
    #plt.plot(tg[hull_tg.vertices,0], tg[hull_tg.vertices,1], 'r--', lw=2)
    for g in g1:
        encircle(Z[y==g,0],Z[y==g,1],ec="k",fc="pink",alpha = 0.3)
    for g in g2:
        encircle(Z[y==g,0],Z[y==g,1],ec="k",fc="#7dc8e5",alpha = 0.3)
    #encircle(tg_x, tg_y, ec="k", fc="pink", alpha=0.3)
    #encircle(bg_x, bg_y, ec="k", fc="#7DC8E5", alpha=0.3)
    #plt.plot(tg[hull_tg.vertices[0],0], tg[hull_tg.vertices[0],1], 'ro')
    plt.legend(loc='best', shadow=False, scatterpoints=1)
    plt.title(
        f'Generalized cPCA of Wine dataset, target = ({print_groups(g2)}), background = ({print_groups(g1)})  (cost: {cost:.3f}, contrastiveness: {contrastiveness:.3f})' ,
        fontsize=8)
    #plt.show()
    plt.savefig(f'{print_groups(g2)} - {print_groups(g1)}.png')
    plt.clf()
    
    return contrastiveness


0
1
2
0,1
0,2
1,2


In [4]:
map_dataset = []
for i in range(len(partitions)):
    row = []
    rec = [l for l in label_set if l not in partitions[i]]
    c = Trials(partitions[i],rec)
    row = [ list(partitions[i]), rec ,c]
    map_dataset.append(row)

#map_dataset = pd.DataFrame(map_dataset)


[15 29  9  6 49 26  2 42 10]
[[0.28957505 0.77507537]
 [0.28867926 0.77338607]
 [0.28779171 0.77170018]
 ...
 [0.99973243 1.2946785 ]
 [1.00090495 1.30019044]
 [1.00209391 1.30572082]]
(1000, 2)
[ 2 24 46 62 36 14 15 35  0]
[[-0.79613895 -0.61491392]
 [-0.7870082  -0.62849964]
 [-0.77802918 -0.64198316]
 ...
 [-0.5765208   1.27373369]
 [-0.5901246   1.25960562]
 [-0.60390621  1.24531707]]
(1000, 2)
[28 23 19 20  0  3  9]
[[-2.5483471  -2.68915253]
 [-2.5499587  -2.70046454]
 [-2.55148055 -2.71162331]
 ...
 [-1.27651046 -0.42484144]
 [-1.28291651 -0.4281342 ]
 [-1.28930777 -0.43139192]]
(1000, 2)
[62 11 41 52 26  9  7 24]
[[-0.69905612 -0.19643364]
 [-0.70062465 -0.2016414 ]
 [-0.70219331 -0.20677714]
 ...
 [-0.08591829  0.15919697]
 [-0.08147824  0.15066424]
 [-0.07692561  0.14194455]]
(1000, 2)
[ 0  3 49 18 31 41 38 22 24 25 39]
[[ 0.90650765 -0.25308841]
 [ 0.90942983 -0.23853339]
 [ 0.91216559 -0.22404321]
 ...
 [ 0.81676963 -0.64359251]
 [ 0.81701689 -0.64088543]
 [ 0.8172164  -0.6

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>

In [6]:
#for x in map_dataset:
#    print(x)
columns = [("T : {" + print_groups(x[0]) +  "}",  "B : {" + print_groups(x[1]) + "}") for x in map_dataset]
values = [round(x[2],6) for x in map_dataset]
#print(map_dataset)
#print(columns)
columns.insert(0," ")
values.insert(0,"Contrast")
#print(rows)

#values = pd.DataFrame(values,columns=['Contrastiveness'])
#print(values)

fig = go.Figure(data=[go.Table(header=dict(values = columns, align = "left",   font=dict(color='white', size=12)),
                 cells=dict(values = values))
                              ])
fig.write_image("Table.png")
import csv
print("Contrastiveness")
row_list = [["Target and Background set", "Contrastiveness"]]
for i in range(len(columns)):
    row_list.append([columns[i] ,values[i]])

with open('Wine_combinations.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(row_list)
#map_dataset.rename(columns=lambda s: print_groups_index(s), index=lambda s: print_groups_index(s), inplace = True )
#sns.heatmap(map_dataset,cmap="YlGnBu", linewidths=.5)
#plt.xlabel("Target")
#plt.ylabel("Background")
#plt.gca().invert_yaxis()
#plt.show()
#plt.savefig("heatmap_wine.png")

Contrastiveness
