In [None]:
# Notebook-wide IPython setup. Comments sit on their own lines because a line
# magic consumes the rest of its line as arguments.
# Select matplotlib's interactive "notebook" backend for the figures below.
# NOTE(review): this backend is not offered by every front end (JupyterLab
# typically needs `%matplotlib widget` instead) -- confirm the target environment.
%matplotlib notebook
# Load the autoreload extension; mode 2 reloads all imported modules before
# each cell executes, so edits to imported modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import cvxpy as cp
import numpy as np
from gcspy import GraphOfConvexSets

In [None]:
# Problem dimensions: number of movies to group, number of score categories
# per movie, and number of cluster vertices made available to the solver.
n_movies, n_categories, n_clusters = 20, 2, 3

# Seed NumPy's legacy global generator so the synthetic data is reproducible.
np.random.seed(0)
# One row per movie, one column per category, sampled uniformly from [0, 1).
scores = np.random.random_sample((n_movies, n_categories))

In [None]:
gcs = GraphOfConvexSets()

# Cluster vertices: each owns one variable with n_categories entries that is
# constrained elementwise to lie between 0 and 1.
clusters = []
for k in range(n_clusters):
    cluster_vertex = gcs.add_vertex(f"cluster{k}")
    center = cluster_vertex.add_variable(n_categories)
    cluster_vertex.add_constraint(center >= 0)
    cluster_vertex.add_constraint(center <= 1)
    clusters.append(cluster_vertex)

# Movie vertices: each owns one variable pinned by an equality constraint to
# the corresponding row of `scores`.
movies = []
for idx in range(len(scores)):
    movie_vertex = gcs.add_vertex(f"movie{idx}")
    position = movie_vertex.add_variable(n_categories)
    movie_vertex.add_constraint(position == scores[idx])
    movies.append(movie_vertex)

# Connect every cluster to every movie; the edge cost is the sum of squared
# differences between the two vertices' variables.
for source in clusters:
    for target in movies:
        link = gcs.add_edge(source, target)
        offset = source.variables[0] - target.variables[0]
        link.add_cost(cp.sum_squares(offset))

In [None]:
# Run the graph's facility-location solve and report the returned problem's
# status and objective value.
prob = gcs.solve_facility_location()
status, optimal_value = prob.status, prob.value
print('Problem status:', status)
print('Optimal value:', optimal_value)

In [None]:
import matplotlib.pyplot as plt

# Plot each cluster's variable as an 'x' and, in the same colour, every movie
# whose edge from that cluster was selected by the solver.
fig, ax = plt.subplots()
ax.set_aspect('equal')
# ax.axis('off')  # uncomment for a frameless figure

# Keep the red/green/blue palette while it is large enough; with more clusters
# sample a qualitative colormap instead of failing with an IndexError.
# (Colours repeat only if there are more clusters than colormap entries.)
colors = ['r', 'g', 'b']
if len(clusters) > len(colors):
    cmap = plt.get_cmap('tab20')
    colors = [cmap(k % cmap.N) for k in range(len(clusters))]

for cluster, color in zip(clusters, colors):
    center = cluster.variables[0].value
    # The value is None when the solver assigned nothing to this variable
    # (e.g. the solve did not succeed); unpacking None would raise TypeError.
    if center is not None:
        ax.scatter(*center, color=color, marker='x')
    for movie in movies:
        edge = gcs.get_edge(cluster, movie)
        # Treat an edge as selected when its y value is (numerically) one.
        if edge.y.value is not None and edge.y.value > .99:
            ax.scatter(*movie.variables[0].value, color=color, marker='o')

# fig.savefig('clustering.pdf')  # uncomment to export the figure