# Parameter Search

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse.linalg import svds
import scipy.sparse as sp
from kmodes.kmodes import KModes

In [None]:
# Load the saved utility matrix; the cells below index its rows as users
# and its columns as items.
R_bin = np.load(file='utility_matrix.npy')

## Number of clusters

In [None]:
# Elbow search for the number of clusters: fit k-modes once for every
# candidate k and record the resulting clustering cost.
#
# Only a random subset of item columns is used. The subset size is capped at
# the number of available columns because np.random.choice(replace=False)
# raises ValueError when asked for more samples than the population holds.
n_sampled_items = min(1500, R_bin.shape[1])
random_columns_indices = np.random.choice(
    R_bin.shape[1], n_sampled_items, replace=False
)  # subset of items

# The column slice does not depend on k, so build it once instead of
# re-copying the sub-matrix on every iteration.
R_sample = R_bin[:, random_columns_indices]

cost = []
K = range(1, 20)
for k in K:
    kmode = KModes(n_clusters=k, init='Huang', n_init=1)
    kmode.fit(R_sample)
    cost.append(kmode.cost_)

In [None]:
# Elbow plot: k-modes cost for each candidate number of clusters in K.
plt.figure(figsize=(8, 6))
plt.plot(K, cost, 'bx-')
plt.ylabel('Cost of disagreement')
plt.xlabel('Number of cluster (k)')
plt.xticks(np.arange(21))  # integer ticks 0..20
plt.show()

In [None]:
"""kmode = KModes(n_clusters=7, init='Huang', n_init=5)
clusters = kmode.fit_predict(R_bin)
np.save('clusters.npy', R_bin)"""

In [None]:
# Read the stored cluster assignments back from disk and count how many
# distinct labels they contain.
clusters = np.load(file='clusters.npy')
num_clust = len(np.unique(clusters))

## Global factors search

In [None]:
# Truncated SVD of the whole matrix, keeping 100 singular triplets.
# NOTE(review): svds documents a floating-point input matrix; confirm that
# R_bin is stored with a float dtype.
R_global_sparse = sp.csr_matrix(R_bin)
user_global, sigma_global, item_global = svds(R_global_sparse, k=100)

# Singular values ordered from largest to smallest. Only the values are
# reordered here; user_global / item_global keep the order svds returned.
sigma_desc = np.flip(np.sort(sigma_global))

In [None]:
# Scree plot of the first 100 entries of sigma_desc against their position.
plt.figure(figsize=(8, 6))
plt.plot(sigma_desc[:100], 'bx-')
plt.ylabel('Weight of the factor')
plt.xlabel('Number of latent Factor')
plt.xticks(np.arange(21) * 5)  # ticks at 0, 5, ..., 100
plt.show()

## Local factors search

In [None]:
# Per-cluster containers; slot i belongs to the i-th distinct cluster label.
R_local = [None] * num_clust      # dense sub-matrix (rows of the cluster)
U_local = [None] * num_clust      # left singular vectors
sigma_local = [None] * num_clust  # singular values, largest first
Vt_local = [None] * num_clust     # right singular vectors

# Iterate over the labels that actually occur instead of assuming they are
# exactly 0 .. num_clust - 1: if a cluster ended up empty the labels have
# gaps, and range(num_clust) would both hit an empty sub-matrix and skip a
# real cluster. With contiguous labels this is identical to range(num_clust).
cluster_labels = np.unique(clusters)

for i, label in enumerate(cluster_labels):
    # Rows of R_bin assigned to this cluster.
    user_indices_in_cluster = np.where(clusters == label)[0]
    R_local[i] = R_bin[user_indices_in_cluster, :]

    R_local_sparse = sp.csr_matrix(R_local[i])

    # svds (default solver) requires 1 <= k < min(shape); cap the requested
    # 20 factors so that small clusters do not raise. A cluster too small to
    # admit even k=1 still raises from svds, as before.
    k_local = min(20, min(R_local_sparse.shape) - 1)

    # SVD on local cluster
    U_loc, sigma_loc_diag, Vt_loc = svds(R_local_sparse, k=k_local)

    U_local[i] = U_loc
    Vt_local[i] = Vt_loc
    # Only the singular values are sorted (descending) for plotting; the
    # stored U / Vt keep the order svds returned.
    sigma_local[i] = np.sort(sigma_loc_diag)[::-1]


In [None]:
# One scree plot per cluster, stacked vertically.
# squeeze=False makes plt.subplots always return a 2-D array of Axes; by
# default a single subplot comes back as a bare Axes object and axes[i]
# would fail when there is only one cluster. Taking column 0 restores the
# 1-D array of Axes the rest of the cell expects.
fig, axes = plt.subplots(num_clust, 1, figsize=(10, 5*num_clust), squeeze=False)
axes = axes[:, 0]

for i, arr in enumerate(sigma_local):
    ax = axes[i]
    x = np.arange(1, len(arr) + 1)  # 1-based factor index
    ax.plot(x, arr, marker='o', color=f"C{i}")
    ax.set_ylabel(f'Cluster {i+1}')
    ax.grid(True)
    ax.set_xlabel('Latent Factor')
    ax.set_xticks(x)
plt.tight_layout()
plt.show()
