In [1]:
from scipy.optimize import Bounds
import numpy as np
from scipy.optimize import minimize
import joblib
import os

from deap import base
from deap import creator
from deap import tools

import random

from time import time



In [2]:
# Number of cluster centres encoded in a decision vector. Each centre takes
# 7 consecutive coordinates, so a vector has 7 * NUM_CLUSTERS entries.
NUM_CLUSTERS = 7

In [3]:
# Length of a decision vector: NUM_CLUSTERS centres with 7 coordinates each.
CLUSTER_DIM = 7 * NUM_CLUSTERS

# DEAP types: a minimising fitness and a list-based individual carrying it.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# An individual is CLUSTER_DIM floats, each drawn uniformly from [0, 1].
toolbox.register("attr_flt", random.uniform, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_flt,n=CLUSTER_DIM)

toolbox.register("population", tools.initRepeat, list, toolbox.individual)

#reduced_matrix_np = joblib.load('reduced_matrix_np.pkl')
#matrix_algorithm = reduced_matrix_np

# Document matrix read by the objective functions through the global
# `matrix_algorithm` (presumably one row per document with 7 columns — confirm).
matrix_np = joblib.load('matrix_np.pkl')
matrix_algorithm = matrix_np

# Starting point for the quadratic-error runs: last entry of the stored list
# (presumably the best individual of the final generation — confirm).
ind_fqua = joblib.load("./results/fobjQuadraticError/best_gen_individuals{}.pkl".format(150))
best_ind_qua = ind_fqua[-1]

# Starting point for the inertia runs. Every fobjInertia cell below reads
# `best_ind_iner`, so it has to be defined here; previously the load was
# commented out and those cells failed with NameError on a fresh kernel.
# The file is optional: when it is missing the variable is set to None and a
# message explains why the inertia cells cannot be run.
inertia_individuals_path = "./results/fobjInertia/best_gen_individuals{}.pkl".format(150)
if os.path.isfile(inertia_individuals_path):
    ind_finer = joblib.load(inertia_individuals_path)
    best_ind_iner = ind_finer[-1]
else:
    best_ind_iner = None
    print("Inertia starting point not found at %s; fobjInertia cells cannot be run" % inertia_individuals_path)

# Create Folders

In [4]:
# Output directory for the optimiser results on the quadratic-error objective.
path = "./results/convergence/fobjQuadraticError"
path_existed = os.path.isdir(path)  # only used to pick the right message below
try:
    # exist_ok=True makes the cell idempotent and removes the race between
    # checking for the directory and creating it.
    os.makedirs(path, exist_ok=True)
except OSError as err:
    print ("Creation of the directory %s failed: %s" % (path, err))
else:
    if path_existed:
        print ("Directory %s already exists" % path)
    else:
        print ("Successfully created the directory %s " % path)

Successfully created the directory ./results/convergence/fobjQuadraticError 


In [None]:
# Output directory for the optimiser results on the inertia objective.
path = "./results/convergence/fobjInertia"
path_existed = os.path.isdir(path)  # only used to pick the right message below
try:
    # exist_ok=True makes the cell idempotent and removes the race between
    # checking for the directory and creating it.
    os.makedirs(path, exist_ok=True)
except OSError as err:
    print ("Creation of the directory %s failed: %s" % (path, err))
else:
    if path_existed:
        print ("Directory %s already exists" % path)
    else:
        print ("Successfully created the directory %s " % path)

# Define Functions

In [5]:
def auxiliarFunction(documents, dimensions, number_topics):
    """Assign every document to its nearest cluster centre.

    Parameters
    ----------
    documents : iterable of array-like
        Rows to be assigned; each row is compared against 7-coordinate centres.
    dimensions : sequence
        Flat sequence of centre coordinates; centre ``c`` occupies the slice
        ``dimensions[7*c : 7*c + 7]``.
    number_topics : int
        Number of cluster centres encoded in ``dimensions``.

    Returns
    -------
    (list, list)
        ``counts[c]`` is the number of documents whose nearest centre is ``c``
        and ``sq_dist[c]`` is the sum of their squared Euclidean distances to
        that centre. Ties go to the lowest cluster index.
    """
    docs_per_cluster = [0] * number_topics
    squared_dist_per_cluster = [0] * number_topics

    for document in documents:
        # Euclidean distance from this document to every centre.
        dists = [
            np.linalg.norm(document - dimensions[7 * c:7 * c + 7])
            for c in range(number_topics)
        ]

        nearest = dists.index(min(dists))

        docs_per_cluster[nearest] += 1
        squared_dist_per_cluster[nearest] += pow(dists[nearest], 2)

    return docs_per_cluster, squared_dist_per_cluster

In [6]:
def fobjInertia(k):
    """Inertia objective: total squared distance of documents to their nearest centre.

    ``k`` is a flat sequence of centre coordinates (7 per cluster). Documents
    are taken from the global ``matrix_algorithm``. If any cluster ends up
    with no documents a fixed penalty of 1000000000 is returned instead.
    """
    num_topics = int(len(k) / 7)

    # Documents per cluster and summed squared distances per cluster.
    counter_documents, euclidean_dist_2 = auxiliarFunction(
        documents=matrix_algorithm,
        dimensions=k,
        number_topics=num_topics,
    )

    # Penalise solutions that leave at least one cluster empty.
    if 0 in counter_documents:
        return 1000000000

    # Sum of squared Euclidean distances over all clusters.
    return sum(euclidean_dist_2)

In [7]:
def fobjQuadraticError(k):
    """Quadratic-error objective: mean over clusters of the mean squared distance.

    ``k`` is a flat sequence of centre coordinates (7 per cluster). Documents
    are taken from the global ``matrix_algorithm``. For each cluster the
    summed squared distance is divided by its document count; the results are
    averaged over the clusters. If any cluster has no documents a fixed
    penalty of 5 is returned instead.
    """
    num_topics = int(len(k) / 7)

    # Documents per cluster and summed squared distances per cluster.
    counter_documents, euclidean_dist_2 = auxiliarFunction(
        documents=matrix_algorithm,
        dimensions=k,
        number_topics=num_topics,
    )

    # Penalise solutions that leave at least one cluster empty.
    if 0 in counter_documents:
        return 5

    # Per-cluster mean squared distance, accumulated in cluster order.
    per_cluster_total = sum(
        (1 / counter_documents[i]) * euclidean_dist_2[i]
        for i in range(num_topics)
    )
    return per_cluster_total / num_topics

In [8]:
# For the solvers that need an explicit Jacobian (J).
def gradfunc(x, fobj=None, h=0.001):
    """Forward-difference approximation of the gradient of ``fobj`` at ``x``.

    Parameters
    ----------
    x : sequence of float
        Point at which the gradient is evaluated. It is never modified.
    fobj : callable, optional
        Objective taking a 1-D vector and returning a scalar. Defaults to
        ``fobjQuadraticError``; the previous version called an undefined
        ``fobj2`` and therefore always raised NameError.
    h : float, optional
        Step added to one coordinate at a time (default 0.001).

    Returns
    -------
    list
        ``grad[i] = (fobj(x + h * e_i) - fobj(x)) / h`` for every coordinate.
    """
    if fobj is None:
        fobj = fobjQuadraticError

    # Work on a private float copy so the caller's vector is left untouched.
    base_point = np.array(x, dtype=float)
    base_value = fobj(base_point)

    num_var = len(base_point)  # derived from the input instead of hard-coding 49
    grad = [0 for _ in range(0, num_var)]

    for i in range(0, num_var):
        # Fresh copy per coordinate: only coordinate i is perturbed, so steps
        # from earlier coordinates do not accumulate.
        shifted = base_point.copy()
        shifted[i] = shifted[i] + h
        grad[i] = (fobj(shifted) - base_value) / h

    return grad

# Box constraints shared by the bounded solvers: each of the
# 7 * NUM_CLUSTERS coordinates is restricted to the interval [0, 1].
bounds = Bounds(lb=[0] * (7 * NUM_CLUSTERS), ub=[1] * (7 * NUM_CLUSTERS))

# SLSQP

In [9]:
"""
eps: Step size used for numerical approximation of the Jacobian.

ftol : Precision goal for the value of f in the stopping criterion.

"""
# SLSQP on the quadratic-error objective, started from best_ind_qua and
# restricted to `bounds`. No Jacobian is supplied; `eps` is the step used for
# its numerical approximation.
start_time = time()

res = minimize(
    fobjQuadraticError,
    best_ind_qua,
    method='SLSQP',
    bounds=bounds,
    options=dict(
        maxiter=100,
        ftol=1e-06,
        iprint=1,
        disp=True,
        eps=1.4901161193847656e-08,
        finite_diff_rel_step=None,
    ),
)
print(res.x)

# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/SLSQP.pkl")

Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.08924405546130854
            Iterations: 4
            Function evaluations: 229
            Gradient evaluations: 4
[0.08684272 0.06227225 0.03515571 0.05971949 0.05488723 0.05150941
 0.64306821 0.0850732  0.11190715 0.55227445 0.06555042 0.05809734
 0.08266257 0.07847096 0.06269864 0.05379456 0.04166713 0.60185493
 0.0572007  0.08347162 0.06775544 0.02873104 0.02823249 0.01914961
 0.87747103 0.03599404 0.02555935 0.01587945 0.0837553  0.09439681
 0.07904164 0.09755949 0.32887999 0.2327117  0.10430703 0.62061141
 0.05991763 0.09473553 0.07046836 0.04633504 0.05341064 0.089253
 0.06423063 0.59635239 0.09266999 0.05677078 0.05627019 0.06551471
 0.07638331]
--- 1599.2768597602844 s seconds ---


['results/convergence/fobjQuadraticError/SLSQP.pkl']

In [10]:
# Objective value at the starting point best_ind_qua, for comparison with the
# value reported by the optimiser.
print(fobjQuadraticError(best_ind_qua))

0.0898366747623566


In [None]:
# SLSQP on the inertia objective, started from best_ind_iner and restricted
# to `bounds`. No Jacobian is supplied; `eps` is the finite-difference step.
start_time = time()

res = minimize(
    fobjInertia,
    best_ind_iner,
    method='SLSQP',
    bounds=bounds,
    options=dict(
        maxiter=100,
        ftol=1e-06,
        iprint=1,
        disp=True,
        eps=1.4901161193847656e-08,
        finite_diff_rel_step=None,
    ),
)
print(res.x)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjInertia/SLSQP.pkl")

# BFGS

In [None]:

"""
eps: If jac is None the absolute step size used for numerical approximation of the jacobian via forward differences.
gtol: Gradient norm must be less than gtol before successful termination.

"""
# BFGS on the quadratic-error objective, started from best_ind_qua.
# No bounds are passed to this solver and no Jacobian is supplied.
start_time = time()

res = minimize(
    fobjQuadraticError,
    best_ind_qua,
    method='BFGS',
    options=dict(
        gtol=1e-06,
        eps=1.4901161193847656e-08,
        maxiter=100,
        disp=True,
        return_all=False,
        finite_diff_rel_step=None,
    ),
)
print(res.x)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/BFGS.pkl")

In [None]:
# BFGS on the inertia objective, started from best_ind_iner.
# No bounds are passed to this solver and no Jacobian is supplied.
start_time = time()

res = minimize(
    fobjInertia,
    best_ind_iner,
    method='BFGS',
    options=dict(
        gtol=1e-06,
        eps=1.4901161193847656e-08,
        maxiter=100,
        disp=True,
        return_all=False,
        finite_diff_rel_step=None,
    ),
)
print(res.x)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjInertia/BFGS.pkl")

# Powell

In [None]:
# Powell on the quadratic-error objective, started from best_ind_qua.
# No bounds are passed to this solver.
start_time = time()

res = minimize(
    fobjQuadraticError,
    best_ind_qua,
    method='Powell',
    options=dict(
        xtol=0.0001,
        ftol=1e-06,
        maxiter=100,
        maxfev=None,
        disp=True,
        direc=None,
        return_all=False,
    ),
)
print(res.x)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/Powell.pkl")

In [None]:
# Powell on the inertia objective, started from best_ind_iner.
# No bounds are passed to this solver.
start_time = time()

res = minimize(
    fobjInertia,
    best_ind_iner,
    method='Powell',
    options=dict(
        xtol=0.0001,
        ftol=1e-06,
        maxiter=100,
        maxfev=None,
        disp=True,
        direc=None,
        return_all=False,
    ),
)
print(res.x)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjInertia/Powell.pkl")

# CG

In [None]:
# Conjugate gradient (CG) on the quadratic-error objective, started from
# best_ind_qua. No bounds are passed and no Jacobian is supplied.
start_time = time()

res = minimize(
    fobjQuadraticError,
    best_ind_qua,
    method='CG',
    options=dict(
        gtol=1e-06,
        eps=1.4901161193847656e-08,
        maxiter=100,
        disp=True,
        return_all=False,
        finite_diff_rel_step=None,
    ),
)

print(res.x)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/CG.pkl")

In [None]:
# Conjugate gradient (CG) on the inertia objective, started from
# best_ind_iner. No bounds are passed and no Jacobian is supplied.
start_time = time()

res = minimize(
    fobjInertia,
    best_ind_iner,
    method='CG',
    options=dict(
        gtol=1e-06,
        eps=1.4901161193847656e-08,
        maxiter=100,
        disp=True,
        return_all=False,
        finite_diff_rel_step=None,
    ),
)

print(res.x)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjInertia/CG.pkl")

# L-BFGS-B

In [None]:
# L-BFGS-B on the quadratic-error objective, started from best_ind_qua and
# restricted to `bounds`. No Jacobian is supplied.
start_time = time()

res = minimize(
    fobjQuadraticError,
    best_ind_qua,
    method='L-BFGS-B',
    bounds=bounds,
    options=dict(
        disp=None,
        maxcor=10,
        ftol=2.220446049250313e-09,
        gtol=1e-05,
        eps=1e-08,
        maxfun=15000,
        maxiter=15000,
        iprint=-1,
        maxls=20,
        finite_diff_rel_step=None,
    ),
)
# The full result object is printed here, not only the solution vector.
print(res)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/L-BFGS-B.pkl")

In [None]:
# L-BFGS-B on the inertia objective, started from best_ind_iner and
# restricted to `bounds`. No Jacobian is supplied.
start_time = time()

res = minimize(
    fobjInertia,
    best_ind_iner,
    method='L-BFGS-B',
    bounds=bounds,
    options=dict(
        disp=None,
        maxcor=10,
        ftol=2.220446049250313e-09,
        gtol=1e-05,
        eps=1e-08,
        maxfun=15000,
        maxiter=15000,
        iprint=-1,
        maxls=20,
        finite_diff_rel_step=None,
    ),
)
# The full result object is printed here, not only the solution vector.
print(res)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjInertia/L-BFGS-B.pkl")

# TNC

In [None]:
# TNC on the quadratic-error objective, started from best_ind_qua and
# restricted to `bounds`. No Jacobian is supplied; the run is capped at
# maxfun=100.
start_time = time()

res = minimize(
    fobjQuadraticError,
    best_ind_qua,
    method='TNC',
    bounds=bounds,
    options=dict(
        eps=1.4901161193847656e-08,
        scale=None,
        offset=None,
        mesg_num=None,
        maxCGit=-1,
        maxiter=None,
        eta=-1,
        stepmx=0,
        accuracy=0,
        minfev=0,
        ftol=-1,
        xtol=-1,
        gtol=-1,
        rescale=-1,
        disp=True,
        finite_diff_rel_step=None,
        maxfun=100,
    ),
)
# The full result object is printed here, not only the solution vector.
print(res)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/TNC.pkl")

In [None]:
# TNC on the inertia objective, started from best_ind_iner and restricted to
# `bounds`. No Jacobian is supplied; the run is capped at maxfun=100.
start_time = time()

res = minimize(
    fobjInertia,
    best_ind_iner,
    method='TNC',
    bounds=bounds,
    options=dict(
        eps=1.4901161193847656e-08,
        scale=None,
        offset=None,
        mesg_num=None,
        maxCGit=-1,
        maxiter=None,
        eta=-1,
        stepmx=0,
        accuracy=0,
        minfev=0,
        ftol=-1,
        xtol=-1,
        gtol=-1,
        rescale=-1,
        disp=True,
        finite_diff_rel_step=None,
        maxfun=100,
    ),
)
# The full result object is printed here, not only the solution vector.
print(res)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjInertia/TNC.pkl")

# COBYLA

In [None]:
# COBYLA on the quadratic-error objective, started from best_ind_qua.
# No bounds or constraints are passed to this solver.
start_time = time()

res = minimize(
    fobjQuadraticError,
    best_ind_qua,
    method='COBYLA',
    tol=2.220446049250313e-09,
    options=dict(
        rhobeg=1.0,
        maxiter=1000,
        disp=False,
        catol=2.220446049250313e-09,
    ),
)
# The full result object is printed here, not only the solution vector.
print(res)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/COBYLA.pkl")

In [None]:
# COBYLA on the inertia objective, started from best_ind_iner.
# No bounds or constraints are passed to this solver.
start_time = time()

res = minimize(
    fobjInertia,
    best_ind_iner,
    method='COBYLA',
    tol=2.220446049250313e-09,
    options=dict(
        rhobeg=1.0,
        maxiter=1000,
        disp=False,
        catol=2.220446049250313e-09,
    ),
)
# The full result object is printed here, not only the solution vector.
print(res)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjInertia/COBYLA.pkl")

# trust-constr

In [None]:
# trust-constr on the quadratic-error objective, started from best_ind_qua
# and restricted to `bounds`. No Jacobian or Hessian is supplied.
start_time = time()

res = minimize(
    fobjQuadraticError,
    best_ind_qua,
    method='trust-constr',
    bounds=bounds,
    options=dict(
        xtol=1e-08,
        gtol=1e-08,
        barrier_tol=1e-08,
        sparse_jacobian=None,
        maxiter=1000,
        verbose=0,
        finite_diff_rel_step=None,
        initial_constr_penalty=1.0,
        initial_tr_radius=1.0,
        initial_barrier_parameter=0.1,
        initial_barrier_tolerance=0.1,
        factorization_method=None,
        disp=True,
    ),
)
# The full result object is printed here, not only the solution vector.
print(res)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/trust-constr.pkl")

In [None]:
# trust-constr on the inertia objective, started from best_ind_iner and
# restricted to `bounds`. No Jacobian or Hessian is supplied.
start_time = time()

res = minimize(
    fobjInertia,
    best_ind_iner,
    method='trust-constr',
    bounds=bounds,
    options=dict(
        xtol=1e-08,
        gtol=1e-08,
        barrier_tol=1e-08,
        sparse_jacobian=None,
        maxiter=1000,
        verbose=0,
        finite_diff_rel_step=None,
        initial_constr_penalty=1.0,
        initial_tr_radius=1.0,
        initial_barrier_parameter=0.1,
        initial_barrier_tolerance=0.1,
        factorization_method=None,
        disp=True,
    ),
)
# The full result object is printed here, not only the solution vector.
print(res)
# Wall-clock time of the optimisation.
print("--- {} s seconds ---".format(time() - start_time))

# Persist the solution vector.
joblib.dump(res.x, "results/convergence/fobjInertia/trust-constr.pkl")