In [1]:
from scipy.optimize import Bounds
import numpy as np
from scipy.optimize import minimize
import joblib
import os

from deap import base
from deap import creator
from deap import tools

import random

from time import time



In [2]:
# Number of cluster centroids; each centroid contributes 7 coordinates to the
# flat search vector (CLUSTER_DIM = 7 * NUM_CLUSTERS).
NUM_CLUSTERS = 7
# Iteration tag that is interpolated into the "iteration{}" results paths.
g = 800

In [3]:
# Length of the flat search vector: 7 coordinates for each of the NUM_CLUSTERS centroids.
CLUSTER_DIM = 7 * NUM_CLUSTERS

# DEAP types: a single-objective fitness with weight -1.0 (minimisation) and a
# list-based individual that carries that fitness.
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Each gene is drawn with random.uniform(0, 1); an individual is CLUSTER_DIM such genes.
toolbox.register("attr_flt", random.uniform, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_flt,n=CLUSTER_DIM)

# A population is a plain list of individuals (its size is supplied at call time).
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

#reduced_matrix_np = joblib.load('reduced_matrix_np.pkl')
#matrix_algorithm = reduced_matrix_np

# Document matrix read by the objective functions through the global `matrix_algorithm`.
# NOTE(review): joblib.load unpickles the file, so only load files from a trusted source.
matrix_np = joblib.load('matrix_np.pkl')
matrix_algorithm = matrix_np

#ind_fqua = joblib.load("results/fobjQuadraticError/iteration{}/best_gen_individuals_iter{}.pkl".format(g, g))
#best_ind_qua = ind_fqua[-1]

#ind_finer = joblib.load("./results/fobjInertia/best_gen_individuals{}.pkl".format(150))
#best_ind_iner = ind_finer[-1]

# Create Folders

In [4]:
# Output directory for the optimiser results of this run, tagged with the iteration number g.
path = "./results/convergence/fobjQuadraticError/iteration{}".format(g)
# Remember whether the directory was already there so the message printed below is accurate.
already_present = os.path.isdir(path)
try:
    # exist_ok=True keeps the cell idempotent on re-runs and removes the
    # check-then-create race of a separate isdir() test.
    os.makedirs(path, exist_ok=True)
except OSError:
    print("Creation of the directory %s failed" % path)
else:
    if already_present:
        print("Directory %s already exists" % path)
    else:
        print("Successfully created the directory %s " % path)

Successfully created the directory ./results/convergence/fobjQuadraticError/iteration800 


# Define Functions

In [5]:
def auxiliarFunction(documents, dimensions, number_topics, dims_per_cluster=7):
    """Assign each document to its nearest centroid and accumulate per-cluster statistics.

    Parameters
    ----------
    documents : iterable of array-like
        One vector per document; each must support ``document - centroid``
        with a centroid of ``dims_per_cluster`` components.
    dimensions : sequence
        Flat centroid vector. Centroid ``c`` occupies the slice
        ``[c * dims_per_cluster, (c + 1) * dims_per_cluster)``.
    number_topics : int
        Number of centroids stored in ``dimensions``.
    dims_per_cluster : int, optional
        Number of coordinates per centroid. Defaults to 7, the value that was
        previously hard-coded.

    Returns
    -------
    counter_docs : list of int
        Number of documents assigned to each centroid.
    distance_to_cluster_2 : list
        For each centroid, the sum of squared Euclidean distances from its
        assigned documents to that centroid.

    Raises
    ------
    ValueError
        If ``dims_per_cluster`` is not positive, or if there are documents but
        ``number_topics`` is 0 (no centroid to assign them to).
    """
    if dims_per_cluster <= 0:
        raise ValueError("dims_per_cluster must be a positive integer")

    counter_docs = [0] * number_topics
    distance_to_cluster_2 = [0] * number_topics

    # The centroid slices do not depend on the document, so cut them once
    # instead of re-slicing the flat vector for every document.
    centroids = [
        dimensions[start:start + dims_per_cluster]
        for start in range(0, number_topics * dims_per_cluster, dims_per_cluster)
    ]

    for document in documents:
        distance_to_cluster = [
            np.linalg.norm(document - centroid) for centroid in centroids
        ]

        # Ties are resolved in favour of the lowest cluster index.
        best_distance_index = distance_to_cluster.index(min(distance_to_cluster))

        counter_docs[best_distance_index] += 1
        distance_to_cluster_2[best_distance_index] += pow(
            distance_to_cluster[best_distance_index], 2
        )

    return counter_docs, distance_to_cluster_2

In [6]:
def fobjInertia(k):
    """Inertia objective: total within-cluster sum of squared distances.

    ``k`` is the flat centroid vector (7 values per cluster). The documents are
    read from the global ``matrix_algorithm``. If any cluster ends up with no
    documents, the fixed penalty 1000000000 is returned instead.
    """
    n_clusters = int(len(k) / 7)

    # Step 1: per-cluster document counts and summed squared distances.
    docs_per_cluster, sq_dist_per_cluster = auxiliarFunction(
        documents=matrix_algorithm,
        dimensions=k,
        number_topics=n_clusters,
    )

    # Step 2: fitness. Solutions with an empty cluster get a large penalty.
    if 0 in docs_per_cluster:
        return 1000000000

    # Sum of squared Euclidean distances over all clusters.
    return sum(sq_dist_per_cluster)

In [7]:
def fobjQuadraticError(k):
    """Quadratic-error objective: mean over clusters of the mean squared distance.

    ``k`` is the flat centroid vector (7 values per cluster). The documents are
    read from the global ``matrix_algorithm``. If any cluster ends up with no
    documents, the fixed penalty 5 is returned instead.
    """
    n_clusters = int(len(k) / 7)

    # Step 1: per-cluster document counts and summed squared distances.
    docs_per_cluster, sq_dist_per_cluster = auxiliarFunction(
        documents=matrix_algorithm,
        dimensions=k,
        number_topics=n_clusters,
    )

    # Step 2: fitness. Solutions with an empty cluster get the penalty value.
    if 0 in docs_per_cluster:
        return 5

    # Accumulate each cluster's mean squared distance, then average over clusters.
    accumulated = 0
    for n_docs, sq_dist in zip(docs_per_cluster, sq_dist_per_cluster):
        accumulated += (1 / n_docs) * sq_dist

    return accumulated / n_clusters

In [8]:
# For the solvers that need a Jacobian (J)
def gradfunc(x, fobj=None, h=0.001):
    """Forward-difference approximation of the gradient of an objective at ``x``.

    Parameters
    ----------
    x : sequence of float
        Point at which the gradient is estimated. It is not modified.
    fobj : callable, optional
        Objective ``fobj(point) -> float``. Defaults to ``fobjQuadraticError``,
        the objective minimised in this notebook (the previous body called a
        name, ``fobj2``, that is not defined anywhere in the notebook).
    h : float, optional
        Step added to one coordinate at a time. Defaults to 0.001.

    Returns
    -------
    list
        One entry per coordinate of ``x``:
        ``(fobj(x + h * e_i) - fobj(x)) / h``.
    """
    if fobj is None:
        fobj = fobjQuadraticError

    # np.array(...) copies, so the caller's x is never written to.
    base_point = np.array(x, dtype=float)
    base_value = fobj(base_point)

    grad = []
    for i in range(len(base_point)):
        # Fresh copy per coordinate: only coordinate i is perturbed, and
        # perturbations from earlier coordinates do not accumulate.
        shifted = base_point.copy()
        shifted[i] = shifted[i] + h
        grad.append((fobj(shifted) - base_value) / h)

    return grad

# Box constraints: every one of the 7 * NUM_CLUSTERS coordinates is kept in [0, 1].
bounds = Bounds(
    lb=[0] * (7 * NUM_CLUSTERS),
    ub=[1] * (7 * NUM_CLUSTERS),
)

In [9]:
def sum_one1(k):
    """Constraint residual for the 1st block of 7 entries of ``k`` (indices 0-6).

    Returns ``|k[0] + ... + k[6] - 1|``, which is zero exactly when those
    seven entries sum to one.
    """
    first = 0 * 7
    total = 0
    for position in range(first, first + 7):
        total = total + k[position]
    return abs(total - 1)
    

In [10]:
def sum_one2(k):
    """Constraint residual for the 2nd block of 7 entries of ``k`` (indices 7-13).

    Returns ``|k[7] + ... + k[13] - 1|``, which is zero exactly when those
    seven entries sum to one.
    """
    first = 1 * 7
    total = 0
    for position in range(first, first + 7):
        total = total + k[position]
    return abs(total - 1)

In [11]:
def sum_one3(k):
    """Constraint residual for the 3rd block of 7 entries of ``k`` (indices 14-20).

    Returns ``|k[14] + ... + k[20] - 1|``, which is zero exactly when those
    seven entries sum to one.
    """
    first = 2 * 7
    total = 0
    for position in range(first, first + 7):
        total = total + k[position]
    return abs(total - 1)

In [12]:
def sum_one4(k):
    """Constraint residual for the 4th block of 7 entries of ``k`` (indices 21-27).

    Returns ``|k[21] + ... + k[27] - 1|``, which is zero exactly when those
    seven entries sum to one.
    """
    first = 3 * 7
    total = 0
    for position in range(first, first + 7):
        total = total + k[position]
    return abs(total - 1)

In [13]:
def sum_one5(k):
    """Constraint residual for the 5th block of 7 entries of ``k`` (indices 28-34).

    Returns ``|k[28] + ... + k[34] - 1|``, which is zero exactly when those
    seven entries sum to one.
    """
    first = 4 * 7
    total = 0
    for position in range(first, first + 7):
        total = total + k[position]
    return abs(total - 1)

In [14]:
def sum_one6(k):
    """Constraint residual for the 6th block of 7 entries of ``k`` (indices 35-41).

    Returns ``|k[35] + ... + k[41] - 1|``, which is zero exactly when those
    seven entries sum to one.
    """
    first = 5 * 7
    total = 0
    for position in range(first, first + 7):
        total = total + k[position]
    return abs(total - 1)

In [15]:
def sum_one7(k):
    """Constraint residual for the 7th block of 7 entries of ``k`` (indices 42-48).

    Returns ``|k[42] + ... + k[48] - 1|``, which is zero exactly when those
    seven entries sum to one.
    """
    first = 6 * 7
    total = 0
    for position in range(first, first + 7):
        total = total + k[position]
    return abs(total - 1)

In [16]:
# Starting point (x0) passed to every minimize() call in this notebook:
# 49 values, i.e. 7 coordinates for each of the 7 clusters.
# NOTE(review): presumably the best individual from an earlier genetic-algorithm
# run (cf. the commented-out joblib.load of best_gen_individuals) -- confirm.
best_ind_qua = [0.08677035865210417, 0.07278468712428088, 0.07109726182607509, 0.4904948137060272, 0.09094998615804475, 0.0989125036064867, 0.08899038892698147, 0.5270465950617436, 0.07692425033026434, 0.07298998649076999, 0.08838148821350596, 0.0782574396572057, 0.045150883561909466, 0.11124935668460073, 0.06956840289940822, 0.06410684430351289, 0.0620043574482015, 0.06593183540050367, 0.08777158651469993, 0.06235934461899528, 0.5882576288146786, 0.07427020259065427, 0.4895285705813661, 0.09548090971259429, 0.06934601238291002, 0.0884145995843863, 0.08787019194165241, 0.09508951320643658, 0.06802896944417813, 0.10270313010011142, 0.10596313348643696, 0.08170172003230411, 0.07840352583234647, 0.4930168144094431, 0.07018270669517965, 0.09125486275914127, 0.08737765255740326, 0.07529169583310306, 0.0799640209212711, 0.512692538835611, 0.06211787976351038, 0.0913013493299602, 0.08847800619080015, 0.08981823273928771, 0.49349383502511723, 0.054521657611092095, 0.08764783879936638, 0.10199709557651074, 0.08404333405782603]

In [17]:
# One equality constraint per cluster: each sum_oneN residual must be zero,
# i.e. the 7 coordinates of that cluster must sum to one.
cons1 = dict(type='eq', fun=sum_one1)
cons2 = dict(type='eq', fun=sum_one2)
cons3 = dict(type='eq', fun=sum_one3)
cons4 = dict(type='eq', fun=sum_one4)
cons5 = dict(type='eq', fun=sum_one5)
cons6 = dict(type='eq', fun=sum_one6)
cons7 = dict(type='eq', fun=sum_one7)

# Constraint list in the form accepted by scipy.optimize.minimize.
cons = [
    cons1,
    cons2,
    cons3,
    cons4,
    cons5,
    cons6,
    cons7,
]

In [18]:
def callbackF(Xi):
    """Callback for minimize(): print a running counter and the objective at ``Xi``.

    Reads and increments the global ``Nfeval``. The counter advances once per
    callback invocation, and each call re-evaluates ``fobjQuadraticError``.
    """
    global Nfeval
    row = '{0:4d} {1: 3.6f}'.format(Nfeval, fobjQuadraticError(Xi))
    print(row)
    Nfeval = Nfeval + 1

### SLSQP

In [19]:
"""
eps: Step size used for numerical approximation of the Jacobian.

ftol : Precision goal for the value of f in the stopping criterion.

"""
start_time = time()

# Counter printed and incremented by callbackF on every iteration.
Nfeval = 1

# Header for the per-iteration table printed by callbackF.
print('{0:4s}  {1:9s}'.format('Iter', 'f(X)'))

slsqp_options = {
    'maxiter': 100,
    'ftol': 1e-7,
    'iprint': 1,
    'disp': True,
    'eps': 1e-7,
    'finite_diff_rel_step': None,
}

# SLSQP is the only run that uses both the [0, 1] bounds and the
# per-cluster "sum to one" equality constraints.
res = minimize(
    fun=fobjQuadraticError,
    x0=best_ind_qua,
    args=(),
    method='SLSQP',
    jac=None,
    bounds=bounds,
    constraints=cons,
    tol=None,
    callback=callbackF,
    options=slsqp_options,
)
print(res.x)

elapsed = time() - start_time
print("--- {} s seconds ---".format(elapsed))

joblib.dump(res.x, "results/convergence/fobjQuadraticError/iteration{}/SLSQP.pkl".format(g))

Iter  f(X)     
   1  0.084250
   2  0.105616
   3  0.082737
   4  0.082711
   5  0.082693
   6  0.082683
   7  0.082675
   8  0.082662
   9  0.082643
  10  0.082630
  11  0.082625
  12  0.082625
Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.08262480360461126
            Iterations: 12
            Function evaluations: 603
            Gradient evaluations: 12
[0.07090695 0.05472499 0.04853281 0.57385279 0.08962707 0.08657469
 0.07578069 0.59317938 0.06076465 0.05862975 0.07064416 0.06814184
 0.04867284 0.09996735 0.06900363 0.05984088 0.053046   0.0576334
 0.07372197 0.05189116 0.63486295 0.07800525 0.50455501 0.08606383
 0.0680363  0.09070192 0.08341853 0.08921917 0.060258   0.08105422
 0.09413673 0.09088451 0.07929615 0.52263475 0.07173562 0.07688051
 0.06956704 0.06030905 0.07134867 0.57871554 0.0567995  0.08637969
 0.08485652 0.07823738 0.53178314 0.05498048 0.08116257 0.09027348
 0.07870644]
--- 4515.074224472046 s seconds ---


['results/convergence/fobjQuadraticError/iteration800/SLSQP.pkl']

In [20]:
print(res.x)

[0.07090695 0.05472499 0.04853281 0.57385279 0.08962707 0.08657469
 0.07578069 0.59317938 0.06076465 0.05862975 0.07064416 0.06814184
 0.04867284 0.09996735 0.06900363 0.05984088 0.053046   0.0576334
 0.07372197 0.05189116 0.63486295 0.07800525 0.50455501 0.08606383
 0.0680363  0.09070192 0.08341853 0.08921917 0.060258   0.08105422
 0.09413673 0.09088451 0.07929615 0.52263475 0.07173562 0.07688051
 0.06956704 0.06030905 0.07134867 0.57871554 0.0567995  0.08637969
 0.08485652 0.07823738 0.53178314 0.05498048 0.08116257 0.09027348
 0.07870644]


# Powell

In [None]:
start_time = time()

powell_options = {
    'xtol': 0.0001,
    'ftol': 1e-15,
    'maxiter': 100,
    'maxfev': None,
    'disp': True,
    'direc': None,
    'return_all': False,
}

# Powell run from the same starting point; no bounds or constraints are passed.
res = minimize(
    fun=fobjQuadraticError,
    x0=best_ind_qua,
    args=(),
    method='Powell',
    bounds=None,
    tol=None,
    callback=None,
    options=powell_options,
)
print(res.x)

elapsed = time() - start_time
print("--- {} s seconds ---".format(elapsed))

joblib.dump(res.x, "results/convergence/fobjQuadraticError/iteration{}/Powell.pkl".format(g))

# CG

In [None]:
start_time = time()

# NOTE(review): with jac=None, 'eps' is the finite-difference step used to
# estimate the gradient. 1.49e-15 is close to double-precision round-off for
# coordinates of this magnitude, so the estimate may be dominated by noise;
# SciPy's default is 1.4901161193847656e-08 -- confirm the exponent is intended.
cg_options = {
    'gtol': 1e-10,
    'eps': 1.4901161193847656e-15,
    'maxiter': 100,
    'disp': True,
    'return_all': False,
    'finite_diff_rel_step': None,
}

# Conjugate-gradient run from the same starting point; unconstrained.
res = minimize(
    fun=fobjQuadraticError,
    x0=best_ind_qua,
    args=(),
    method='CG',
    jac=None,
    tol=None,
    callback=None,
    options=cg_options,
)

print(res.x)

elapsed = time() - start_time
print("--- {} s seconds ---".format(elapsed))

joblib.dump(res.x, "results/convergence/fobjQuadraticError/iteration{}/CG.pkl".format(g))

# L-BFGS-B

In [None]:
start_time = time()

lbfgsb_options = {
    'disp': None,
    'maxcor': 10,
    'ftol': 2.220446049250313e-15,
    'gtol': 1e-8,
    'eps': 1e-08,
    'maxfun': 15000,
    'maxiter': 15000,
    'iprint': -1,
    'maxls': 20,
    'finite_diff_rel_step': None,
}

# L-BFGS-B run from the same starting point, restricted to the [0, 1] box.
res = minimize(
    fun=fobjQuadraticError,
    x0=best_ind_qua,
    args=(),
    method='L-BFGS-B',
    jac=None,
    bounds=bounds,
    tol=None,
    callback=None,
    options=lbfgsb_options,
)
print(res)

elapsed = time() - start_time
print("--- {} s seconds ---".format(elapsed))

joblib.dump(res.x, "results/convergence/fobjQuadraticError/iteration{}/L-BFGS-B.pkl".format(g))

# TNC

In [None]:
start_time = time()

# TNC (truncated Newton) run from the same starting point, restricted to the [0, 1] box.
# NOTE(review): with jac=None, 'eps' is the finite-difference step used to
# estimate the gradient. 1.49e-15 is close to double-precision round-off for
# coordinates of this magnitude -- confirm the exponent is intended
# (SciPy's TNC default is 1e-8).
res = minimize(fun = fobjQuadraticError, x0= best_ind_qua, args=(), method='TNC',
        jac=None, bounds=bounds, tol=None, callback=None, 
         options={'eps': 1.4901161193847656e-15, 'scale': None, 'offset': None, 
        'mesg_num': None, 'maxCGit': - 1, 'maxiter': None, 'eta': - 1, 
        'stepmx': 0, 'accuracy': 0, 'minfev': 0, 'ftol': - 1, 'xtol': - 1, 
        'gtol': - 1, 'rescale': - 1, 'disp': True,
        'finite_diff_rel_step': None, 'maxfun': 100})
print(res)
print("--- {} s seconds ---".format((time() - start_time)))

# Save next to the other solvers' results. The path previously had no
# "iteration{}" placeholder, so .format(g) did nothing and the file was written
# outside the per-iteration directory.
joblib.dump(res.x, "results/convergence/fobjQuadraticError/iteration{}/TNC.pkl".format(g))

# COBYLA

In [None]:
start_time = time()

cobyla_options = {
    'rhobeg': 1.0,
    'maxiter': 1000,
    'disp': False,
    'catol': 2.220446049250313e-15,
}

# COBYLA run from the same starting point; an empty constraint tuple is passed.
res = minimize(
    fun=fobjQuadraticError,
    x0=best_ind_qua,
    args=(),
    method='COBYLA',
    constraints=(),
    tol=2.220446049250313e-15,
    callback=None,
    options=cobyla_options,
)
print(res)

elapsed = time() - start_time
print("--- {} s seconds ---".format(elapsed))

joblib.dump(res.x, "results/convergence/fobjQuadraticError/iteration{}/COBYLA.pkl".format(g))

# trust-constr

In [None]:
start_time = time()

trust_constr_options = {
    'xtol': 1e-15,
    'gtol': 1e-08,
    'barrier_tol': 1e-15,
    'sparse_jacobian': None,
    'maxiter': 1000,
    'verbose': 0,
    'finite_diff_rel_step': None,
    'initial_constr_penalty': 1.0,
    'initial_tr_radius': 1.0,
    'initial_barrier_parameter': 0.1,
    'initial_barrier_tolerance': 0.1,
    'factorization_method': None,
    'disp': True,
}

# trust-constr run from the same starting point, restricted to the [0, 1] box;
# an empty constraint tuple is passed.
res = minimize(
    fun=fobjQuadraticError,
    x0=best_ind_qua,
    args=(),
    method='trust-constr',
    hess=None,
    hessp=None,
    bounds=bounds,
    constraints=(),
    tol=None,
    callback=None,
    options=trust_constr_options,
)
print(res)

elapsed = time() - start_time
print("--- {} s seconds ---".format(elapsed))

joblib.dump(res.x, "results/convergence/fobjQuadraticError/iteration{}/trust-constr.pkl".format(g))