In [1]:
import pandas as pd
import csv
import numpy as np
import time

from Helper.Preprocessing import *
from Helper.Model import *
from Helper.GeneticAlgorithm import GeneticAlgorithm
from Helper.PSO import PSO
from Helper.FileManager import FileManager

In [2]:
# Load the raw dataset and normalise it in a single chain:
#   * parse the `time1` column into datetimes,
#   * index the rows by `time1` while keeping it as a regular column too,
#   * lower-case every `radio_id` so later string matching is case-insensitive.
dfx = (
    pd.read_csv("../ufjf_dataset.csv")
    .assign(time1=lambda frame: pd.to_datetime(frame['time1']))
    .set_index('time1', drop=False)
    .assign(radio_id=lambda frame: frame['radio_id'].str.lower())
)
dfx.head(10)

Unnamed: 0_level_0,time1,timet,client_id,radio_id
time1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-05-31 08:48:52,2019-05-31 08:48:52,13.0,7,economia-4andar
2019-05-31 08:48:52,2019-05-31 08:48:52,31.0,20,ru_nutricionistas
2019-05-31 08:48:53,2019-05-31 08:48:53,38.0,29,ap4600-67aef4
2019-05-31 08:48:53,2019-05-31 08:48:53,17.0,12,reitoria-centralatendimento
2019-05-31 08:48:54,2019-05-31 08:48:54,10.0,5,ccs-2andarfundos
2019-05-31 08:48:55,2019-05-31 08:48:55,9.0,6,engenharia-labspot
2019-05-31 08:48:56,2019-05-31 08:48:56,1.0,1,predioitamarfranco3andarsala5306
2019-05-31 08:48:56,2019-05-31 08:48:56,2.0,2,sala4157ladocantinaengenharia
2019-05-31 08:48:56,2019-05-31 08:48:56,2.0,3,iad-1andar
2019-05-31 08:48:57,2019-05-31 08:48:57,12.0,1,predioitamarfranco2andarsala5206


In [3]:
# Rows whose access-point id contains the literal substring 'ru'.
# regex=False keeps the match literal; na=False turns a missing radio_id into
# "no match" instead of a NaN that would make the boolean mask invalid.
ru_mask = dfx['radio_id'].str.contains('ru', regex=False, na=False)
dfxx = dfx[ru_mask]

# 'deptconstrucaocivilsala4107' contains "ru" only by accident (inside
# "construcao"), so it is excluded from the selection. Filtering, unlike
# list.remove, does not raise when the id is absent from the data.
excluded_radio = 'deptconstrucaocivilsala4107'
l_ru = [radio for radio in dfxx['radio_id'].unique() if radio != excluded_radio]

# Keep exactly the selected access points. An exact membership test avoids
# building a regular expression out of raw ids, where a metacharacter in an id
# or one id being a substring of another could select unintended rows.
df = dfx[dfx['radio_id'].isin(l_ru)]

# Project helper: turns the filtered log into the (X, Y) pair used by every
# model evaluation below.
X, Y = Preprocessing(df).get_data()

Pre-processing...


# Common Variables

In [4]:
# Search budget shared by the GA and PSO cells: `generation` bounds their
# outer loops and `population_size` is passed to both optimiser constructors.
generation = population_size = 15

# P and Q are only forwarded to FileManager.write2file with each result row.
# NOTE(review): their meaning is not visible in this notebook — confirm.
P, Q = 11, 3

# MLP Hyperparameters

In [5]:
# Search space of the MLP: number of neurons and learning rate.
n_MIN, n_MAX = 20, 200
learning_rate_MIN, learning_rate_MAX = 0.00001, 0.0009
maxiter = 2000  # not referenced by any other cell visible in this notebook

# Row 0 holds the lower bounds, row 1 the upper bounds;
# column 0 is the neuron count, column 1 the learning rate.
boundaries = np.array(
    [
        [n_MIN, learning_rate_MIN],
        [n_MAX, learning_rate_MAX],
    ],
    dtype=float,
)

# One flag per column: True -> integer-valued, False -> float-valued.
boundaries_type = [True, False]

print(boundaries[0,:])

[2.e+01 1.e-05]


# Genetic Algorithm - MLP

In [None]:
# Tune the MLP (neurons, learning rate) with the genetic algorithm and log
# every evaluated individual to the "ga_test" file.
ga = GeneticAlgorithm(population_size, boundaries, boundaries_type)
fm = FileManager()
fm.create_file("ga_test")
try:
    # Run exactly `generation` generations, numbered 1..generation.
    # (The previous `while index <= generation` loop ran one extra generation.)
    for index in range(1, generation + 1):
        print("\n--- Generation {} ---".format(index))
        ga.generate()
        population = ga.get_population()
        for i in population:
            # Time the full evaluation of this individual (build + fit + score).
            start_time = time.time()
            parameters = i.get_parameters()

            # parameters[0] -> number of neurons, parameters[1] -> learning rate.
            model = Model(X, Y, model_type = 'MLP', n_neurons = parameters[0], learning_rate = parameters[1])

            fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'adjusted_r2')

            # Report the fitness back to the GA, then persist the result row.
            ga.evaluate(i, fitness_r2)
            ga_time = time.time() - start_time
            fm.write2file(parameters, P, Q, fitness_r2, ga_time)

        ga.print_population()
        ga.print_best_solution()
finally:
    # Always release the log file, even if an evaluation fails midway.
    fm.close_file()

# PSO - MLP

In [None]:
# Tune the MLP (neurons, learning rate) with PSO and log every evaluated
# particle to the "pso_test" file.
pso = PSO(population_size, boundaries, boundaries_type)
fm = FileManager()
pop = pso.get_population()
fm.create_file("pso_test")


def _evaluate_mlp_particle(particle, param, start_time):
    """Score one particle's MLP configuration and record the result.

    Args:
        particle: the PSO particle being evaluated.
        param: the particle's position; ``param[0]`` is truncated to an int
            and used as ``n_neurons``, ``param[1]`` is the ``learning_rate``.
        start_time: ``time.time()`` value taken when work on this particle
            began; the logged duration is measured from it.
    """
    model = Model(X, Y, model_type = 'MLP', n_neurons = int(param[0]), learning_rate = param[1])
    fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'adjusted_r2')

    # Report the fitness back to the swarm, then persist the result row.
    pso.insert_particle_fitness(particle, fitness_r2)
    pso_time = time.time() - start_time
    fm.write2file(param, P, Q, fitness_r2, pso_time)


try:
    # Evaluate the initial positions before any movement.
    print('\nInitializing the population...')
    for p in pop:
        start_time = time.time()
        _evaluate_mlp_particle(p, p.get_position(), start_time)

    pso.print_global_best_particle()

    print('\nRunning PSO Loop...')
    # Run exactly `generation` iterations, numbered 1..generation.
    # (The previous `while iteration <= generation` loop reported "16 of 15".)
    for iteration in range(1, generation + 1):
        print('\nRunning... : {} of {}.'.format(iteration, generation))
        for p in pop:
            # Timing includes the position/velocity update as well as the fit.
            start_time = time.time()
            print("Particle {}.".format(p.get_index()))
            pso.calculate_position_velocity(p)
            param = p.get_position()
            print(param)

            _evaluate_mlp_particle(p, param, start_time)

        pso.print_global_best_particle()
        print("GBest_swap = {}".format(pso.get_best_particle_swap()))
finally:
    # Always release the log file, even if an evaluation fails midway.
    fm.close_file()

# DT Hyperparameters

In [None]:
# Search space of the decision tree: maximum depth and minimum samples per split.
max_depth_MIN, max_depth_MAX = 10, 200
min_samples_split_MIN, min_samples_split_MAX = 2, 22

# Row 0 holds the lower bounds, row 1 the upper bounds;
# column 0 is max_depth, column 1 is min_samples_split.
# Note: this rebinds `boundaries`/`boundaries_type` from the MLP section.
boundaries = np.array(
    [
        [max_depth_MIN, min_samples_split_MIN],
        [max_depth_MAX, min_samples_split_MAX],
    ],
    dtype=float,
)

# Both dimensions are integer-valued.
boundaries_type = [True, True]

print(boundaries[0,:])

# Genetic Algorithm - DT

In [None]:
# Tune the decision tree (max_depth, min_samples_split) with the genetic
# algorithm and log every evaluated individual to the "ga_test_dt" file.
ga = GeneticAlgorithm(population_size, boundaries, boundaries_type)
fm = FileManager()
fm.create_file("ga_test_dt")
try:
    # Run exactly `generation` generations, numbered 1..generation.
    # (The previous `while index <= generation` loop ran one extra generation.)
    for index in range(1, generation + 1):
        print("\n--- Generation {} ---".format(index))
        ga.generate()
        population = ga.get_population()
        for i in population:
            # Time the full evaluation of this individual (build + fit + score).
            start_time = time.time()
            parameters = i.get_parameters()

            # parameters[0] -> max_depth, parameters[1] -> min_samples_split.
            model = Model(X, Y, model_type = 'DT', max_depth = parameters[0], min_samples_split = parameters[1])

            fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'adjusted_r2')

            # Report the fitness back to the GA, then persist the result row.
            ga.evaluate(i, fitness_r2)
            ga_time = time.time() - start_time
            fm.write2file(parameters, P, Q, fitness_r2, ga_time)

        ga.print_population()
        ga.print_best_solution()
finally:
    # Always release the log file, even if an evaluation fails midway.
    fm.close_file()

# PSO - DT

In [None]:
# Tune the decision tree (max_depth, min_samples_split) with PSO and log every
# evaluated particle to the "pso_test_dt" file.
pso = PSO(population_size, boundaries, boundaries_type)
fm = FileManager()
pop = pso.get_population()
fm.create_file("pso_test_dt")


def _evaluate_dt_particle(particle, param, start_time):
    """Score one particle's decision-tree configuration and record the result.

    Args:
        particle: the PSO particle being evaluated.
        param: the particle's position; ``param[0]`` and ``param[1]`` are
            truncated to ints and used as ``max_depth`` and
            ``min_samples_split`` respectively.
        start_time: ``time.time()`` value taken when work on this particle
            began; the logged duration is measured from it.
    """
    model = Model(X, Y, model_type = 'DT', max_depth = int(param[0]), min_samples_split = int(param[1]))
    fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'adjusted_r2')

    # Report the fitness back to the swarm, then persist the result row.
    pso.insert_particle_fitness(particle, fitness_r2)
    pso_time = time.time() - start_time
    fm.write2file(param, P, Q, fitness_r2, pso_time)


try:
    # Evaluate the initial positions before any movement.
    print('\nInitializing the population...')
    for p in pop:
        start_time = time.time()
        _evaluate_dt_particle(p, p.get_position(), start_time)

    pso.print_global_best_particle()

    print('\nRunning PSO Loop...')
    # Run exactly `generation` iterations, numbered 1..generation.
    # (The previous `while iteration <= generation` loop reported "16 of 15".)
    for iteration in range(1, generation + 1):
        print('\nRunning... : {} of {}.'.format(iteration, generation))
        for p in pop:
            # Timing includes the position/velocity update as well as the fit.
            start_time = time.time()
            print("Particle {}.".format(p.get_index()))
            pso.calculate_position_velocity(p)
            param = p.get_position()
            print(param)

            _evaluate_dt_particle(p, param, start_time)

        pso.print_global_best_particle()
        print("GBest_swap = {}".format(pso.get_best_particle_swap()))
finally:
    # Always release the log file, even if an evaluation fails midway.
    fm.close_file()

In [None]:
# Capture what each optimiser's get_list_best() returns, for the MLP plot.
# NOTE(review): `ga` and `pso` were re-assigned by the DT cells above, so in a
# top-to-bottom run these lists come from the DT optimisers, not the MLP ones.
# The MLP histories need to be captured right after the MLP runs instead.
ga_mlp, pso_mlp = ga.get_list_best(), pso.get_list_best()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Overlay the GA and PSO series for the MLP on one axes and save the figure
# to gapsomlp.png before displaying it.
fig, ax = plt.subplots()
ax.plot(ga_mlp, color = 'r', label = 'GA-MLP')
ax.plot(pso_mlp, color = 'b', label = 'PSO-MLP')
ax.set_xlabel("Individual/Particle")
ax.set_ylabel("Average R² ")
ax.set_title("PSO and GA Comparison - MLP")
ax.legend()
fig.tight_layout()
fig.savefig('gapsomlp.png', dpi = 200)
plt.show()

In [None]:
# Capture what each DT optimiser's get_list_best() returns, then overlay the
# two series on one axes and save the figure to gapsodt.png before displaying it.
ga_dt, pso_dt = ga.get_list_best(), pso.get_list_best()

fig, ax = plt.subplots()
ax.plot(ga_dt, color = 'r', label = 'GA-DT')
ax.plot(pso_dt, color = 'b', label = 'PSO-DT')
ax.set_xlabel("Individual/Particle")
ax.set_ylabel("Average R² ")
ax.set_title("PSO and GA Comparison - DT")
ax.legend()
fig.tight_layout()
fig.savefig('gapsodt.png', dpi = 200)
plt.show()