In [None]:
import pandas as pd
import csv
import numpy as np
import time

from Helper.Preprocessing import *
from Helper.Model import *
from Helper.GeneticAlgorithm import GeneticAlgorithm
from Helper.PSO import PSO
from Helper.FileManager import FileManager

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MaxNLocator

sns.set(rc = {'figure.figsize':(20,8)})

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import GRU
from keras.layers import Activation
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects
from keras.callbacks import EarlyStopping

In [None]:
# Load the raw connection log, parse timestamps, lower-case the access-point
# names, and index the frame by timestamp (the column itself is kept).
dfx = pd.read_csv("../ufjf_dataset.csv").assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
    access_point=lambda frame: frame['access_point'].str.lower(),
)
dfx.index = dfx['timestamp']
dfx.head(10)

In [None]:
# Number of distinct access points, then number of distinct hosts.
for column_name in ('access_point', 'host'):
    print(len(dfx[column_name].unique()))

In [None]:
# Keep only the access points whose (lower-cased) name contains 'ru'.
# na=False makes rows with a missing access_point drop out instead of
# producing a NaN mask that pandas refuses to index with.
dfxx = dfx[dfx['access_point'].str.contains('ru', na=False)]
l_ru = list(dfxx['access_point'].unique())
#l_ru.remove('deptconstrucaocivilsala4107')
print(l_ru)

# Select rows by exact membership in l_ru. The previous
# str.contains('|'.join(l_ru)) interpreted every name as a regular expression,
# so a name with a metacharacter ('.', '+', '(' ...) could match unrelated
# rows or raise re.error.
df = dfx[dfx['access_point'].isin(l_ru)]

# Build the preprocessing object once and reuse it for both the model inputs
# and the displayed maximum of the 'count' column.
pp = Preprocessing(df)
X, Y = pp.get_data()
max(pp.get_dataframe()['count'])

# Common Variables

In [None]:
# Search budget shared by the GA and PSO cells: `generation` bounds their outer
# loops and `population_size` is handed to both optimizer constructors.
generation, population_size = 5, 5

# P and Q are only written verbatim into every results row.
# NOTE(review): their meaning is not visible in this notebook - confirm.
P, Q = 11, 3

# Metric name forwarded to Model.fit_predict_evaluate and used in output file names.
metric = 'r2'

# MLP Hyperparameters

In [None]:
# Search ranges for the two MLP hyperparameters.
n_MIN, n_MAX = 20, 200
learning_rate_MIN, learning_rate_MAX = 0.00001, 0.0009
maxiter = 2000  # NOTE(review): not referenced by any cell shown here - confirm it is still needed

# Row 0 holds the lower bounds, row 1 the upper bounds;
# column 0 is the neuron count, column 1 the learning rate.
boundaries = np.array(
    [[n_MIN, learning_rate_MIN],
     [n_MAX, learning_rate_MAX]],
    dtype=float,
)

# One flag per column: True = integer-valued, False = float-valued.
boundaries_type = [True, False]

print(boundaries[0,:])

# Genetic Algorithm - MLP

In [None]:
# Genetic-algorithm search over the MLP hyperparameters (n_neurons, learning_rate).
# Every evaluated individual is appended to the results file, and the best
# fitness after each generation is collected in `mlp_ga_ite`.
ga = GeneticAlgorithm(population_size, boundaries, boundaries_type, mutation_rate = 0.6)
fm = FileManager()
fm.create_file("ga_test_mlp_" + metric)
mlp_ga_ite = []
try:
    for index in range(1, generation + 1):
        print("\n--- Generation {} ---".format(index))
        ga.generate()
        population = ga.get_population()
        for i in population:
            start_time = time.time()
            parameters = i.get_parameters()

            model = Model(X, Y, model_type = 'MLP', n_neurons = parameters[0], learning_rate = parameters[1])

            fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = metric)

            ga.evaluate(i, fitness_r2)
            # Wall-clock cost of building and evaluating this individual.
            ga_time = time.time() - start_time
            fm.write2file([parameters, P, Q, fitness_r2, ga_time])

        mlp_ga_ite.append(ga.get_best_individual().get_fitness())
        ga.print_population()
        ga.print_best_solution()
finally:
    # Close the results file even if an evaluation raises part-way through,
    # so the rows written so far are not left in an open handle.
    fm.close_file()

# PSO - MLP

In [None]:
# Particle-swarm search over the MLP hyperparameters (n_neurons, learning_rate).
# The swarm is scored once at its initial positions, then moved and re-scored
# for `generation` iterations; the global-best fitness after each iteration is
# collected in `mlp_pso_ite`.

# Initializing the variables and the population
pso = PSO(population_size, boundaries, boundaries_type)
fm = FileManager()
pop = pso.get_population()
fm.create_file("pso_test_mlp_" + metric)
mlp_pso_ite = []
try:
    print('\nInitializing the population...')
    for p in pop:
        start_time = time.time()
        param = p.get_position()

        # The position is cast to int for the neuron count before building the model.
        model = Model(X, Y, model_type = 'MLP', n_neurons = int(param[0]), learning_rate = param[1])
        fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = metric)

        pso.insert_particle_fitness(p, fitness_r2)
        pso_time = time.time() - start_time
        fm.write2file([param, P, Q, fitness_r2, pso_time])

    pso.print_global_best_particle()

    print('\nRunning PSO Loop...')
    for iteration in range(1, generation + 1):
        print('\nRunning... : {} of {}.'.format(iteration, generation))
        for p in pop:
            start_time = time.time()
            print("Particle {}.".format(p.get_index()))
            pso.calculate_position_velocity(p)
            param = p.get_position()
            print(param)

            model = Model(X, Y, model_type = 'MLP', n_neurons = int(param[0]), learning_rate = param[1])
            fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = metric)

            pso.insert_particle_fitness(p, fitness_r2)
            pso_time = time.time() - start_time
            fm.write2file([param, P, Q, fitness_r2, pso_time])

        mlp_pso_ite.append(pso.get_gbest_particle().get_pbest_fitness())
        pso.print_global_best_particle()
        print("GBest_swap = {}".format(pso.get_best_particle_swap()))
finally:
    # Close the results file even if an evaluation raises part-way through.
    fm.close_file()

# DT Hyperparameters

In [None]:
# Search ranges for the two decision-tree hyperparameters.
max_depth_MIN, max_depth_MAX = 10, 200
min_samples_split_MIN, min_samples_split_MAX = 2, 42

# Row 0 holds the lower bounds, row 1 the upper bounds;
# column 0 is max_depth, column 1 is min_samples_split.
boundaries = np.array(
    [[max_depth_MIN, min_samples_split_MIN],
     [max_depth_MAX, min_samples_split_MAX]],
    dtype=float,
)

# Both hyperparameters are integer-valued.
boundaries_type = [True, True]

print(boundaries[0,:])

# Genetic Algorithm - DT

In [None]:
# Genetic-algorithm search over the decision-tree hyperparameters
# (max_depth, min_samples_split). Every evaluated individual is appended to the
# results file, and the best fitness after each generation is collected in
# `dt_ga_ite`.
ga = GeneticAlgorithm(population_size, boundaries, boundaries_type, mutation_rate = 0.5)
fm = FileManager()
fm.create_file("ga_test_dt_" + metric)
dt_ga_ite = []
try:
    for index in range(1, generation + 1):
        print("\n--- Generation {} ---".format(index))
        ga.generate()
        population = ga.get_population()
        for i in population:
            start_time = time.time()
            parameters = i.get_parameters()

            model = Model(X, Y, model_type = 'DT', max_depth = parameters[0], min_samples_split = parameters[1])

            fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = metric)

            ga.evaluate(i, fitness_r2)
            # Wall-clock cost of building and evaluating this individual.
            ga_time = time.time() - start_time
            fm.write2file([parameters, P, Q, fitness_r2, ga_time])

        dt_ga_ite.append(ga.get_best_individual().get_fitness())
        ga.print_population()
        ga.print_best_solution()
finally:
    # Close the results file even if an evaluation raises part-way through.
    fm.close_file()

# PSO - DT

In [None]:
# Particle-swarm search over the decision-tree hyperparameters
# (max_depth, min_samples_split). The swarm is scored once at its initial
# positions, then moved and re-scored for `generation` iterations; the
# global-best fitness after each iteration is collected in `dt_pso_ite`.

# Initializing the variables and the population
pso = PSO(population_size, boundaries, boundaries_type)
fm = FileManager()
pop = pso.get_population()
fm.create_file("pso_test_dt_" + metric)
dt_pso_ite = []
try:
    print('\nInitializing the population...')
    for p in pop:
        start_time = time.time()
        param = p.get_position()
        print(param)
        # Both position components are cast to int before building the tree.
        model = Model(X, Y, model_type = 'DT', max_depth = int(param[0]), min_samples_split = int(param[1]))
        fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = metric)

        pso.insert_particle_fitness(p, fitness_r2)
        pso_time = time.time() - start_time
        fm.write2file([param, P, Q, fitness_r2, pso_time])

    pso.print_global_best_particle()

    print('\nRunning PSO Loop...')
    for iteration in range(1, generation + 1):
        print('\nRunning... : {} of {}.'.format(iteration, generation))
        for p in pop:
            start_time = time.time()
            print("Particle {}.".format(p.get_index()))
            pso.calculate_position_velocity(p)
            param = p.get_position()
            print(param)

            model = Model(X, Y, model_type = 'DT', max_depth = int(param[0]), min_samples_split = int(param[1]))
            fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = metric)

            pso.insert_particle_fitness(p, fitness_r2)
            pso_time = time.time() - start_time
            fm.write2file([param, P, Q, fitness_r2, pso_time])

        dt_pso_ite.append(pso.get_gbest_particle().get_pbest_fitness())
        pso.print_global_best_particle()
        print("GBest_swap = {}".format(pso.get_best_particle_swap()))
finally:
    # Close the results file even if an evaluation raises part-way through.
    fm.close_file()

In [None]:
# Display the best GA fitness recorded after each generation of the MLP search.
mlp_ga_ite

In [None]:
# Convergence curves of all four optimizer/model pairs on one figure.
# The x axis is the 1-based iteration number; all four lists must have the
# same length or the DataFrame constructor raises.
x = list(range(1, len(dt_ga_ite) + 1))
data_preproc = pd.DataFrame({
    'x': x,
    'GA-MLP': mlp_ga_ite,
    'PSO-MLP': mlp_pso_ite,
    'GA-DT': dt_ga_ite,
    'PSO-DT': dt_pso_ite})

plt.figure()
fontsize = 20
# Melt to long form so seaborn draws one line per optimizer/model column.
ax = sns.lineplot(x='x', y='value', hue='variable', data=pd.melt(data_preproc, ['x']), marker = 'o')
ax.set(xticks=data_preproc['x'])
plt.xlabel("Iteration", fontsize = fontsize)
plt.ylabel("Average " + metric, fontsize = fontsize)
# The figure shows MLP and DT curves, so the title names both
# (it previously said "MLP" only).
plt.title("PSO and GA Comparison - MLP and DT", fontsize = fontsize)
plt.legend(prop={'size': 16})
plt.tight_layout()
# NOTE(review): this rcParams change runs after the figure is drawn, so it
# presumably only affects figures created later - confirm that is intended.
plt.rcParams.update({'font.size': 24})
plt.savefig('gapsomlp_' + metric + '.png', dpi = 200)

In [None]:
# GA vs PSO convergence for the decision-tree search only.
x = list(range(1, len(dt_ga_ite) + 1))

ax = plt.figure().gca()
# Iterations are whole numbers, so restrict the x ticks to integers.
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
ax.plot(x, dt_ga_ite, color = 'r', label = 'GA-DT')
ax.plot(x, dt_pso_ite, color = 'b', label = 'PSO-DT')
ax.set_xlabel("Iteration")
ax.set_ylabel("Average R² ")
ax.set_title("PSO and GA Comparison - DT")
ax.legend()
plt.tight_layout()
plt.savefig('gapsodt.png', dpi = 200)
plt.show()

In [None]:
# Fit and score one model per access point in l_ru, printing the range of the
# preprocessed 'count' column next to the score.
for i in l_ru:
    # `dfx` exposes the access-point name under 'access_point' (the column the
    # rest of the notebook filters on); the old 'radio_id' name is not present
    # in this frame and raised KeyError.
    df_test = dfx[dfx['access_point'] == i]
    pp = Preprocessing(df_test)
    X, Y = pp.get_data()

    # NOTE(review): model_type is 'MLP' but the keyword arguments are the ones
    # the other cells pass for 'DT' models - confirm which model is intended.
    model = Model(X, Y, model_type = 'MLP', max_depth = 200, min_samples_split = 0.00005)
    fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'r2')
    counts = pp.get_dataframe()['count']
    print('radio_id: {} max: {} min: {}, metric: {}'.format(i, max(counts), min(counts) , fitness_r2))

In [None]:
# Fit and score a model on the rows of a single access point.
i = 'iad-2andar'
# `dfx` exposes the access-point name under 'access_point'; the old 'radio_id'
# column name is not present in this frame and raised KeyError.
df_test = dfx[dfx['access_point'] == i]
pp = Preprocessing(df_test)
X, Y = pp.get_data()

# NOTE(review): model_type is 'MLP' but the keyword arguments are the ones the
# other cells pass for 'DT' models - confirm which model is intended.
model = Model(X, Y, model_type = 'MLP', max_depth = 200, min_samples_split = 0.00005)
fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'r2')
counts = pp.get_dataframe()['count']
print('radio_id: {} max: {} min: {}, metric: {}'.format(i, max(counts), min(counts) , fitness_r2))

In [None]:
# Fit and score a model on the rows of a single access point.
i = 'iad-1andar'
# `dfx` exposes the access-point name under 'access_point'; the old 'radio_id'
# column name is not present in this frame and raised KeyError.
df_test = dfx[dfx['access_point'] == i]
pp = Preprocessing(df_test)
X, Y = pp.get_data()

# NOTE(review): model_type is 'MLP' but the keyword arguments are the ones the
# other cells pass for 'DT' models - confirm which model is intended.
model = Model(X, Y, model_type = 'MLP', max_depth = 200, min_samples_split = 0.00005)
fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'r2')
counts = pp.get_dataframe()['count']
print('radio_id: {} max: {} min: {}, metric: {}'.format(i, max(counts), min(counts) , fitness_r2))

In [None]:
# Fit and score a model on the rows of a single access point.
i = 'ap4600-67afac'
# `dfx` exposes the access-point name under 'access_point'; the old 'radio_id'
# column name is not present in this frame and raised KeyError.
df_test = dfx[dfx['access_point'] == i]
pp = Preprocessing(df_test)
X, Y = pp.get_data()

# NOTE(review): model_type is 'MLP' but the keyword arguments are the ones the
# other cells pass for 'DT' models - confirm which model is intended.
model = Model(X, Y, model_type = 'MLP', max_depth = 200, min_samples_split = 0.00005)
fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'r2')
counts = pp.get_dataframe()['count']
print('radio_id: {} max: {} min: {}, metric: {}'.format(i, max(counts), min(counts) , fitness_r2))

In [None]:
# Fit and score a model on the rows of a single access point.
i = 'anfiteatro'
# `dfx` exposes the access-point name under 'access_point'; the old 'radio_id'
# column name is not present in this frame and raised KeyError.
df_test = dfx[dfx['access_point'] == i]
pp = Preprocessing(df_test)
X, Y = pp.get_data()

# NOTE(review): model_type is 'MLP' but the keyword arguments are the ones the
# other cells pass for 'DT' models - confirm which model is intended.
model = Model(X, Y, model_type = 'MLP', max_depth = 200, min_samples_split = 0.00005)
fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 5, metric = 'r2')
counts = pp.get_dataframe()['count']
print('radio_id: {} max: {} min: {}, metric: {}'.format(i, max(counts), min(counts) , fitness_r2))

In [None]:
# Re-export the dataset under the column names the rest of the notebook uses.
# The result is written to the current directory, not back to the parent
# directory it was read from.
column_aliases = {
    'time1': 'timestamp',
    'timet': 'connection_time',
    'client_id': 'host',
    'radio_id': 'access_point',
}
dftest = pd.read_csv("../ufjf_dataset.csv").rename(columns=column_aliases)
dftest.to_csv('ufjf_dataset.csv', index=False)

In [None]:
from os import listdir
from os.path import isfile, join

# Collect the regular files in the working directory whose name ends with
# '_best.csv', in the order listdir returns them.
mypath = '.'
onlyfiles = [
    entry
    for entry in listdir(mypath)
    if isfile(join(mypath, entry)) and entry.endswith('_best.csv')
]
onlyfiles

In [None]:
def _best_fitness_curve(frame, opt, model_name):
    """Return the 'Best_fitness' values of the rows matching one optimizer/model pair, in file order."""
    # A single combined mask replaces the chained frame[mask_a][mask_b] lookup,
    # which made pandas reindex the second mask and emit a UserWarning.
    mask = (frame['Opt'] == opt) & (frame['Model'] == model_name)
    return list(frame.loc[mask, 'Best_fitness'])


# One convergence figure per '*_best.csv' file found in the working directory.
for i in onlyfiles:
    df_plot = pd.read_csv(i)
    # Strip the 9-character '_best.csv' suffix to recover the metric name.
    metric_used = i[:-9]
    sns.set(rc = {'figure.figsize':(20,8)})
    curves = {
        'GA-MLP': _best_fitness_curve(df_plot, 'GA', 'MLP'),
        'PSO-MLP': _best_fitness_curve(df_plot, 'PSO', 'MLP'),
        'GA-DT': _best_fitness_curve(df_plot, 'GA', 'DT'),
        'PSO-DT': _best_fitness_curve(df_plot, 'PSO', 'DT'),
    }
    # Size the x axis from the data rather than assuming exactly 10 iterations.
    x = list(range(1, len(curves['GA-MLP']) + 1))
    data_preproc = pd.DataFrame({'x': x, **curves})

    plt.figure()
    print(metric_used)
    print(data_preproc.min())
    print(data_preproc.max())
    # Display form of the metric; it also ends up in the saved file name below.
    if(metric_used == 'r2'):
        metric_used = 'r²'
    fontsize = 44
    ax = sns.lineplot(x='x', y='value', hue='variable', data=pd.melt(data_preproc, ['x']), marker = 'o', linewidth=4)
    ax.set(xticks=data_preproc['x'])
    plt.xlabel("Iteração", fontsize = fontsize)
    plt.ylabel(metric_used.upper() + " Médio", fontsize = fontsize)
    #plt.title("PSO and GA Comparison - MLP and DT", fontsize = fontsize)
    leg = plt.legend(prop={'size': 38})
    leg_lines = leg.get_lines()
    plt.xticks(fontsize = fontsize)
    plt.yticks(fontsize = fontsize)
    # Thicken the legend line samples so they stay readable at this font size.
    plt.setp(leg_lines, linewidth=6)
    plt.tight_layout()
    plt.savefig(metric_used + '_gapsomlpdt.png', dpi = 200)

In [None]:
from sklearn.metrics import r2_score
# RepeatedKFold was previously reachable only through a star import; import it
# explicitly so the cell does not depend on what the Helper modules re-export.
from sklearn.model_selection import RepeatedKFold
import os
# NOTE(review): TensorFlow is already imported at the top of the notebook, so
# this device mask may be applied too late to take effect - confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

# Repeated k-fold evaluation of a single-layer GRU regressor; the per-fold R²
# values are collected in `score` and the total wall time is printed.
n = 300
# NOTE(review): the reshape below is disabled; confirm that X already has the
# per-sample shape the GRU layer expects.
#X = np.reshape(X,(X.shape[0], 1, X.shape[1]))
shape = X.shape[1:]
early_stopping_monitor = EarlyStopping(monitor='loss',patience=2)
kf = RepeatedKFold(n_splits=3, n_repeats=5)
score = []
epochs = 1000
n_batch = 64
start_time = time.time()
for i, (train_index, test_index) in enumerate(kf.split(X), start=1):
    # Progress indicator: 1-based fold number.
    print(i)
    x_train, x_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    # A fresh model per fold so no weights carry over between splits.
    model = Sequential()
    model.add(GRU(n, input_shape = shape, return_sequences = False, kernel_initializer = 'normal'))#, activation = cf
    model.add(Dense(units = 1, activation = 'sigmoid', kernel_initializer = 'normal'))#return_sequences = False
    opt = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(loss='mean_squared_error', optimizer=opt)
    model.fit(x_train, y_train, epochs = epochs, batch_size = n_batch, verbose = 0, callbacks=[early_stopping_monitor])
    predicted = model.predict(x_test)

    score.append(r2_score(y_test, predicted))

final_time = time.time()
print(final_time - start_time)

In [None]:
# Average of the per-fold R² values accumulated in `score`.
np.mean(score)

# Loading Model

In [None]:
# joblib is used below to load the persisted model; print its version for the record.
import joblib
print(joblib.__version__)

In [None]:
# Load the previously persisted model from the working directory.
# SECURITY NOTE: joblib.load unpickles the file and can execute arbitrary code;
# only load model files that come from a trusted source.
filename = 'r2ga_mlp.sat'
model = joblib.load(filename)

In [None]:
# Reload `dfx` from the cleaned export (the raw "../ufjf_dataset.csv" was used
# earlier in the notebook), parse timestamps, lower-case the access-point names
# and index the frame by timestamp.
dfx = pd.read_csv("../ufjf_dataset_cleaned.csv").assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
    access_point=lambda frame: frame['access_point'].str.lower(),
)
dfx.index = dfx['timestamp']

# Access points whose name contains 'ru'.
dfxx = dfx[dfx['access_point'].str.contains('ru')]
l_ru = dfxx['access_point'].unique().tolist()
print(l_ru)

In [None]:
# Explicit imports so this cell does not depend on an earlier cell or on a
# star import having brought these names into scope.
from sklearn.metrics import r2_score
from sklearn.model_selection import RepeatedKFold

# Score the already-trained `model` on the test side of repeated k-fold splits
# and keep the fold with the highest R² for the scatter plot.
# Rows are selected by exact membership in l_ru; joining the names into a
# regular expression would misbehave on names containing regex metacharacters.
df = dfx[dfx['access_point'].isin(l_ru)]
# df = dfx[dfx['access_point'] == 'iad-2andar']
pp = Preprocessing(df)
X, Y = pp.get_data()

kf = RepeatedKFold(n_splits=3, n_repeats=5)
score = []
# Start below any attainable R² so a best fold is always recorded, even when
# every fold scores <= 0 (starting at 0 left best_predicted as None).
best_metric = -np.inf
best_y_test = None
best_predicted = None
# Only the test indices are used: the model is loaded, not refitted per fold.
for train_index, test_index in kf.split(X):
    x_test, y_test = X[test_index], Y[test_index]
    predicted = model.predict(x_test)
    # Kept in its own name: reassigning the global `metric` (the metric *name*
    # passed to fit_predict_evaluate in later cells) would replace 'r2' with a float.
    fold_r2 = r2_score(y_test, predicted)
    score.append(fold_r2)
    if best_metric < fold_r2:
        best_metric = fold_r2
        best_y_test = y_test
        best_predicted = predicted


print('%.4f +- %.4f' % (np.mean(score), np.std(score)))
plt.scatter(best_predicted, best_y_test)
# Identity line: points on it are perfect predictions.
plt.plot([0.0,0.9],[0.0,0.9], color = 'r')
plt.xlabel("Number of Predicted Users")
plt.ylabel("Number of Real Users")
plt.tight_layout()
plt.savefig('predxreal.png', dpi = 200)
plt.show()

In [None]:
# Extremes reported by the preprocessing object, passed to convert_back below.
max_value, min_value = pp.get_max_value(), pp.get_min_value()

# Divisor used by convert_number_AP.
# NOTE(review): presumably the number of users one access point can serve - confirm.
ap_threshold = 65

In [None]:
def convert_back(value, max_value, min_value):
    """Scale `value` by the (max_value - min_value) range and truncate to an int.

    NOTE(review): no `min_value` offset is added back; confirm this matches the
    normalization applied by the preprocessing step.
    """
    value_range = max_value - min_value
    rescaled = value_range * value
    return int(rescaled)

def convert_number_AP(value, threshold):
    """Return one more than the truncated quotient of `value` by `threshold`."""
    full_groups = int(value / threshold)
    return 1 + full_groups

# Display the predictions of the best-scoring fold kept by the evaluation cell.
best_predicted

In [None]:
def _ap_count_frame(normalized_values, label):
    """Convert normalized values to AP numbers and count how often each number occurs.

    Returns (converted values, AP number per sample, count frame). The count
    frame is indexed by AP number, has the occurrences in a column named
    'Number_Allocated_AP', and carries `label` in a 'type' column.
    """
    converted = [convert_back(value, max_value, min_value) for value in normalized_values]
    ap_numbers = [convert_number_AP(value, ap_threshold) for value in converted]
    counts = pd.Series(ap_numbers, name='Number_Allocated_AP').value_counts()
    # pandas >= 2.0 names the value_counts result 'count' and moves the original
    # name onto the index; pin both so the frame has the same layout on every version.
    frame = counts.rename('Number_Allocated_AP').rename_axis(None).to_frame()
    frame['type'] = label
    return converted, ap_numbers, frame


# Same pipeline for the predictions and the ground truth of the best fold.
best_predicted_test, predicted_number_ap, df_plot = _ap_count_frame(best_predicted, 'Predito')
best_y_test1, real_number_ap, df_plot1 = _ap_count_frame(best_y_test, 'Real')

In [None]:
# Stack the predicted and real count frames and expose the AP number (the
# index) as an 'APx' column so it can be used as a plotting variable.
frames = [df_plot, df_plot1]
result = pd.concat(frames).assign(APx=lambda combined: combined.index)
result

In [None]:
import collections

# Share of samples for which the predicted AP number equals the real one.
total = len(real_number_ap)
hit = sum(
    1
    for predicted_ap, real_ap in zip(predicted_number_ap, real_number_ap)
    if predicted_ap == real_ap
)
print('%.2f%%' % ((hit / total) * 100))

sns.set(rc = {'figure.figsize':(20,8)})

# Bars are drawn in this explicit, sorted category order. The annotations
# previously followed Counter insertion order (first occurrence in the data),
# which could attach a count to the wrong bar.
ap_categories = sorted(set(predicted_number_ap) | set(real_number_ap))
predicted_counter = collections.Counter(predicted_number_ap)
real_counter = collections.Counter(real_number_ap)

# Counts and percentages aligned with ap_categories (0 where a category is absent).
test_predict = [predicted_counter[category] for category in ap_categories]
test_predict_pctg = [round(((count / total) * 100),2) for count in test_predict]
test_real = [real_counter[category] for category in ap_categories]
test_real_pctg = [round(((count / total) * 100),2) for count in test_real]

fontsize = 28
ax = plt.figure().gca()

sns.barplot(x='APx', y='Number_Allocated_AP', data = result, hue = 'type',
            order = ap_categories, hue_order = ['Predito', 'Real'],
            edgecolor='black', ax = ax)
plt.xlabel('Número de Pontos de Acesso Alocados', fontsize = fontsize)
plt.ylabel('Número de Predições', fontsize = fontsize)
plt.xticks(fontsize = fontsize, rotation = 0)
plt.yticks(fontsize = fontsize)

# With two hue levels the bars sit at -0.2 / +0.2 around each category
# position; annotate only the bars that exist.
for position, count in enumerate(test_predict):
    if count:
        ax.text(position - 0.2, count, str(count) + ' - ' + str(test_predict_pctg[position]) +'%', color='black', fontsize = 20, ha='center', va='bottom')

for position, count in enumerate(test_real):
    if count:
        ax.text(position + 0.2, count, str(count) + ' - ' + str(test_real_pctg[position]) +'%', color='black', fontsize = 20, ha='center', va='bottom')

leg = plt.legend(prop={'size': fontsize})
plt.tight_layout()
plt.savefig('barplot.png', dpi = 200)
plt.show()

In [None]:
# Recompute the real AP number per sample and store it, one row per sample,
# in `df_plot` (this replaces the count frame previously held under that name).
best_y_test1 = [convert_back(normalized, max_value, min_value) for normalized in best_y_test]
real_number_ap = [convert_number_AP(users, ap_threshold) for users in best_y_test1]
x = list(range(len(real_number_ap)))
df_plot = pd.DataFrame(data=real_number_ap, columns=['Number_Allocated_AP'])

In [None]:
# Font size used by the figure cells that follow.
fontsize = 28

# Occurrence count of each real AP number, in first-seen order.
test = [occurrences for occurrences in collections.Counter(real_number_ap).values()]

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of real vs predicted AP numbers.
# The label list is the sorted union of both sides and is passed to
# confusion_matrix explicitly, so the tick labels always match the matrix
# rows/columns. Taking labels from unique() on the real values gave
# first-appearance order and missed classes that only occur in the predictions.
labels = sorted(set(real_number_ap) | set(predicted_number_ap))
df_cm = confusion_matrix(real_number_ap, predicted_number_ap, labels=labels)
plt.figure(figsize = (10,7))
sns.heatmap(df_cm, annot=True, xticklabels=labels, yticklabels=labels, fmt='g', annot_kws={"fontsize": fontsize})
plt.xlabel("Predito", fontsize = fontsize)
plt.ylabel("Real", fontsize = fontsize)
plt.xticks(fontsize = fontsize)
plt.yticks(fontsize = fontsize)
plt.tight_layout()
plt.savefig("Conf_Mat.png")
plt.show()

In [None]:
# Rows of the GA/MLP results file that hold the largest 'Param02' value.
dftest = pd.read_csv('ga_test_mlp_r2.csv')
top_param02 = max(dftest['Param02'])
dftest_aux = dftest.loc[dftest['Param02'] == top_param02]
dftest_aux

In [None]:
# Rows of the GA/DT results file that hold the largest 'Param02' value.
dftest = pd.read_csv('ga_test_dt_r2.csv')
top_param02 = max(dftest['Param02'])
dftest_aux = dftest.loc[dftest['Param02'] == top_param02]
dftest_aux

In [None]:
# Rows of the PSO/DT results file that hold the largest 'Param02' value.
dftest = pd.read_csv('pso_test_dt_r2.csv')
top_param02 = max(dftest['Param02'])
dftest_aux = dftest.loc[dftest['Param02'] == top_param02]
dftest_aux

# Testing DT criterion

In [None]:
# Compare decision-tree split criteria with all other settings held fixed:
# the Model default versus three explicitly named criteria.
parameters = []

model = Model(X, Y, model_type = 'DT', max_depth = None, min_samples_split = 40)
model2 = Model(X, Y, model_type = 'DT', max_depth = None, min_samples_split = 40, criterion = 'squared_error')
model3 = Model(X, Y, model_type = 'DT', max_depth = None, min_samples_split = 40, criterion = 'absolute_error')
model4 = Model(X, Y, model_type = 'DT', max_depth = None, min_samples_split = 40, criterion = 'poisson')

fitness_r2 = model.fit_predict_evaluate(n_splits = 3, n_repeats = 3, metric = metric)
fitness_r22 = model2.fit_predict_evaluate(n_splits = 3, n_repeats = 3, metric = metric)
fitness_r23 = model3.fit_predict_evaluate(n_splits = 3, n_repeats = 3, metric = metric)
fitness_r24 = model4.fit_predict_evaluate(n_splits = 3, n_repeats = 3, metric = metric)
# Each line reports its own model's score; previously all four printed the
# default model's fitness_r2.
print("R2 usado: {}".format(fitness_r2))
print("R2 square: {}".format(fitness_r22))
print("R2 absolute: {}".format(fitness_r23))
print("R2 poisson: {}".format(fitness_r24))