In [1]:
import copy
import json
import logging
import os
import time

import numpy as np
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args

from src.experiments.aux_code import *
from datasets.get_datasets import *
from src.brkga.genetic import genetic as brkga
from src.brkga_variation.genetic import genetic as brkga_var
from src.s_genetic.genetic import *
from src.predicate import *

In [2]:
# Number of times the final experiment loop repeats the genetic search.
N_ROUNDS = 5
# Label used as a prefix of the path under which results are saved.
EXPERIMENT_NAME = 'imdb_cora'

In [3]:
def get_k_best_individuals(population, k):
    """Return the ``k`` individuals with the lowest first fitness value.

    Individuals are ranked in ascending order of ``fitness.values[0]``.
    The sort is stable, so individuals with equal fitness keep their
    original population order.

    Args:
        population: Sequence of individuals, each exposing
            ``fitness.values[0]``.
        k: Number of individuals requested.

    Returns:
        A list holding at most ``k`` individuals, best (lowest fitness)
        first. If ``k`` is larger than the population, the whole ranked
        population is returned instead of raising ``IndexError``; a
        non-positive ``k`` yields an empty list.
    """
    ranked = sorted(population, key=lambda individual: individual.fitness.values[0])
    # Clamp at zero so a negative k cannot turn into a "drop the last items" slice.
    return ranked[:max(k, 0)]

In [4]:
# Predicate names handed to datasets.load(..., target=...) for the IMDB and
# Cora datasets respectively, and to every genetic(...) call.
source = 'workedunder'
target = 'samevenue'

In [5]:
# Read the knowledge-base description from the first line of kb.txt.
# A context manager is used so the file handle is closed deterministically.
with open('src/experiments/kb.txt') as kb_file:
    kb = json.loads(kb_file.readline())
kb_source = kb['imdb']
kb_target = kb['cora']

# Derived from the target knowledge base; passed to genetic(...) below.
pred_target = create_pred_target(kb_target)

In [6]:
# Both datasets are loaded with the same seed; each result is indexed
# elsewhere in the notebook as (facts, pos, neg).
imdb_dataset = datasets.load('imdb', kb_source, target=source, seed=441773, balanced=0)
cora_dataset = datasets.load('cora', kb_target, target=target, seed=441773, balanced=0)

# The structures file stores one JSON document on its first line.
with open('src/experiments/structures.json', 'r') as structures_file:
    ss = json.loads(structures_file.readline())

# Work on a deep copy so the parsed JSON itself is never handed on.
src_struct = copy.deepcopy(ss['imdb'])
new_src_struct = [
    define_individual(structure, position)
    for position, structure in enumerate(src_struct)
]
structured_src = src_struct

In [7]:
# cora_dataset is indexed as (facts, pos, neg).
train_facts = cora_dataset[0]
train_pos = cora_dataset[1]
train_neg = cora_dataset[2]

In [8]:
# Single run of the genetic search with a fixed configuration, before any
# hyper-parameter tuning.
res = genetic(
    new_src_struct,
    target,
    source,
    train_pos,
    train_neg,
    train_facts,
    kb_source,
    kb_target,
    pred_target,
    NUM_GEN=14,
    pop_size=3,
    crossover_type='tree_ind',
    revision='guided',
)

# Testing s_genetic

In [8]:
# Candidate rates, rounded to two decimals to remove float-stepping noise.
# NOTE(review): np.arange with a fractional step may or may not include the
# end value depending on rounding, so the explicit 0.95 below could end up
# duplicated -- confirm the generated lists are the intended ones.
mutation_rate_list = [round(rate, 2) for rate in np.arange(0.1, 0.4, 0.05)]
crossover_rate_list = [round(rate, 2) for rate in [*np.arange(0.6, 0.95, 0.05), 0.95]]

# Search space for the optimiser; the dimension order here is the order in
# which the optimiser reports points.
space = [
    Categorical([1, 3, 5], name='num_individuals'),
    Categorical(mutation_rate_list, name='mutation_rate'),
    Categorical(crossover_rate_list, name='crossover_rate'),
]

In [9]:
@use_named_args(space)
def objective(**params):
    """Run one genetic search for a sampled point of ``space``.

    Args:
        **params: Values keyed by dimension name: ``num_individuals``,
            ``mutation_rate`` and ``crossover_rate``.

    Returns:
        The last element of the second item returned by ``genetic``.
        NOTE(review): presumably the final entry of a score history that
        should be minimised -- confirm against ``genetic``.
    """
    tuned = {
        'pop_size': params['num_individuals'],
        'crossover': params['crossover_rate'],
        'mutation': params['mutation_rate'],
    }
    outcome = genetic(
        new_src_struct, target, source,
        train_pos, train_neg, train_facts,
        kb_source, kb_target, pred_target,
        NUM_GEN=14,
        crossover_type='tree_ind',
        revision='guided',
        **tuned,
    )
    history = outcome[1]
    return history[-1]

In [10]:
# Minimise `objective` over `space` with 10 evaluations; the fixed
# random_state seeds the optimiser itself.
res_gp = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=10,
    random_state=0,
)

best_score = res_gp.fun
print("Best score=%.4f" % best_score)

In [None]:
# Show the best point found by the optimiser (one value per dimension of `space`).
print(f"BEST RESULT: {res_gp.x}")

In [None]:
# Final experiment: repeat the genetic search N_ROUNDS times with the tuned
# hyper-parameters and evaluate the best individuals of every run.
#
# The tuned values are read from the optimiser result so this cell works on
# a fresh kernel; `res_gp.x` follows the dimension order of `space`
# (num_individuals, mutation_rate, crossover_rate).
num_ind, mutation_rate, crossover_rate = res_gp.x

for _round in range(0, N_ROUNDS):
    print(f"ROUND {str(_round+1)}")
    res_s_genetic = genetic(new_src_struct, target, source,
                    train_pos, train_neg, train_facts,
                    kb_source, kb_target, pred_target,
                    NUM_GEN=14, pop_size=num_ind,
                    mutation=mutation_rate, crossover=crossover_rate,
                    crossover_type='tree_ind', revision='guided')

    final_results = {}
    final_results[f'{source}->{target}'] = res_s_genetic

    # Ask for at most as many individuals as the population holds, so a
    # population smaller than 3 does not fail.
    population = res_s_genetic[0].population
    individuals = get_k_best_individuals(population, min(3, len(population)))

    n_ind = 1
    for individual in individuals:
        print("INDIVIDUO ", n_ind)
        refine, transfer = get_refine_transfer(individual, source, target, 'imdb', 'cora')
        rrefine = [refine]
        rtransfer = [transfer]
        fold_results = []
        inference_lines = []

        # Each fold in turn is used for training; all remaining folds are
        # merged into the test set.
        for fold in range(len(train_pos)):
            ttrain = []
            test_pos = []
            test_neg = []
            test_facts = []
            for index in range(0, len(train_pos)):
                if index == fold:
                    # Negatives are drawn with np.random.choice's defaults
                    # (sampling with replacement, global unseeded RNG), twice
                    # as many as there are positives in the fold.
                    sampled_neg = np.random.choice(train_neg[index], 2*len(train_pos[index]))
                    ttrain = [train_pos[index], sampled_neg, train_facts[index]]
                else:
                    test_pos.extend(train_pos[index])
                    test_neg.extend(train_neg[index])
                    test_facts.extend(train_facts[index])
            test = [test_pos, test_neg, test_facts]
            fold_results.append(
                test_refine_transfer(kb_target, target, refine, transfer, ttrain, test)
            )

            # Rename the results file from .db to .txt, then collect its
            # lines without the newline characters.
            results_db = f'boostsrl/test/results_{target}.db'
            results_txt = os.path.splitext(results_db)[0] + ".txt"
            os.rename(results_db, results_txt)
            with open(results_txt, 'r') as results_file:
                inference_lines.append([line.replace('\n', '') for line in results_file])

        final_results[f'test:{source}->{target}'] = fold_results
        final_results[f'refine:{source}->{target}'] = rrefine
        final_results[f'transfer:{source}->{target}'] = rtransfer
        final_results[f'inf:{source}->{target}'] = inference_lines
        # NOTE(review): the output path does not contain the round number; if
        # save_groot_results overwrites existing output, later rounds replace
        # earlier ones -- confirm.
        save_groot_results(f'groot_experiments/s_genetic/{EXPERIMENT_NAME}_{source}_{target}_{str(crossover_rate)}_{str(mutation_rate)}_{str(num_ind)}_14', n_ind, final_results, source, target)
        n_ind += 1