In [1]:
import numpy as np, matplotlib, time, copy, random, math

%load_ext line_profiler
directory = "../data/"
file_paths = [
    "a_example.txt",
    "b_read_on.txt",
    "c_incunabula.txt",
    "d_tough_choices.txt",
    "e_so_many_books.txt",
    "f_libraries_of_the_world.txt",
]

### Utils

In [2]:
def process_file(filePath):
    with open(directory + filePath, "r") as file:
        content = file.read().split("\n")[:-1]
        B, L, D = content[0].split()
        B, L, D = int(B), int(L), int(D)
    bookValues = [
        int(n) for n in content[1].split()
    ]  # tuple([int(n) for n in content[1].split()])
    libraries = []

    def book_sort(book):
        return bookValues[book]

    for i in range(L):
        N, T, M = content[2 + 2 * i].split()
        N, T, M = int(N), int(T), int(M)
        book_ids = tuple(
            sorted(
                [int(id) for id in content[2 + 2 * i + 1].split()],
                key=book_sort,
                reverse=True,
            )
        )
        libraries.append(Library(i, N, T, M, book_ids))
    return ((B, L, D), bookValues, libraries)

In [3]:
def check_solution(D, libraries):
    days = 0
    prev_books = set()
    for library in libraries:
        days += library.signup_time
        if len(library.book_ids) > (D - days) * library.books_per_day:
            print("what", len(library.book_ids), (D - days) * library.books_per_day)
        assert not any([(book in prev_books) for book in library.book_ids])
        prev_books.update(library.book_ids)
    assert days < D


def score_solution(libraries, book_values):
    score = 0
    for library in libraries:
        for book_id in library.book_ids:
            score += book_values[book_id]
    return score

### Classes

In [4]:
class Library:
    def __init__(self, index, N, T, M, book_ids: set):
        self.id = index
        self.size = N
        self.signup_time = T
        self.books_per_day = M
        self.book_ids = book_ids

    def get_n_best_books(self, n, book_values):
        result = []
        k = 0
        for i in range(len(self.book_ids)):
            if book_values[self.book_ids[i]] != 0:
                result.append(self.book_ids[i])
                k += 1
                if k == n:
                    return tuple(result)
        return tuple(result)

    def __str__(self):
        return str(self.__class__) + ": " + str(self.__dict__)

In [5]:
class ProblemSolver:
    def __init__(self, B, L, D, book_values, libraries):
        self.B, self.L, self.D = B, L, D
        self.book_values = book_values
        self.libraries = libraries

    def get_solution(self, selected_lib_ids=None):
        if not selected_lib_ids:
            selected_lib_ids = self.get_individual()
        local_book_values = copy.copy(self.book_values)
        day = 0
        selected_libraries = [copy.copy(self.libraries[i]) for i in selected_lib_ids]
        it = 0
        while it < len(selected_libraries):
            next_library = selected_libraries[it]
            day += next_library.signup_time
            if day >= self.D:
                break
            next_library.book_ids = next_library.get_n_best_books(
                (self.D - day) * next_library.books_per_day, local_book_values
            )
            for book_id in next_library.book_ids:
                local_book_values[book_id] = 0
            it += 1
        return selected_libraries[:it]

    def get_individual(self):
        pass

In [6]:
class HeurysticSolver(ProblemSolver):
    def library_score(self, lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(
            delta_time * lib.signup_time, self.book_values
        )
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores / lib.signup_time

    def get_individual(self):
        lib_ids = [i for i in range(len(self.libraries))]
        lib_ids.sort(key=self.library_score, reverse=True)
        day = 0
        for i in range(len(lib_ids)):
            day += self.libraries[lib_ids[i]].signup_time
            if day >= self.D:
                return tuple(lib_ids[:i])
        return tuple(lib_ids)

In [7]:
class PowerSolver(HeurysticSolver):
    def library_score(self, lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(
            delta_time * lib.signup_time, self.book_values
        )
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores / lib.signup_time ** (
            1 + lib.signup_time / self.D
        )

In [8]:
class SimpleScoreVarianceSolver(HeurysticSolver):
    def library_score(self, lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(
            delta_time * lib.signup_time, self.book_values
        )
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001, np.var(best_scores))
        return sum_of_best_book_scores / book_variance

In [9]:
class SquareScoreVarianceSolver(HeurysticSolver):
    def library_score(self, lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(
            delta_time * lib.signup_time, self.book_values
        )
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001, np.var(best_scores))
        return sum_of_best_book_scores ** 2 / (
            lib.signup_time * lib.signup_time * math.sqrt(book_variance)
        )

In [10]:
class BookNumbersSolver(HeurysticSolver):
    def library_score(self, lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(
            delta_time * lib.signup_time, self.book_values
        )
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001, np.var(best_scores))
        return sum_of_best_book_scores ** 2 / (
            book_variance * len(n_best_books) * lib.signup_time
        )

In [11]:
class ScoreSquareSolver(HeurysticSolver):
    def library_score(self, lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(
            delta_time * lib.signup_time, self.book_values
        )
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores ** 2 / lib.signup_time

In [12]:
# sum_score = 0
# solvers_cls = [
#     HeurysticSolver,
#     PowerSolver,
#     SimpleScoreVarianceSolver,
#     SquareScoreVarianceSolver,
#     BookNumbersSolver,
#     ScoreSquareSolver,
# ]
# for file_path in file_paths:
#     (B, L, D), book_values, libraries = process_file(file_path)
#     solvers = [cl(B, L, D, book_values, libraries) for cl in solvers_cls]
#     solutions = [solver.get_solution() for solver in solvers]
#     [check_solution(D, solution) for solution in solutions]
#     scores = [score_solution(solution, book_values) for solution in solutions]
#     print(scores)
#     score = max(scores)
#     print(score)
#     sum_score += score
# print("SUM", sum_score)

### Random Solver

In [13]:
class RandomSolver(ProblemSolver):
    def get_individual(self):
        lib_ids = [i for i in range(len(self.libraries))]
        random.shuffle(lib_ids)
        day = 0
        for i in range(len(lib_ids)):
            day += self.libraries[lib_ids[i]].signup_time
            if day >= self.D:
                return tuple(lib_ids[:i])
        return tuple(lib_ids)

### Genetic Solver

In [14]:
class GeneticSolver(ProblemSolver):
    def __init__(self, B, L, D, book_values, libraries, pop_size = 20, p_mutate = 0.5, surv_rate = 0.2, tournament_size = 3):
        super().__init__(B, L, D, book_values, libraries)
        
        #min of popsize and all possible permutations
        num = 1
        mult = len(libraries)
        while num < pop_size and mult != 1:
            num*=mult
            mult-=1
        self.pop_size = min(pop_size,num)
        self.p_mutate = p_mutate
        self.tournament_size = min(self.pop_size,tournament_size)
        self.surv_rate = surv_rate
        
        self.indiv_scores = dict()
        self.lib_ids = [i for i in range(len(libraries))]
        self.lib_scores = [self.lib_score(lib) for lib in self.libraries]
        
    def lib_score(self, lib):
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(
            delta_time * lib.signup_time, self.book_values
        )
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores / lib.signup_time
        
        
    def mutate(self, individual):
        new_one = list(individual)
        if random.random() <=0.5:
            #internal mutation
            length = len(new_one)
            for i in range(4):
                a,b = random.sample(range(length), 2)
                new_one[a],new_one[b] =new_one[b],new_one[a]
            return tuple(individual)                  
        else:
            #exxternal mutation
            additional = random.choices(self.lib_ids, k=4, weights = self.lib_scores)
            unique = set(additional)
            for un in unique:
                new_one.insert(random.randrange(0,len(new_one)), un)
            return self.cut(new_one)
            
    def cut(self, new_one):
        day =0;
        for i in range(len(new_one)):
            day+=self.libraries[new_one[i]].signup_time
            if day >= self.D:
                return tuple(new_one[:i])
        return tuple(new_one)
    
    def crossover(self,indiv_1, indiv_2):
        set1 = set(indiv_1)
        set2 = set(indiv_2)
        common = set1.intersection(set2)
        maxLen = max(len(indiv_1), len(indiv_2))
        child1,child2,=[-1]*maxLen,[-1]*maxLen
        only1,only2 = [],[]
        for i in range(len(indiv_1)):
            if indiv_1[i] in common:
                child1[i] =  indiv_1[i]
            else:
                only1.append(indiv_1[i])
        for i in range(len(indiv_2)):
            if indiv_2[i] in common:
                child2[i] = indiv_2[i]
            else:
                only2.append(indiv_2[i])
        for i in range(maxLen):
            if child1[i] == -1 and len(only2) > 0:
                child1[i] = only2.pop(0)
            if child2[i] == -1 and len(only1) > 0:
                child2[i] = only1.pop(0)
        child1 = [i for i in child1 if i !=-1]
        child2 = [i for i in child2 if i !=-1]
                        
        child1 = self.cut(child1)
        child2 = self.cut(child2)
        if random.random()<=self.p_mutate:
            child1 = self.mutate(child1)
        if random.random()<=self.p_mutate:
            child2 = self.mutate(child2)
        return child1, child2
    
    def tournament(self, indivs):
        return max(indivs, key = self.indiv_scores.get);
    
    def select_survivals(self, population):
        k = int(5*self.pop_size*self.surv_rate)
        weights = [self.indiv_scores[indiv] for indiv in population]
        chosen = []
        already_chosen = set()
        desired = int(self.pop_size*self.surv_rate)
        while len(already_chosen) < desired:
            candidates = random.choices(population, k = k, weights = weights)
            for cand in candidates:
                if cand not in already_chosen:
                    chosen.append(cand)
                    already_chosen.add(cand)
        return chosen
    
    def get_individual(self):
        solvers_cls = [HeurysticSolver, PowerSolver, SimpleScoreVarianceSolver, SquareScoreVarianceSolver, BookNumbersSolver, ScoreSquareSolver]
        solvers = [cl(B,L,D,book_values, libraries) for cl in solvers_cls]
        population = [solver.get_individual() for solver in solvers]
        randomSolver = RandomSolver(B,L,D,book_values, libraries)
        while len(population) < self.pop_size:
            population.append(randomSolver.get_individual())
        for indiv in population:
            sol = self.get_solution(indiv)
            self.indiv_scores[indiv] = score_solution(sol, self.book_values)
            
        startTime = time.time()
        while(True):
            if(time.time() - startTime > 270):
                break
            new_population = [ self.tournament(random.sample(population, self.tournament_size)) for i in range(int(self.pop_size*self.surv_rate))]
            while len(new_population) < self.pop_size:
                indiv_1 = self.tournament(random.sample(population, self.tournament_size))
                indiv_2 = self.tournament(random.sample(population, self.tournament_size))
                if random.random()<= 0.5:
                    new_child1 = self.mutate(indiv_1)
                    new_child2 = self.mutate(indiv_2)
                else:
                    new_child1, new_child2 = self.crossover(indiv_1, indiv_2)
                if new_child1 not in self.indiv_scores:
                    self.indiv_scores[new_child1] = score_solution(self.get_solution(new_child1), book_values)
                if new_child2 not in self.indiv_scores:
                    self.indiv_scores[new_child2] = score_solution(self.get_solution(new_child2), book_values)
                new_population.append(new_child1)
                new_population.append(new_child2)
            population = tuple(new_population)
        return max(self.indiv_scores, key = self.indiv_scores.get)
            
        
        

In [15]:
# for file_path in file_paths:
#     print(file_path)
#     (B, L, D), book_values, libraries = process_file(file_path)
#     solver = GeneticSolver(B, L, D, book_values, libraries, 20)
#     indiv = solver.get_individual()
# #     print(indiv)
#     mutated = solver.mutate(indiv)
#     check_solution(D,solver.get_solution(mutated))

In [16]:
sum_score =0
for file_path in file_paths:
    (B, L, D), book_values, libraries = process_file(file_path)
    solver = GeneticSolver(B, L, D, book_values, libraries, 20)
    solution = solver.get_solution()
    check_solution(D, solution)
    score = score_solution(solution, book_values)
    print("S: ",score)
    sum_score += score

print(sum_score)

S:  21
S:  5903200
S:  5645747
S:  4816500
S:  4810143
S:  5323691
26499302


In [17]:
time.time()

1609012111.0126586