In [1]:
import numpy as np, matplotlib.pyplot as plt, time, copy, random, math
#%load_ext line_profiler
directory = '../data/'
file_paths = ['a_example.txt', 'b_read_on.txt', 'c_incunabula.txt','d_tough_choices.txt', 'e_so_many_books.txt', 'f_libraries_of_the_world.txt']

### Classes

In [2]:
class Library():
    def __init__(self,index,N,T,M):
        self.id = index
        self.size  = N
        self.signup_time = T
        self.books_per_day = M
        self.book_ids = set()
        
    def add_book(self,book):
        self.book_ids.add(book)
        
    def remove_book(self,book):
        self.book_ids.remove(book)
        
    
    def __str__(self):
        return str(self.__class__) + ": " + str(self.__dict__)

### Utils

In [3]:
def process_file(filePath):
    with open(directory+filePath,'r') as file:
        content = file.read().split('\n')[:-1];
        B,L,D = content[0].split()
        B,L,D = int(B),int(L),int(D)
    book_libraries = [set() for i in range(B)]
    bookValues = [int(n) for n in content[1].split()]
    libraries = []
    for i in range(L):
        N,T,M = content[2+2*i].split()
        N,T,M=int(N),int(T),int(M)
        book_ids = set(int(id) for id in content[2+2*i+1].split())
        library = Library(i,N,T,M)
        for book_id in book_ids:
            book_libraries[book_id].add(i)
            library.add_book(book_id)
        libraries.append(library)
    return ((B,L,D), bookValues,book_libraries, libraries)

In [26]:
def check_solution(D, solution_libraries):
    days = 0
    prev_books = set()
    lib_ids = [lib.id for lib in solution_libraries]
    assert(len(lib_ids) == len(set(lib_ids)))
    for library in solution_libraries:
        days+=library.signup_time
        if len(library.book_ids) > (D-days)*library.books_per_day:
            print("what", len(library.book_ids), (D-days)*library.books_per_day)
        assert(len(library.book_ids) == len(set(library.book_ids)))
        assert(not any([(book in prev_books) for book in library.book_ids]))
        prev_books.update(library.book_ids)
    assert(days < D)
    
def score_solution(solution_libraries, book_values):
    score=0
    for library in solution_libraries:
        for book_id in library.book_ids:
            score+= book_values[book_id]
    return score

In [42]:
class GreedyIntervalSolver:
    def __init__(self, B, L, D, book_values, book_libraries, libraries):
        self.B, self.L, self.D = B, L, D
        self.book_values = book_values
        self.libraries = copy.deepcopy(libraries)
        self.book_libraries = copy.deepcopy(book_libraries)

    def book_score(self, book_id):
        return self.book_values[book_id] - 0.7 * len(self.book_libraries[book_id])

    def get_n_best_books(self, lib, n):
        return sorted(lib.book_ids, key=self.book_score, reverse=True)[:n]

    def library_score(self, lib_id, current_day):
        lib = self.libraries[lib_id]
        delta_time = self.D - current_day - lib.signup_time
        n_best_books = self.get_n_best_books(lib, min(delta_time * lib.books_per_day, len(lib.book_ids)))
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        sum_of_best_book_scores /= lib.signup_time
        return sum_of_best_book_scores  # / lib.signup_time ** (1 + lib.signup_time / self.D)

    def get_solution(self):
        libraries = self.libraries
        
        remaining_libraries = set(lib.id for lib in libraries)
        day = 0
        chosen_libraries = []
        it = 0
        interval = 25#max(1, int(self.L/1000))
        while len(remaining_libraries) > 0:
            if it%interval == 0:
                library_scores = [(self.library_score(lib_id, day), lib_id) for lib_id in remaining_libraries]
            it += 1
            max_el = max(library_scores, key=lambda x: x[0])
            library_scores.remove(max_el)
            score, lib_id = max_el
            remaining_libraries.remove(lib_id)
            library = libraries[lib_id]
            if day + library.signup_time >= self.D:
                break
            day += library.signup_time
            books_to_take = (self.D - day) * library.books_per_day
            sorted_books = sorted(library.book_ids, key=self.book_score, reverse=True)
            for book_id in sorted_books[:books_to_take]:
                for lib_id in self.book_libraries[book_id]:
                    if lib_id != library.id:
                        libraries[lib_id].remove_book(book_id)

            for book_id in sorted_books[books_to_take:]:
                self.book_libraries[book_id].remove(library.id)

            library.book_ids = sorted_books[:books_to_take]
            chosen_libraries.append(library)
        return chosen_libraries

    def get_individual_from_solution(self, solution):
        return tuple([lib.id for lib in solution])

In [43]:
sum_score =0
for file_path in file_paths:
    (B, L, D), book_values,book_counts, libraries = process_file(file_path)
    solver = GreedyIntervalSolver(B, L, D, book_values, book_counts, libraries)
    solution = solver.get_solution()
    print("L",len(solution))
    check_solution(D, solution)
    score = score_solution(solution, book_values)
    print("S: ",score)
    sum_score += score

print(sum_score)

L 2
S:  21
L 90
S:  5822900
L 1284
S:  5687459
L 15000
S:  4975360
L 150
S:  5057067
L 17
S:  5240157
26782964
