In [1]:
import numpy as np, matplotlib, time, copy, random, math
from sortedcontainers import SortedSet, SortedList
%load_ext line_profiler
# Prefix prepended (by plain string concatenation) to every input file name.
directory = '../data/'
# Input data sets, in the order they are processed by the run cells.
file_paths = [
    'a_example.txt',
    'b_read_on.txt',
    'c_incunabula.txt',
    'd_tough_choices.txt',
    'e_so_many_books.txt',
    'f_libraries_of_the_world.txt',
]

### Classes

In [2]:
class Library:
    """One library: its id, its three numeric parameters and its set of book ids."""

    def __init__(self, index, N, T, M):
        """Record the library parameters and start with no books.

        index -> ``id``, N -> ``size``, T -> ``signup_time``,
        M -> ``books_per_day``.
        """
        # Assignment order is kept stable because __str__ prints __dict__.
        self.id = index
        self.size = N
        self.signup_time = T
        self.books_per_day = M
        self.book_ids = set()

    def add_book(self, book):
        """Register ``book`` in this library (no effect if already present)."""
        self.book_ids.add(book)

    def remove_book(self, book):
        """Drop ``book`` from this library; raises ``KeyError`` if it is absent."""
        self.book_ids.remove(book)

    def __str__(self):
        """Return the class followed by a dump of all instance attributes."""
        return f"{self.__class__}: {self.__dict__}"

### Utils

In [3]:
def process_file(filePath, base_dir=None):
    """Parse one problem input file into the structures used by the solvers.

    Layout, as read by the code below: the first line holds three integers
    ``B L D``; the second line holds one integer value per book; after that
    every library occupies two lines, ``N T M`` followed by its book ids.

    Args:
        filePath: file name, appended to the base directory.
        base_dir: prefix concatenated as-is with ``filePath`` (so it should end
            with a path separator). Defaults to the module-level ``directory``.

    Returns:
        ``((B, L, D), bookValues, book_libraries, libraries)`` where
        ``book_libraries[b]`` is the set of ids of the libraries listing book
        ``b`` and ``libraries`` is the list of ``Library`` objects in file
        order (library ``i`` gets id ``i``).
    """
    if base_dir is None:
        base_dir = directory
    with open(base_dir + filePath, 'r') as file:
        # Read line by line instead of ``read().split('\n')[:-1]``: the slice
        # threw away the last real line (the last library's books) whenever the
        # file did not end with a newline.
        content = [line.rstrip('\n') for line in file]

    B, L, D = (int(token) for token in content[0].split())
    book_libraries = [set() for _ in range(B)]
    bookValues = [int(token) for token in content[1].split()]

    libraries = []
    for i in range(L):
        header_row = 2 + 2 * i  # "N T M" line; the book ids are on the next line
        N, T, M = (int(token) for token in content[header_row].split())
        library = Library(i, N, T, M)
        for token in content[header_row + 1].split():
            book_id = int(token)
            book_libraries[book_id].add(i)
            library.add_book(book_id)
        libraries.append(library)
    return ((B, L, D), bookValues, book_libraries, libraries)

In [4]:
def check_solution(D, libraries):
    """Sanity-check an ordered list of libraries against the day limit ``D``.

    Walks the libraries in order, accumulating their sign-up times. For each
    one it prints a "what" line when the library lists more books than
    ``(D - elapsed) * books_per_day``, and asserts that its book ids contain no
    duplicates and none already seen in an earlier library. Finally asserts
    that the total sign-up time is strictly below ``D``.

    Raises:
        AssertionError: when one of the asserted conditions does not hold.
    """
    elapsed = 0
    seen_books = set()
    for lib in libraries:
        elapsed += lib.signup_time
        capacity = (D - elapsed) * lib.books_per_day
        n_books = len(lib.book_ids)
        if n_books > capacity:
            # Only reported, not enforced.
            print("what", n_books, capacity)
        assert n_books == len(set(lib.book_ids))
        assert seen_books.isdisjoint(lib.book_ids)
        seen_books.update(lib.book_ids)
    assert elapsed < D
    
def score_solution(libraries, book_values):
    """Return the sum of ``book_values[b]`` over every book id of every library."""
    return sum(
        book_values[book_id]
        for library in libraries
        for book_id in library.book_ids
    )

In [12]:
DIVIDOR = 100  # library scores are refreshed once every DIVIDOR selection steps


class GreedyIntervalSolver:
    """Greedy library scheduler that refreshes its ranking only periodically.

    Libraries are selected one at a time by descending ``library_score``. The
    scores are recomputed every ``DIVIDOR`` selections (the module-level value
    is read while ``get_solution`` runs); in between, the cached scores are
    reused even though the underlying book sets keep shrinking.
    """

    def __init__(self, B, L, D, book_values, book_libraries, libraries):
        """Store the problem instance.

        Args:
            B, L, D: stored as given; ``D`` is used as the day limit.
            book_values: indexable by book id, gives the value of that book.
            book_libraries: per book id, the set of ids of libraries holding it.
            libraries: library objects positioned so that ``libraries[i].id == i``;
                each exposes ``id``, ``signup_time``, ``books_per_day``,
                ``book_ids`` (a set) and ``remove_book``.
        """
        self.B, self.L, self.D = B, L, D
        self.book_values = book_values
        # get_solution prunes the book sets while it runs. Work on private
        # copies (a shallow copy of the outer list is not enough) so the
        # caller's inputs are left untouched.
        self.libraries = [self._private_copy(library) for library in libraries]
        self.book_libraries = [set(holders) for holders in book_libraries]

    @staticmethod
    def _private_copy(library):
        """Return a copy of ``library`` that owns its own ``book_ids`` set."""
        clone = copy.copy(library)
        clone.book_ids = set(library.book_ids)
        return clone

    def book_score(self, book_id):
        """Sort key for a book: its value from this solver's ``book_values``."""
        # Uses the instance data rather than a notebook-level global.
        return self.book_values[book_id]

    def get_n_best_books(self, lib, n):
        """Return the first ``n`` book ids of ``lib`` ordered by descending ``book_score``."""
        return sorted(lib.book_ids, key=self.book_score, reverse=True)[:n]

    def library_score(self, lib_id):
        """Score a library: value of the books it could ship, over ``signup_time ** 0.95``.

        The shipping capacity is estimated as if the library were signed up
        first, i.e. ``(D - signup_time) * books_per_day``.
        """
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        # Clamp at zero: a negative count would be read by the slice as
        # "all but the last k books" for a library that can ship nothing.
        capacity = max(0, delta_time * lib.books_per_day)
        n_best_books = self.get_n_best_books(lib, capacity)
        sum_of_best_book_scores = sum(self.book_values[book] for book in n_best_books)
        return sum_of_best_book_scores / lib.signup_time ** 0.95

    def get_solution(self):
        """Build the ordered list of selected libraries.

        Returns:
            Copies of the selected libraries, in sign-up order, each with
            ``book_ids`` replaced by the list of books it ships (best first).

        Note:
            The solver's internal book sets are consumed by this call; create a
            new solver to solve the same instance again.
        """
        # Shallow copies on purpose: until a library is selected, its copy
        # shares the book set with self.libraries, so library_score sees the
        # removals made below.
        libraries = [copy.copy(library) for library in self.libraries]
        remaining_libraries = set(lib.id for lib in libraries)
        day = 0
        chosen_libraries = []
        it = 0
        stop_calc_score = False
        while len(remaining_libraries) > 0:
            if (not stop_calc_score) and (it % DIVIDOR == 0):
                # Rank purely by score. The previous key also referenced an
                # undefined name (`lib`), which raised NameError on a fresh
                # kernel and, being constant, never influenced the order.
                library_scores = [(self.library_score(lib_id), lib_id)
                                  for lib_id in remaining_libraries]
            it += 1
            max_el = max(library_scores, key=lambda x: x[0])
            library_scores.remove(max_el)
            _, lib_id = max_el
            remaining_libraries.remove(lib_id)
            library = libraries[lib_id]
            if day + library.signup_time >= self.D:
                # Does not fit any more: skip it and stop refreshing scores.
                stop_calc_score = True
                continue
            day += library.signup_time
            books_to_take = (self.D - day) * library.books_per_day
            sorted_books = sorted(library.book_ids, key=self.book_score, reverse=True)
            taken_books = sorted_books[:books_to_take]

            # Books shipped here are no longer available to any other library.
            for book_id in taken_books:
                for other_id in self.book_libraries[book_id]:
                    if other_id != library.id:
                        libraries[other_id].remove_book(book_id)

            # Books left behind are no longer held by this library.
            for book_id in sorted_books[books_to_take:]:
                self.book_libraries[book_id].remove(library.id)

            library.book_ids = taken_books
            chosen_libraries.append(library)
        return chosen_libraries

    def get_individual_from_solution(self, solution):
        """Return the ids of the libraries in ``solution`` as a tuple, in order."""
        return tuple(lib.id for lib in solution)
    
    
# sum_score = 0
# for file_path in file_paths[1:]:
#     print(file_path)
#     (B,L,D),book_values,book_libraries, libraries = process_file(file_path)
#     solver = GreedyIntervalSolver(B,L,D,book_values,book_libraries, libraries)
#     %time solution = solver.get_solution()
#     check_solution(D,solution)
#     score = score_solution(solution, book_values)
#     print(score)
#     sum_score += score
# print(sum_score)

In [13]:
# for b in [100,50,30,10]:
#     DIVIDOR =b
#     sum_score = 0
#     for file_path in file_paths[3:4]:
#         print(file_path)
#         (B,L,D),book_values,book_libraries, libraries = process_file(file_path)
#         solver = GreedyIntervalSolver(B,L,D,book_values,book_libraries, libraries)
#         %time solution = solver.get_solution()
#         check_solution(D,solution)
#         score = score_solution(solution, book_values)
#         sum_score += score
#     print(sum_score)

In [17]:
# Run GreedyIntervalSolver on every data set and report per-file and total scores.
# Rebinds the module-level DIVIDOR, which get_solution reads to decide how often
# library scores are recomputed.
DIVIDOR =10
sum_score = 0
for file_path in file_paths:
    print(file_path)
    (B,L,D),book_values,book_libraries, libraries = process_file(file_path)
    solver = GreedyIntervalSolver(B,L,D,book_values,book_libraries, libraries)
    # IPython line magic: prints CPU and wall time of the solve.
    %time solution = solver.get_solution()
    # Raises AssertionError if the solution breaks one of the checked invariants.
    check_solution(D,solution)
    score = score_solution(solution, book_values)
    print(score)
    sum_score += score
print(sum_score)

a_example.txt
CPU times: user 421 µs, sys: 1 µs, total: 422 µs
Wall time: 427 µs
21
b_read_on.txt
CPU times: user 94.4 ms, sys: 0 ns, total: 94.4 ms
Wall time: 94.2 ms
5822900
c_incunabula.txt
CPU times: user 9.34 s, sys: 0 ns, total: 9.34 s
Wall time: 9.34 s
5689428
d_tough_choices.txt
CPU times: user 1min 41s, sys: 0 ns, total: 1min 41s
Wall time: 1min 41s
4974905
e_so_many_books.txt
CPU times: user 1.13 s, sys: 2 µs, total: 1.13 s
Wall time: 1.13 s
4956224
f_libraries_of_the_world.txt
CPU times: user 378 ms, sys: 0 ns, total: 378 ms
Wall time: 378 ms
5196760
26640238


In [8]:
class SimpleGreedyIntervalSolver(GreedyIntervalSolver):
    """Variant that ranks libraries by book count instead of book value."""

    def library_score(self, lib_id):
        """Return the number of books the library could ship, per sign-up day.

        The count is the smaller of the library's current number of books and
        ``(D - signup_time) * books_per_day``.
        """
        library = self.libraries[lib_id]
        days_left = self.D - library.signup_time
        shippable = min(days_left * library.books_per_day, len(library.book_ids))
        return shippable / library.signup_time

In [18]:
# Run SimpleGreedyIntervalSolver on every data set and report per-file and total scores.
# Rebinds the module-level DIVIDOR, which get_solution reads to decide how often
# library scores are recomputed.
DIVIDOR =30
sum_score = 0
for file_path in file_paths:
    print(file_path)
    (B,L,D),book_values,book_libraries, libraries = process_file(file_path)
    solver = SimpleGreedyIntervalSolver(B,L,D,book_values,book_libraries, libraries)
    # IPython line magic: prints CPU and wall time of the solve.
    %time solution = solver.get_solution()
    # Raises AssertionError if the solution breaks one of the checked invariants.
    check_solution(D,solution)
    score = score_solution(solution, book_values)
    print(score)
    sum_score += score
print(sum_score)

a_example.txt
CPU times: user 774 µs, sys: 2 µs, total: 776 µs
Wall time: 783 µs
21
b_read_on.txt
CPU times: user 29 ms, sys: 3 µs, total: 29.1 ms
Wall time: 29.1 ms
5822900
c_incunabula.txt
CPU times: user 7.4 s, sys: 0 ns, total: 7.4 s
Wall time: 7.4 s
5623848
d_tough_choices.txt
CPU times: user 1min 2s, sys: 0 ns, total: 1min 2s
Wall time: 1min 2s
4976335
e_so_many_books.txt
CPU times: user 133 ms, sys: 0 ns, total: 133 ms
Wall time: 133 ms
4938965
f_libraries_of_the_world.txt
CPU times: user 147 ms, sys: 0 ns, total: 147 ms
Wall time: 147 ms
5317660
26679729
