In [2]:
import numpy as np, matplotlib, time, copy, random, math
%load_ext line_profiler
directory = '../data/'
file_paths = ['a_example.txt', 'b_read_on.txt', 'c_incunabula.txt','d_tough_choices.txt', 'e_so_many_books.txt', 'f_libraries_of_the_world.txt']

### Utils

In [3]:
ALPHA = 1

In [4]:
def process_file(filePath):
    with open(directory+filePath,'r') as file:
        content = file.read().split('\n')[:-1];
        B,L,D = content[0].split()
        B,L,D = int(B),int(L),int(D)
    bookValues = [int(n) for n in content[1].split()]#tuple([int(n) for n in content[1].split()])
    bookCounts = [0]*B
    libraries = []
    def book_sort(book):
        return bookValues[book]
    for i in range(L):
        N,T,M = content[2+2*i].split()
        N,T,M=int(N),int(T),int(M)
        book_ids = set([int(id) for id in content[2+2*i+1].split()])
        for book_id in book_ids:
            bookCounts[book_id] +=1
        libraries.append(Library(i,N,T,M,book_ids))
    return ((B,L,D), bookValues,bookCounts, libraries)

In [5]:
def check_solution(D, libraries):
    days = 0
    prev_books = set()
    for library in libraries:
        days+=library.signup_time
        if len(library.book_ids) > (D-days)*library.books_per_day:
            print("what", len(library.book_ids), (D-days)*library.books_per_day)
        assert(len(library.book_ids) == len(set(library.book_ids)))
        assert(not any([(book in prev_books) for book in library.book_ids]))
        prev_books.update(library.book_ids)
    assert(days < D)
    
def score_solution(libraries, book_values):
    score=0
    for library in libraries:
        for book_id in library.book_ids:
            score+= book_values[book_id]
    return score

### Classes

In [6]:
class Library():
    def __init__(self,index,N,T,M, book_ids: set):
        self.id = index
        self.size  = N
        self.signup_time = T
        self.books_per_day = M
        self.book_ids = book_ids
    
    def __str__(self):
        return str(self.__class__) + ": " + str(self.__dict__)

In [7]:
class ProblemSolver:
    def __init__(self, B, L, D, book_values, book_counts, libraries):
        self.B, self.L, self.D = B,L,D
        self.book_values = book_values
        self.libraries = libraries
        self.book_counts = book_counts
        
    def book_score(self, book):
        return self.book_values[book]
        
    def get_solution(self,selected_lib_ids=None):
        if not selected_lib_ids:
            selected_lib_ids = self.get_individual()
        local_book_values = copy.copy(self.book_values)
        day = 0
        selected_libraries = [copy.copy(self.libraries[i]) for i in selected_lib_ids]
        it=0;
        already_scanned_books = set()
        while it < len(selected_libraries):
            next_library = selected_libraries[it]
            day+=next_library.signup_time
            if day>=self.D:
                break
            next_library.book_ids = next_library.book_ids - already_scanned_books
            next_library.book_ids = sorted(next_library.book_ids,key=self.book_score, reverse=True)[:(self.D-day)*next_library.books_per_day]
            already_scanned_books.update(next_library.book_ids)
            it+=1
        return selected_libraries[:it]
    
    def get_individual(self):
        pass

In [8]:
class HeurysticSolver(ProblemSolver):
    def get_n_best_books(self,lib, n):
        return sorted(lib.book_ids,key=self.book_score, reverse=True)[:n]
    
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = self.get_n_best_books(lib, delta_time*lib.books_per_day)
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores/lib.signup_time
    
    def get_individual(self):
        lib_ids = [i for i in range(len(self.libraries))]
        lib_ids.sort(key = self.library_score, reverse= True)
        day = 0
        for i in range(len(lib_ids)):
            day+=self.libraries[lib_ids[i]].signup_time
            if day >= self.D:
                return tuple(lib_ids[:i])
        return tuple(lib_ids)

In [9]:
class PowerSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = self.get_n_best_books(lib, delta_time*lib.books_per_day)
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores/lib.signup_time**(1+lib.signup_time/self.D)

In [10]:
class SimpleScoreVarianceSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = self.get_n_best_books(lib, delta_time*lib.books_per_day)
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001,np.var(best_scores))
        return sum_of_best_book_scores/book_variance

In [11]:
class SquareScoreVarianceSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = self.get_n_best_books(lib, delta_time*lib.books_per_day)
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001,np.var(best_scores))
        return sum_of_best_book_scores**2/(lib.signup_time*lib.signup_time*math.sqrt(book_variance))

In [12]:
class BookNumbersSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = self.get_n_best_books(lib, delta_time*lib.books_per_day)
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001,np.var(best_scores))
        return sum_of_best_book_scores**2/(book_variance*len(n_best_books)*lib.signup_time)

In [13]:
class ScoreSquareSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = self.get_n_best_books(lib, delta_time*lib.books_per_day)
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores**2/lib.signup_time

In [14]:
class BookCountSolver(HeurysticSolver):
    def book_score(self, book):
        return self.book_values[book] - 0.7*self.book_counts[book]

In [15]:
class BookCountPowerSolver(PowerSolver):
    def book_score(self, book):
        return self.book_values[book] - 0.7*self.book_counts[book]

In [16]:
sum_score = 0
solvers_cls = [HeurysticSolver, PowerSolver, SimpleScoreVarianceSolver, SquareScoreVarianceSolver, BookNumbersSolver, ScoreSquareSolver, BookCountSolver, BookCountPowerSolver]
for file_path in file_paths:
    (B,L,D),book_values,book_counts, libraries = process_file(file_path)
    solvers = [cl(B,L,D,book_values,book_counts, libraries) for cl in solvers_cls]
    %time solutions = [solver.get_solution() for solver in solvers]
#     print(list(lib.id for lib in solutions[0]))
    [check_solution(D, solution) for solution in solutions]
    scores = [score_solution(solution, book_values) for solution in solutions]
    print(scores)
    score = max(scores)
    print(score)
    sum_score+=score
print("SUM",sum_score)

CPU times: user 579 µs, sys: 307 µs, total: 886 µs
Wall time: 661 µs
[21, 21, 21, 21, 21, 21, 21, 21]
21
CPU times: user 425 ms, sys: 0 ns, total: 425 ms
Wall time: 423 ms
[5822900, 5822900, 5822900, 5822900, 5822900, 5822900, 5822900, 5822900]
5822900
CPU times: user 1.95 s, sys: 28.5 ms, total: 1.97 s
Wall time: 1.94 s
[5645747, 5640815, 1167633, 5632921, 5307077, 5525478, 5645747, 5640815]
5645747
CPU times: user 5.52 s, sys: 13.6 ms, total: 5.53 s
Wall time: 5.52 s
[4815395, 4815395, 4815395, 4815395, 4815395, 4815395, 4815395, 4815395]
4815395
CPU times: user 2.01 s, sys: 305 µs, total: 2.01 s
Wall time: 2 s
[5020017, 5022146, 773970, 4791445, 2796593, 4741433, 5021339, 5022881]
5022881
CPU times: user 2.26 s, sys: 3.4 ms, total: 2.26 s
Wall time: 2.26 s
[5240161, 5238624, 1195114, 5211764, 4111186, 5083231, 5240157, 5238622]
5240161
SUM 26547105


In [13]:
sum_score = 0
for file_path in file_paths:
    (B,L,D),book_values, libraries = process_file(file_path)
    solver = HeurysticSolver(B,L,D,book_values, libraries)
    solution = solver.get_solution()
    check_solution(D, solution)
    power_solver = PowerSolver(B,L,D,book_values, libraries)
    power_solution = power_solver.get_solution()
    check_solution(D, power_solution)
    score = max(score_solution(solution, book_values), score_solution(power_solution, book_values))
    print(score)
    sum_score +=score
print(sum_score)

21
5822900
5645747
4815395
4642132
5240161
26166356
