In [2]:
import numpy as np, matplotlib, time, copy, random, math
from sortedcontainers import SortedSet, SortedList
%load_ext line_profiler
directory = '../data/'
file_paths = ['a_example.txt', 'b_read_on.txt', 'c_incunabula.txt','d_tough_choices.txt', 'e_so_many_books.txt', 'f_libraries_of_the_world.txt']

### Classes

In [4]:
class Library():
    def __init__(self,index,N,T,M):
        self.id = index
        self.size  = N
        self.signup_time = T
        self.books_per_day = M
        self.book_ids = set()
        
    def add_book(self,book):
        self.book_ids.add(book)
        
    def remove_book(self,book):
        self.book_ids.remove(book)
        
    
    def __str__(self):
        return str(self.__class__) + ": " + str(self.__dict__)

### Utils

In [14]:
def process_file(filePath):
    with open(directory+filePath,'r') as file:
        content = file.read().split('\n')[:-1];
        B,L,D = content[0].split()
        B,L,D = int(B),int(L),int(D)
    book_libraries = [set() for i in range(B)]
    bookValues = [int(n) for n in content[1].split()]
    libraries = []
    for i in range(L):
        N,T,M = content[2+2*i].split()
        N,T,M=int(N),int(T),int(M)
        book_ids = [int(id) for id in content[2+2*i+1].split()]
        library = Library(i,N,T,M)
        for book_id in book_ids:
            book_libraries[book_id].add(i)
            library.add_book(book_id)
        libraries.append(library)
    return ((B,L,D), bookValues,book_libraries, libraries)

In [34]:
def check_solution(D, libraries):
    days = 0
    prev_books = set()
    for library in libraries:
        days+=library.signup_time
        if len(library.book_ids) > (D-days)*library.books_per_day:
            print("what", len(library.book_ids), (D-days)*library.books_per_day)
        assert(len(library.book_ids) == len(set(library.book_ids)))
        assert(not any([(book in prev_books) for book in library.book_ids]))
        prev_books.update(library.book_ids)
    assert(days < D)
    
def score_solution(libraries, book_values):
    score=0
    for library in libraries:
        for book_id in library.book_ids:
            score+= book_values[book_id]
    return score

In [37]:
DIVIDOR = 100
class GreedyIntervalSolver:
    def __init__(self, B, L, D, book_values, book_libraries, libraries):
        self.B, self.L, self.D = B,L,D
        self.book_values = book_values
        self.libraries = libraries
        self.book_libraries = copy.copy(book_libraries)
        
    def book_score(self, book_id):
        return book_values[book_id] #- 0.7*len(self.book_libraries[book_id])
    
    def get_n_best_books(self,lib, n):
        return sorted(lib.book_ids,key=self.book_score, reverse=True)[:n]
    
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = self.get_n_best_books(lib, delta_time*lib.books_per_day)
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores/lib.signup_time#**(1+lib.signup_time/self.D)
    
    def get_solution(self):
        libraries = [copy.copy(library) for library in self.libraries]
        remaining_libraries = set(lib.id for lib in libraries)
        day = 0
        chosen_libraries = []
        it=0
        while len(remaining_libraries) > 0:
            if it%DIVIDOR == 0:
                library_scores = [(self.library_score(lib_id),lib_id) for lib_id in remaining_libraries]
            it+=1
            max_el = max(library_scores, key = lambda x:x[0])
            library_scores.remove(max_el)
            score, lib_id = max_el
            remaining_libraries.remove(lib_id)
            library = libraries[lib_id]
            if day + library.signup_time>= self.D:
                continue
            day+=library.signup_time
            books_to_take = (self.D - day)*library.books_per_day
            sorted_books = sorted(library.book_ids, key=self.book_score, reverse=True)
            for book_id in sorted_books[:books_to_take]:
                for lib_id in self.book_libraries[book_id]:
                    if lib_id != library.id:
                        libraries[lib_id].remove_book(book_id)

            for book_id in sorted_books[books_to_take:]:
                self.book_libraries[book_id].remove(library.id)

            library.book_ids = sorted_books[:books_to_take]
            chosen_libraries.append(library)
        return chosen_libraries
    
    def get_individual_from_solution(self,solution):
        return tuple([lib.id for lib in solution])
    
    
# sum_score = 0
# for file_path in file_paths[1:]:
#     print(file_path)
#     (B,L,D),book_values,book_libraries, libraries = process_file(file_path)
#     solver = GreedyIntervalSolver(B,L,D,book_values,book_libraries, libraries)
#     %time solution = solver.get_solution()
#     check_solution(D,solution)
#     score = score_solution(solution, book_values)
#     print(score)
#     sum_score += score
# print(sum_score)

In [39]:
for b in [5,4,3,2]:
    DIVIDOR =b
    sum_score = 0
    for file_path in file_paths[3:4]:
        print(file_path)
        (B,L,D),book_values,book_libraries, libraries = process_file(file_path)
        solver = GreedyIntervalSolver(B,L,D,book_values,book_libraries, libraries)
        %time solution = solver.get_solution()
        check_solution(D,solution)
        score = score_solution(solution, book_values)
        sum_score += score
    print(sum_score)

d_tough_choices.txt
CPU times: user 4min 41s, sys: 12 ms, total: 4min 41s
Wall time: 4min 41s
4980625
d_tough_choices.txt
CPU times: user 5min 51s, sys: 28 ms, total: 5min 51s
Wall time: 5min 51s
4978870
d_tough_choices.txt
CPU times: user 7min 7s, sys: 7.99 ms, total: 7min 7s
Wall time: 7min 7s
4986930
d_tough_choices.txt
CPU times: user 10min 12s, sys: 16 ms, total: 10min 12s
Wall time: 10min 12s
4983420


In [24]:
class GreedyIntervalSolver():
    
def simpleGreedy(instance, book_score, library_score, update = True):
    remaining_libraries = set(instance.libraries)
    day = 0
    chosen_libraries = []
    it=0
    while len(remaining_libraries) > 0:
        if it%(len(instance.libraries)/1000) == 0:
            library_scores = [(library_score(instance, lib,day, book_score), lib) for lib in remaining_libraries]
        it+=1
        max_el = max(library_scores, key = lambda x:x[0])
        library_scores.remove(max_el)
        score, library = max_el
        remaining_libraries.remove(library)
        if day + library.signup_time>= instance.D:
            break
        day+=library.signup_time
        books_to_take = (instance.D - day)*library.books_per_day
        sorted_books = sorted(library.books, key=book_score, reverse=True)#list(library.books.__reversed__())
        for book in sorted_books[:books_to_take]:
            for lib_id in book.library_ids:
                if lib_id != library.id:
                    instance.libraries[lib_id].remove_book(book)
                    
        if update:
            for book in sorted_books[books_to_take:]:
                book.library_ids.remove(library.id)
                
        library.books = sorted_books[:books_to_take]
        chosen_libraries.append(library)
    return chosen_libraries
        

IndentationError: expected an indented block (<ipython-input-24-e76f3bfb9837>, line 3)

In [None]:
def library_score(instance, lib, day, book_score):
    delta_time = instance.D - lib.signup_time
    n_best_books = sorted(lib.books, key=book_score, reverse=True)[:delta_time*lib.books_per_day]
    sum_of_best_book_scores = sum(book.value for book in n_best_books)
    return sum_of_best_book_scores/lib.signup_time**(1+lib.signup_time/instance.D)

sum_score = 0
for file_path in file_paths[1:]:
    print(file_path)
    instance = process_file(file_path)
    %time solution = simpleGreedy(instance, book_score, library_score)
    check_solution(instance.D,solution)
    score = score_solution(solution)
    print(score)
    sum_score += score
print(sum_score)

b_read_on.txt
CPU times: user 86.5 ms, sys: 8 µs, total: 86.5 ms
Wall time: 86.5 ms
5822900
c_incunabula.txt
CPU times: user 16.7 s, sys: 0 ns, total: 16.7 s
Wall time: 16.7 s
5688567
d_tough_choices.txt


In [None]:
class ProblemSolver:
    def __init__(self, B, L, D, books, libraries):
        self.B, self.L, self.D = B,L,D
        self.books = books
        self.libraries = libraries
        
    def get_solution(self,selected_lib_ids=None):
        if not selected_lib_ids:
            selected_lib_ids = self.get_individual()
        local_book_values = copy.copy(self.book_values)
        day = 0
        selected_libraries = [copy.copy(self.libraries[i]) for i in selected_lib_ids]
        it=0;
        while it < len(selected_libraries):
            next_library = selected_libraries[it]
            day+=next_library.signup_time
            if day>=self.D:
                break
            next_library.book_ids = next_library.get_n_best_books((self.D-day)*next_library.books_per_day, local_book_values)
            for book_id in next_library.book_ids:
                local_book_values[book_id] = 0
            it+=1
        return selected_libraries[:it]
    
    def get_individual(self):
        pass

In [6]:
class HeurysticSolver(ProblemSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(delta_time*lib.signup_time, self.book_values)
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores/lib.signup_time
    
    def get_individual(self):
        lib_ids = [i for i in range(len(self.libraries))]
        lib_ids.sort(key = self.library_score, reverse= True)
        day = 0
        for i in range(len(lib_ids)):
            day+=self.libraries[lib_ids[i]].signup_time
            if day >= self.D:
                return tuple(lib_ids[:i])
        return tuple(lib_ids)

In [7]:
class PowerSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(delta_time*lib.signup_time, self.book_values)
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores/lib.signup_time**(1+lib.signup_time/self.D)

In [8]:
class SimpleScoreVarianceSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(delta_time*lib.signup_time, self.book_values)
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001,np.var(best_scores))
        return sum_of_best_book_scores/book_variance

In [9]:
class SquareScoreVarianceSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(delta_time*lib.signup_time, self.book_values)
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001,np.var(best_scores))
        return sum_of_best_book_scores**2/(lib.signup_time*lib.signup_time*math.sqrt(book_variance))

In [10]:
class BookNumbersSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(delta_time*lib.signup_time, self.book_values)
        best_scores = [self.book_values[book] for book in n_best_books]
        sum_of_best_book_scores = sum(best_scores)
        book_variance = max(0.001,np.var(best_scores))
        return sum_of_best_book_scores**2/(book_variance*len(n_best_books)*lib.signup_time)

In [11]:
class ScoreSquareSolver(HeurysticSolver):
    def library_score(self,lib_id):
        lib = self.libraries[lib_id]
        delta_time = self.D - lib.signup_time
        n_best_books = lib.get_n_best_books(delta_time*lib.signup_time, self.book_values)
        sum_of_best_book_scores = sum([self.book_values[book] for book in n_best_books])
        return sum_of_best_book_scores**2/lib.signup_time

In [5]:
class randomChangeSolver(ProblemSolver):
    def get_individual(self):
        solvers_cls = [HeurysticSolver, PowerSolver, SimpleScoreVarianceSolver, SquareScoreVarianceSolver, BookNumbersSolver, ScoreSquareSolver]
        solvers = [cl(B,L,D,book_values, libraries) for cl in solvers_cls]
        population = [solver.get_individual() for solver in solvers]
        for i n range(100):
            minLength = min(len(pop) for pop in population)
            step = rand.range(0,min(10,minLength))
            
        

SyntaxError: unexpected EOF while parsing (<ipython-input-5-60f0a754dac1>, line 1)

In [12]:
sum_score = 0
solvers_cls = [HeurysticSolver, PowerSolver, SimpleScoreVarianceSolver, SquareScoreVarianceSolver, BookNumbersSolver, ScoreSquareSolver]
for file_path in file_paths:
    (B,L,D),book_values, libraries = process_file(file_path)
    solvers = [cl(B,L,D,book_values, libraries) for cl in solvers_cls]
    solutions = [solver.get_solution() for solver in solvers]
    [check_solution(D, solution) for solution in solutions]
    scores = [score_solution(solution, book_values) for solution in solutions]
    print(scores)
    score = max(scores)
    print(score)
    sum_score+=score
print("SUM",sum_score)

[21, 21, 21, 21, 21, 21]
21
[5822900, 5822900, 4075800, 5822900, 5822900, 5822900]
5822900
[5645747, 5640815, 1167633, 5632921, 5307077, 5525478]
5645747
[4815395, 4815395, 4815395, 4815395, 4815395, 4815395]
4815395
[4613373, 4642132, 4056632, 4606497, 4514009, 2040620]
4642132
[5240161, 5238624, 1195114, 5211764, 4111186, 5083231]
5240161
SUM 26166356


In [13]:
sum_score = 0
for file_path in file_paths:
    (B,L,D),book_values, libraries = process_file(file_path)
    solver = HeurysticSolver(B,L,D,book_values, libraries)
    solution = solver.get_solution()
    check_solution(D, solution)
    power_solver = PowerSolver(B,L,D,book_values, libraries)
    power_solution = power_solver.get_solution()
    check_solution(D, power_solution)
    score = max(score_solution(solution, book_values), score_solution(power_solution, book_values))
    print(score)
    sum_score +=score
print(sum_score)

21
5822900
5645747
4815395
4642132
5240161
26166356


In [13]:
books = []
values = [i for i in range(10000)]
iis = [i for i in range(10000)]
for i in range(10000):
    books.append(Book(i))
    
%timeit sum([book.value for book in books])
%timeit sum([values[i] for  i in iis ])

595 µs ± 2.19 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
616 µs ± 1.89 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
