In [18]:
import pandas as pd
import numpy as np
import random
import json
from dataclasses import dataclass

random.seed(42)

# First Step : convert the data into the right datatype

In [3]:
def import_data(file:str): # json file 
    with open(file) as f:
        data = json.load(f)

        # convert to numpy array
        data["preferences"] = np.array(data["preferences"])
        data["authorship"] = np.array(data["authorship"])
        data["friendships"] = np.array(data["friendships"])

    return data


In [4]:
data = import_data("datasets/easy_dataset_1.json")
data

{'dataset_id': 'Easy Dataset 1',
 'num_papers': 5,
 'num_reviewers': 5,
 'reviewer_capacity': 3,
 'min_reviews_per_paper': 3,
 'max_reviews_per_paper': 5,
 'preferences': array([[3, 2, 1, 5, 3],
        [5, 2, 4, 2, 2],
        [4, 3, 1, 2, 4],
        [4, 2, 2, 1, 1],
        [4, 3, 4, 1, 5]]),
 'friendships': array([[0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]]),
 'authorship': array([[0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [1, 1, 0, 0, 1],
        [0, 0, 1, 1, 0]])}

In [47]:
def fitness(agent: np.array, context, nb_reviewers, nb_papers, author_weight=1, friendship_weight=0.5):
    """ 
    Returns a score corresponding to the satisfaction the agent obtains with respect to the constraints
    we want him to follow.
    """
    n = nb_reviewers
    m = nb_papers

    # Satisfaction matrix: the pleasure each reviewer takes by reviewing it's attributed papers
    satisfaction = agent * context["preferences"]

    # Add author constraint
    author_cons = np.ones((n,m)) - author_weight * context["authorship"]
    satisfaction = np.multiply(satisfaction, author_cons)

    # Add friendship constraint
    friendship = np.triu(context["friendships"])    # only need upper triangular part because matrix is symmetric
    agents1, agents2 = np.where(friendship == 1)

    # Iterate over every pair of friends
    for i1, i2 in zip(agents1, agents2):
        # And then over all the papers they can review
        for j in range(m):
            if satisfaction[i1, j] and satisfaction[i2, j]: # Case where 2 friends review the same paper
                satisfaction[i1, j] = satisfaction[i1, j]*friendship_weight
                satisfaction[i2, j] = satisfaction[i2, j]*friendship_weight
    return np.sum(satisfaction)


In [51]:
def genetic_algo(dataset, nb_agents, mutation_freq):
    """
    Genetic algorithm concerning the distribution of papers reviews.

    Parameters:

    nb_agents (int) : The number of agents who populate the algorithm for each generation
    mutation_freq (float in [0;1]) : Mutation frequence
    """

    assert(0 <= mutation_freq <= 1)

    # Get context via dataset reading
    context = import_data(dataset)
    n = context["num_reviewers"]
    m = context["num_papers"]

    # Randomly generates some agents (= n*m matrices)
    agents = [np.random.rand(n,m).round() for _ in range(nb_agents)]

    scores = [0]
    it = 0
    while(max(scores) < 50 and it < 10):

        # FITNESS COMPUTATION
        scores = [fitness(ag, context, n, m) for ag in agents]
        
        
        # AGENT SELECTION
        new_agents = []
        new_len = int(0.8 * nb_agents)
        
        # Roulette metaphora
        while len(new_agents) < new_len:
            selection_arrow = random.random()   # random value between 0 and 1
            sum_scores = sum(scores)
            roulette_score = 0

            # Fill the roulette little by little
            # If the arrow is in the area we just added, the agent is selected for the next generation
            for i in range(len(agents)):

                pi = scores[i]/sum_scores
                
                # Area is proportional to agent fitness score
                if roulette_score < selection_arrow < roulette_score + pi:
                    new_agents.append(agents.pop(i))
                    scores.pop(i)
                    break

        
        # CROSSOVERS
        while(len(new_agents) < nb_agents):
            k = len(new_agents)

            # Choose two different random agents
            (i, j) = (random.randint(0, k-1), random.randint(0,k-1))
            if i == j:
                continue
            
            # Take half of the two agents, splitted vertically
            split_index = m // 2
            left = new_agents[i][:, :split_index]
            right = new_agents[j][:, split_index:]

            # Glue them to build a new agent
            new_agents.append(np.hstack((left, right)))


        # MUTATIONS
        for agent in new_agents:
            if random.random() < mutation_freq:
                (i, j) = (random.randint(0, n), random.randint(0, m))
                agent[i ,j] = 1 - agent[i, j]


        # Reset
        agents = new_agents
        new_agents = []
        it += 1

    scores = [fitness(ag, context, n, m) for ag in agents]
    return(agents[np.argmax(scores)], np.max(scores))
    

print(genetic_algo("datasets/easy_dataset_1.json", 10, 0))

FITNESS CALC :  25.35
AGENT SELECTION
CROSSOVERS
MUTATIONS
1
FITNESS CALC :  26.15
AGENT SELECTION
CROSSOVERS
MUTATIONS
2
FITNESS CALC :  25.75
AGENT SELECTION
CROSSOVERS
MUTATIONS
3
FITNESS CALC :  25.55
AGENT SELECTION
CROSSOVERS
MUTATIONS
4
FITNESS CALC :  26.2
AGENT SELECTION
CROSSOVERS
MUTATIONS
5
FITNESS CALC :  25.3
AGENT SELECTION
CROSSOVERS
MUTATIONS
6
FITNESS CALC :  26.45
AGENT SELECTION
CROSSOVERS
MUTATIONS
7
FITNESS CALC :  26.3
AGENT SELECTION
CROSSOVERS
MUTATIONS
8
FITNESS CALC :  29.4
AGENT SELECTION
CROSSOVERS
MUTATIONS
9
FITNESS CALC :  27.85
AGENT SELECTION
CROSSOVERS
MUTATIONS
10
(array([[1., 1., 0., 1., 0.],
       [1., 1., 0., 0., 1.],
       [1., 0., 0., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 0., 1.]]), 34.5)


# now lets define a fitness function

In [None]:
@dataclass
class weights:
    preference: float
    authorship: float
    friendships: float

params = weights(10, 1, 1) # weitght for authorship penalty should be higher than the one for friendships


def fitness (data:dict, solution:np.array, params:weights):
    assert solution.shape == data["preferences"].shape, "Solution and preferences must have the same shape"
    assert len(solution.shape) == 2, "Solution and preferences must be 2D arrays"

    # first we compute how much the preference of each person is satisfied
    preference = np.sum(data["preferences"] * solution)
    preference = preference / np.sum(data["preferences"]) # so we get a value between 0 and 1

    # now we have to add the different penalty
    # first we compute the penalty for authorship

    authorship = np.sum(data["authorship"] * solution)
    authorship = authorship / np.sum(data["authorship"]) # so we get a value between 0 and 1

    # now we compute the penalty for friendships (I think it works )
    friendships = np.sum(data["friendships"] * solution * authorship)
    friendships = friendships / np.sum(data["friendships"]) # so we get a value between 0 and 1

    # Do we put the constraint in the fitness function or as a strcit contrainst in the genetic algorithm ? TODO think about it

    fit = params.preference * preference - (params.authorship * authorship + params.friendships * friendships)

    return fit

In [81]:
solution = np.random.randint(0, 2, data["preferences"].shape)
fitness(data, solution, params)


4.7142857142857135