In [42]:
import numpy as np
import pandas as pd
import random as rn

In [43]:
# распределение студентов по проектам можно представить например 
# как pandas DataFrame, где номер проекта соответствует списку распределённый на него студентов
students = pd.DataFrame([0, 1, 2, 3])
projects = pd.DataFrame([0, 1])
distribution = pd.DataFrame([(0, [0, 1]), (1, [2, 3])], columns=['project', 'students'])
distribution

Unnamed: 0,project,students
0,0,"[0, 1]"
1,1,"[2, 3]"


In [44]:
def df_random(shape, a, b):
    result = pd.DataFrame()
    for i in range(shape[0]):
        part_of_result = pd.DataFrame([rn.randint(a, b) for i in range(shape[1])]).T
        result = pd.concat([result, part_of_result], ignore_index=True)
    return result

In [45]:
# количество хард скилов (их можно указать любое натуральное число)
N_hard_skills = 200
N_students = 100

students_hard_skills = df_random((N_students, N_hard_skills), 0, 1)

# DataFrame содержащий набор векторов характеризующий навыки всех студентов
students_hard_skills.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,0,1,0,1,1,1,1,1,0,1,...,0,1,0,1,0,1,0,0,1,1


In [46]:
N_projects = 40
projects_hard_skills = df_random((N_projects, N_hard_skills), 0, 1)

# DataFrame содержащий набор векторов характеризующий требования всех проектов к студентам
projects_hard_skills.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,0,0,1,1,1,1,1,1,0,0,...,1,1,0,1,1,1,1,1,1,0


In [47]:
N_managers = 20
N_manager_hard_skills = 30
managers_require_hard_skills = df_random((N_projects, N_manager_hard_skills), 0, 1)
df_of_zeroes = df_random((N_projects, N_hard_skills - N_manager_hard_skills), 0, 0)
managers_require_hard_skills = pd.concat([managers_require_hard_skills, df_of_zeroes], ignore_index=True, axis=1)

# DataFrame содержащий набор векторов характеризующий требования всех руководителей к студентам
managers_require_hard_skills.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,1,1,1,1,1,1,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [48]:
# словарь соответстия проекта руководителю
project_to_manager = {}
for i in range(N_projects):
    project_to_manager[i] = i%N_managers
    
project_to_manager

{0: 0,
 1: 1,
 2: 2,
 3: 3,
 4: 4,
 5: 5,
 6: 6,
 7: 7,
 8: 8,
 9: 9,
 10: 10,
 11: 11,
 12: 12,
 13: 13,
 14: 14,
 15: 15,
 16: 16,
 17: 17,
 18: 18,
 19: 19,
 20: 0,
 21: 1,
 22: 2,
 23: 3,
 24: 4,
 25: 5,
 26: 6,
 27: 7,
 28: 8,
 29: 9,
 30: 10,
 31: 11,
 32: 12,
 33: 13,
 34: 14,
 35: 15,
 36: 16,
 37: 17,
 38: 18,
 39: 19}

In [49]:
# не использую в итоге
# функция находит необходимые навыки путём взятия поэлементного логического или
def get_require_hard_skills(projects_hard_skills, managers_require_hard_skills, project_to_manager):
    all_require_hard_skills = pd.DataFrame()
    for i in range(N_projects):
        # необходимые навыки для одного проекта
        require_hard_skills = pd.DataFrame(np.logical_or(projects_hard_skills.iloc[i], managers_require_hard_skills.iloc[project_to_manager[i]]), dtype=int).T
        # добавление в необходимые навыки необходимых навыков для одного проекта
        all_require_hard_skills = pd.concat([all_require_hard_skills, require_hard_skills], ignore_index=True)
        
    return all_require_hard_skills
        
require_hard_skills = get_require_hard_skills(projects_hard_skills, managers_require_hard_skills, project_to_manager)
# DataFrame с векторами характеризующими итоговые требования проекта к студентам 
require_hard_skills.head(1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,1,1,1,1,1,1,1,1,0,1,...,1,1,0,1,1,1,1,1,1,0


In [50]:
# функция которая расчитывает соответствие навыков студентов к требуемым навыкам
# Если студент обладает навыком    (1) и он требуется для проекта    (1), то ставим  1
# Если студент не обладает навыком (0) и он требуется для проекта    (1), то ставим -1
# Если студент не обладает навыком (0) и он не требуется для проекта (0), то ставим  0
# Если студент обладает навыком    (1) и он не требуется для проекта (0), то ставим  0
def match_f(require, present):
    if      require == 0 and present == 0:
        return 0
    elif require == 1 and present == 1:
        return 1
    elif require == 1 and present == 0:
        return -1
    else:
        return 0

# не использую
# функция возвращает соответсиве студентов проектам поэлементно применяя функцию match_f
def get_match_students_to_projects(require_hard_skills, students_hard_skills):
    match_students_to_projects = np.zeros(shape=(len(students_hard_skills), len(require_hard_skills), N_hard_skills), dtype=int)
    for i in range(N_students):
        match_student_to_projects = np.zeros(shape=(len(require_hard_skills), N_hard_skills), dtype=int)
        for j in range(N_projects):
            match_student_to_project = [match_f(require_hard_skills[k][j], students_hard_skills[k][i]) for k in range(N_hard_skills)]
            match_student_to_projects[j] = np.array(match_student_to_project)
        match_students_to_projects[i] = np.array(match_student_to_projects)
        
    return match_students_to_projects
        
match_students_to_projects = get_match_students_to_projects(require_hard_skills, students_hard_skills)
match_students_to_projects

array([[[-1,  1, -1, ..., -1,  1,  0],
        [-1,  1,  0, ...,  0,  1,  0],
        [-1,  1,  0, ..., -1,  0,  1],
        ...,
        [-1,  1,  0, ...,  0,  0,  0],
        [ 0,  0, -1, ..., -1,  0,  1],
        [-1,  0, -1, ...,  0,  1,  1]],

       [[-1,  1,  1, ..., -1, -1,  0],
        [-1,  1,  0, ...,  0, -1,  0],
        [-1,  1,  0, ..., -1,  0, -1],
        ...,
        [-1,  1,  0, ...,  0,  0,  0],
        [ 0,  0,  1, ..., -1,  0, -1],
        [-1,  0,  1, ...,  0, -1, -1]],

       [[-1,  1,  1, ..., -1,  1,  0],
        [-1,  1,  0, ...,  0,  1,  0],
        [-1,  1,  0, ..., -1,  0,  1],
        ...,
        [-1,  1,  0, ...,  0,  0,  0],
        [ 0,  0,  1, ..., -1,  0,  1],
        [-1,  0,  1, ...,  0,  1,  1]],

       ...,

       [[ 1, -1,  1, ..., -1, -1,  0],
        [ 1, -1,  0, ...,  0, -1,  0],
        [ 1, -1,  0, ..., -1,  0,  1],
        ...,
        [ 1, -1,  0, ...,  0,  0,  0],
        [ 0,  0,  1, ..., -1,  0,  1],
        [ 1,  0,  1, ...,  0, -1

In [51]:
match_students_to_projects[0].shape

(40, 200)

In [52]:
coefficients_of_significance_of_hard_skills = np.ones(N_hard_skills)
# вектор коэффициентов значимости хард скилов
# на данной стадии все они являются единицами
# иными словами мы считаем на данной стадии что все они вносят одинаковый вклад
coefficients_of_significance_of_hard_skills[0:5]

array([1., 1., 1., 1., 1.])

In [53]:
match_students_to_projects_scaled = match_students_to_projects*coefficients_of_significance_of_hard_skills
# скалированный вектор соответствия навыков студента №1 требуемым навыкам для проекта №1
match_students_to_projects_scaled[0][0][0:10]

array([-1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.])

In [54]:
# коэффициент значимости оценки проекта студентом
q_1 = 20
# коэффициент значимости соответстия навыков студента требуемым навыкам для проекта
q_2 = 80
# q_1 и q_2 были взяты по принцпу Парето
match_student_project_scaled = match_students_to_projects_scaled[0][0]
grade_of_optimality_of_a_project_for_a_student = q_1*0.5 + q_2*match_student_project_scaled.sum()/coefficients_of_significance_of_hard_skills.sum()
# оценка соответсия студента №1 для проекта №1 по шкале от -100 до 100
grade_of_optimality_of_a_project_for_a_student

7.2

In [55]:
# находит оценку как скалярное произведение вектора коэффициентов на вектор соответсвия плюс оценка студента
# взятые с соответствующими коэффициентами
def get_grade_of_optimality_of_a_project_for_a_student(coefficients_of_significance_of_each_grade,
                                                       student_grade_for_project,
                                                       coefficients_of_significance_of_hard_skills,
                                                       student_hard_skills,
                                                       project_hard_skills,
                                                       manager_require_hard_skills):
    require_hard_skills = np.logical_or(project_hard_skills, manager_require_hard_skills)

    match_student_project = [match_f(require_hard_skills[i], student_hard_skills[i]) for i in range(N_hard_skills)]
    match_student_project = np.array(match_student_project)
    
    match_student_project_scaled = match_student_project*coefficients_of_significance_of_hard_skills
    
    grade_of_optimality_of_a_project_for_a_student = coefficients_of_significance_of_each_grade[0]*student_grade_for_project +coefficients_of_significance_of_each_grade[1]*match_student_project_scaled.sum()/coefficients_of_significance_of_hard_skills.sum()
    return grade_of_optimality_of_a_project_for_a_student

get_grade_of_optimality_of_a_project_for_a_student([q_1, q_2],
                                                   0.5,
                                                   coefficients_of_significance_of_hard_skills,
                                                   students_hard_skills.iloc[0],
                                                   projects_hard_skills.iloc[0],
                                                   managers_require_hard_skills.iloc[0])

7.2

In [56]:
managers_require_hard_skills.shape

(40, 200)

In [57]:
project_to_manager[39]

19

In [58]:
grades_of_optimality_of_students_for_projects = np.zeros(shape=(N_students, N_projects))
for i in range(N_students):
    for j in range(N_projects):
        grades_of_optimality_of_students_for_projects[i][j] = get_grade_of_optimality_of_a_project_for_a_student([q_1, q_2],
                                                               0.5,
                                                               coefficients_of_significance_of_hard_skills,
                                                               students_hard_skills.iloc[i],
                                                               projects_hard_skills.iloc[j],
                                                               managers_require_hard_skills.iloc[project_to_manager[j]])

grades_of_optimality_of_students_for_projects.shape

(100, 40)

In [59]:
grades_of_optimality_of_students_for_projects.shape

(100, 40)

In [60]:
np.random.rand(N_students, N_projects)*2-1

array([[-0.2467514 , -0.45706433, -0.26530347, ...,  0.51639267,
        -0.71831414, -0.7264832 ],
       [ 0.58328902,  0.67762006, -0.95276898, ..., -0.22728034,
        -0.02269039,  0.84966524],
       [ 0.95868649, -0.26895802, -0.89282209, ...,  0.90629297,
         0.11451597,  0.64978672],
       ...,
       [-0.8211425 , -0.83206291, -0.44403751, ...,  0.80395433,
        -0.64599084,  0.38900999],
       [ 0.55524513,  0.77648049, -0.38329348, ..., -0.20282417,
        -0.66766826,  0.10508735],
       [-0.10079565,  0.1507934 , -0.4424831 , ...,  0.42733178,
         0.40824096,  0.92510475]])

In [61]:
# аналогично get_grade_of_optimality_of_a_project_for_a_student,
# но возвращается уже матрица содержащая оценки для всех студентов и проектов
def get_grades_of_optimality_of_a_projects_for_a_students(coefficients_of_significance_of_each_grade,
                                                          student_grades_for_projects,
                                                          coefficients_of_significance_of_hard_skills,
                                                          students_hard_skills,
                                                          projects_hard_skills,
                                                          project_to_manager):
    # инициализируем матрицу, которую будем заполнять
    grades_of_optimality_of_students_for_projects = np.zeros(shape=(N_students, N_projects))
    for i in range(len(students_hard_skills)):
        for j in range(len(projects_hard_skills)):
            # используем функцию get_grade_of_optimality_of_a_project_for_a_student для заполнения матрицы
            grades_of_optimality_of_students_for_projects[i][j] = \
                get_grade_of_optimality_of_a_project_for_a_student(coefficients_of_significance_of_each_grade,
                                                   student_grades_for_projects[i][j],
                                                   coefficients_of_significance_of_hard_skills,
                                                   students_hard_skills.iloc[i],
                                                   projects_hard_skills.iloc[j],
                                                   managers_require_hard_skills.iloc[j])
            
    return grades_of_optimality_of_students_for_projects

student_grades_for_projects = np.random.rand(N_students, N_projects)*2-1

grades_of_optimality_of_students_for_projects = get_grades_of_optimality_of_a_projects_for_a_students([q_1, q_2],
                                                      student_grades_for_projects,
                                                      coefficients_of_significance_of_hard_skills,
                                                      students_hard_skills,
                                                      projects_hard_skills,
                                                      project_to_manager)
grades_of_optimality_of_students_for_projects

array([[ 10.20368223,   0.14728243,   9.48615199, ..., -18.2853332 ,
         -7.65819171,   5.35351356],
       [  0.51155714,  16.15681112,  13.77696063, ...,   8.7718452 ,
         12.32391974,  11.65035448],
       [  8.63279119,   5.6498434 ,   6.53432534, ...,   3.72733675,
         22.19811626,  -6.91466832],
       ...,
       [-15.18495409,   5.92815299,  -1.09358829, ..., -19.95627254,
          2.94425821,  -3.39027778],
       [ 21.24411145,  19.16655622,  -2.44843737, ...,  -0.46172308,
         -1.38289554,   1.97439658],
       [ -2.27263555,  -9.80405047,  -1.43671139, ...,  -3.06277367,
         -1.13025617,  -7.30256914]])

In [62]:
grades_of_optimality_of_students_for_projects[0]

array([ 10.20368223,   0.14728243,   9.48615199,   1.02145159,
       -14.59065558,  -6.82804609, -13.00442521,   4.30628067,
       -16.69253066, -18.35794217, -14.28230738,   7.53458667,
       -21.03583691,   9.42710703,  22.88396404,  -8.93788338,
         3.99452782,  -6.62824689,  14.01829716, -11.04470642,
        -7.14349743,  -2.49611947, -13.10884536,  24.56427022,
        -4.28530466, -14.95352101,   0.84182974,  -3.23493438,
        12.14205171, -13.31844086,   5.57131836,  -4.30164409,
        -6.01201089,  -4.85498471,  -4.6285803 ,   5.39863067,
         2.12304539, -18.2853332 ,  -7.65819171,   5.35351356])

In [63]:
N_teams = N_students//3 + 1
teams = []
for i in range(0, N_students - 1, 3):
   teams.append([i, i + 1, i + 2]) 

teams.append([99])
teams

[[0, 1, 2],
 [3, 4, 5],
 [6, 7, 8],
 [9, 10, 11],
 [12, 13, 14],
 [15, 16, 17],
 [18, 19, 20],
 [21, 22, 23],
 [24, 25, 26],
 [27, 28, 29],
 [30, 31, 32],
 [33, 34, 35],
 [36, 37, 38],
 [39, 40, 41],
 [42, 43, 44],
 [45, 46, 47],
 [48, 49, 50],
 [51, 52, 53],
 [54, 55, 56],
 [57, 58, 59],
 [60, 61, 62],
 [63, 64, 65],
 [66, 67, 68],
 [69, 70, 71],
 [72, 73, 74],
 [75, 76, 77],
 [78, 79, 80],
 [81, 82, 83],
 [84, 85, 86],
 [87, 88, 89],
 [90, 91, 92],
 [93, 94, 95],
 [96, 97, 98],
 [99]]

In [64]:
# находит среднее арифметическое между соответствием студентов команды к проекту 
# и запполняет этими занчениями таблицу соответствия команд проектам
def get_grade_of_optimality_of_a_projects_for_a_teams(teams, grades_of_optimality_of_students_for_projects):
    shape_of_grades = grades_of_optimality_of_students_for_projects.shape
    grade_of_optimality_of_a_projects_for_a_teams = np.zeros(shape=(shape_of_grades[1], len(teams)))
    for project in range(shape_of_grades[1]):
        for team_index in range(len(teams)):
            sum_grade = 0.0
            # находим суммарное соответствие
            for student in teams[team_index]:
                sum_grade += grades_of_optimality_of_students_for_projects[student][project]
            
            # добавляем в таблицу среднее арифметическое соответствие каждого из членов команды
            grade_of_optimality_of_a_projects_for_a_teams[project][team_index] = sum_grade/len(teams[team_index])
        
    return grade_of_optimality_of_a_projects_for_a_teams

grade_of_optimality_of_a_projects_for_a_teams = get_grade_of_optimality_of_a_projects_for_a_teams(teams, grades_of_optimality_of_students_for_projects)
grade_of_optimality_of_a_projects_for_a_teams

array([[  6.44934352,   6.94796789,   6.16082323, ...,   2.370304  ,
          7.41314362,  -2.27263555],
       [  7.31797898,  -6.82927422,  -2.10894142, ...,  -5.03388732,
         11.66963121,  -9.80405047],
       [  9.93247932,   1.31686808, -13.31372951, ...,   6.96926597,
          1.0385646 ,  -1.43671139],
       ...,
       [ -1.92871709,  -5.25297331,  -6.41475352, ...,  -2.73652037,
        -10.70050657,  -3.06277367],
       [  8.95461476,  -3.09175431,   8.95443049, ...,  -6.45484468,
          3.24792373,  -1.13025617],
       [  3.36306658,  -0.60429398,  -2.53932806, ...,  -7.78783151,
          5.48363119,  -7.30256914]])

In [65]:
grade_of_optimality_of_a_projects_for_a_teams.shape

(40, 34)

In [66]:
!pip install scipy



In [109]:
#1
import numpy as np
from scipy.optimize import linear_sum_assignment

def optimize_team_project_distribution(grade_matrix):
    # Конвертируем матрицу оценок в матрицу стоимостей (для минимизации)
    cost_matrix = -np.array(grade_matrix)

    # Используем метод оптимизации для решения задачи о назначениях
    row_indices, col_indices = linear_sum_assignment(cost_matrix)
    # print(row_indices)

    # Создаем словарь с результатами распределения
    team_project_mapping = {team_id: project_id for project_id, team_id in zip(row_indices, col_indices)}

    return team_project_mapping

result_om = optimize_team_project_distribution(grade_of_optimality_of_a_projects_for_a_teams)
print(result_om)

{21: 0, 7: 1, 27: 2, 12: 4, 18: 5, 29: 6, 28: 7, 11: 8, 26: 9, 5: 10, 0: 11, 30: 12, 8: 13, 15: 14, 4: 15, 16: 16, 31: 17, 6: 18, 1: 19, 3: 20, 32: 21, 2: 22, 20: 23, 9: 24, 13: 25, 25: 29, 10: 30, 33: 32, 23: 33, 14: 34, 24: 35, 19: 36, 22: 37, 17: 39}


In [68]:
def get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, distribution_of_teams_across_projects):
    score = 0
    for i in range(len(distribution_of_teams_across_projects)):
        score += grade_of_optimality_of_a_projects_for_a_teams[distribution_of_teams_across_projects[i]][i]

    return score

get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, result_om)

468.1489035502311

In [69]:
sum_grade = 0
for i in range(len(result_om)):
    sum_grade += grade_of_optimality_of_a_projects_for_a_teams[i+5][i]
    
sum_grade

-13.332317374597686

In [70]:
#2
import random

def random_team_project_distribution(grade_matrix):
    num_teams = len(grade_matrix)
    num_projects = len(grade_matrix[0])

    # Создаем список идентификаторов команд и проектов
    team_ids = list(range(num_teams))
    project_ids = list(range(num_projects))

    # Случайно перемешиваем идентификаторы проектов
    random.shuffle(project_ids)

    # Создаем словарь с результатами распределения
    team_project_mapping = {team_id: project_ids[team_id] for team_id in team_ids}

    return team_project_mapping

result_random = random_team_project_distribution(grade_of_optimality_of_a_projects_for_a_teams.T)
print(result_random)


{0: 3, 1: 32, 2: 26, 3: 15, 4: 1, 5: 37, 6: 4, 7: 10, 8: 21, 9: 39, 10: 34, 11: 8, 12: 33, 13: 11, 14: 16, 15: 38, 16: 13, 17: 18, 18: 12, 19: 35, 20: 31, 21: 36, 22: 17, 23: 5, 24: 22, 25: 27, 26: 14, 27: 2, 28: 0, 29: 6, 30: 24, 31: 25, 32: 7, 33: 29}


In [71]:
get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, result_random)

69.53861458089689

In [130]:
#3
import random
import numpy as np

class GeneticAlgorithm:
    def __init__(self, grade_matrix, population_size=20, num_generations=2000, mutation_rate=0.01):
        self.grade_matrix = np.array(grade_matrix)
        self.population_size = population_size
        self.num_generations = num_generations
        self.mutation_rate = mutation_rate

    def initialize_population(self):
        num_teams, num_projects = self.grade_matrix.shape
        population = []

        for _ in range(self.population_size):
            chromosome = random.sample(range(num_projects), num_teams)
            population.append(chromosome)

        return population

    def evaluate_fitness(self, chromosome):
        fitness = 0

        for team_id, project_id in enumerate(chromosome):
            fitness += self.grade_matrix[team_id, project_id]

        return fitness

    def crossover(self, parent1, parent2):
        num_teams = len(parent1)
        crossover_point = random.randint(1, num_teams - 1)
        child1 = parent1[:crossover_point] + parent2[crossover_point:]
        child2 = parent2[:crossover_point] + parent1[crossover_point:]
        return child1, child2

    def mutate(self, chromosome):
        num_teams = len(chromosome)

        for i in range(num_teams):
            if random.random() < self.mutation_rate:
                chromosome[i] = random.randint(0, num_teams - 1)

        return chromosome

    def select_parents(self, population):
        parents = random.choices(population, k=2)
        return parents[0], parents[1]

    def evolve(self):
        population = self.initialize_population()

        for _ in range(self.num_generations):
            new_population = []

            for _ in range(self.population_size // 2):
                parent1, parent2 = self.select_parents(population)
                child1, child2 = self.crossover(parent1, parent2)
                child1 = self.mutate(child1)
                child2 = self.mutate(child2)
                new_population.append(child1)
                new_population.append(child2)

            population = new_population

        best_chromosome = max(population, key=self.evaluate_fitness)
        team_project_mapping = {team_id: project_id for team_id, project_id in enumerate(best_chromosome)}

        return team_project_mapping

genetic_algorithm = GeneticAlgorithm(grade_of_optimality_of_a_projects_for_a_teams.T)
result_ga = genetic_algorithm.evolve()

def GA(df):
    genetic_algorithm = GeneticAlgorithm(df)
    result_ga = genetic_algorithm.evolve()
    return result_ga

print(result_ga)

{0: 14, 1: 4, 2: 13, 3: 1, 4: 31, 5: 17, 6: 3, 7: 29, 8: 33, 9: 27, 10: 2, 11: 2, 12: 10, 13: 28, 14: 5, 15: 32, 16: 29, 17: 11, 18: 13, 19: 29, 20: 11, 21: 26, 22: 15, 23: 28, 24: 19, 25: 25, 26: 30, 27: 4, 28: 22, 29: 13, 30: 8, 31: 19, 32: 9, 33: 3}


In [114]:
get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, result_ga)

56.74300436710376

In [74]:
#4
from sklearn.cluster import KMeans
import numpy as np
kmeans = \
    KMeans(n_clusters=grade_of_optimality_of_a_projects_for_a_teams.T.shape[0]//2, random_state=0, n_init="auto")\
    .fit(grade_of_optimality_of_a_projects_for_a_teams.T)
result_kmeans = kmeans.predict(grade_of_optimality_of_a_projects_for_a_teams.T)

In [75]:
result_kmeans

array([ 0, 14, 11,  4,  8, 12, 14, 14,  8, 15,  3,  1, 13,  7,  1,  4,  4,
       14,  2,  3,  8,  1,  9, 10, 16,  6,  4,  2,  2, 14,  2,  4, 13,  5])

In [76]:
list_of_first_part_indexes = []
for i in np.unique(result_kmeans):
   list_of_first_part_indexes.append(np.where(result_kmeans == i)[0][0])
len(list_of_first_part_indexes)

17

In [77]:
grade_of_optimality_of_a_projects_for_a_teams.shape

(40, 34)

In [78]:
first_part = grade_of_optimality_of_a_projects_for_a_teams.T[list_of_first_part_indexes]
first_part.shape

(17, 40)

In [79]:
first_part_result_om = optimize_team_project_distribution(first_part.T)
first_part_result_om

[ 5  8  9 10 14 15 17 19 20 22 23 24 25 29 30 32 39]


{16: 5,
 1: 8,
 9: 9,
 12: 10,
 10: 14,
 8: 15,
 13: 17,
 14: 19,
 4: 20,
 11: 22,
 0: 23,
 15: 24,
 2: 25,
 6: 29,
 3: 30,
 5: 32,
 7: 39}

In [80]:
correct_first_part_result_om = {list_of_first_part_indexes[i]: first_part_result_om[i] for i in range(len(list_of_first_part_indexes))}
correct_first_part_result_om

{0: 23,
 11: 8,
 18: 25,
 10: 30,
 3: 20,
 33: 32,
 25: 29,
 13: 39,
 4: 15,
 22: 9,
 23: 14,
 2: 22,
 5: 10,
 12: 17,
 1: 19,
 9: 24,
 24: 5}

In [81]:
first_part_result_om

{16: 5,
 1: 8,
 9: 9,
 12: 10,
 10: 14,
 8: 15,
 13: 17,
 14: 19,
 4: 20,
 11: 22,
 0: 23,
 15: 24,
 2: 25,
 6: 29,
 3: 30,
 5: 32,
 7: 39}

In [82]:
first_part_score = 0.0
for key in first_part_result_om.keys():
    first_part_score += first_part[key][first_part_result_om[key]]

first_part_score

250.4311702865058

In [83]:
indexes_rows = set(range(len(grade_of_optimality_of_a_projects_for_a_teams.T)))
indexes_rows

{0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33}

In [84]:
second_part_indexes = indexes_rows.difference(set(list_of_first_part_indexes))
second_part_indexes

{6, 7, 8, 14, 15, 16, 17, 19, 20, 21, 26, 27, 28, 29, 30, 31, 32}

In [85]:
second_part = grade_of_optimality_of_a_projects_for_a_teams.T[list(second_part_indexes)]
second_part.shape

(17, 40)

In [86]:
second_part_result_om = optimize_team_project_distribution(second_part.T)
second_part_result_om

[ 0  1  2  7 12 13 14 16 17 22 23 24 32 34 35 36 39]


{10: 0,
 2: 1,
 12: 2,
 13: 7,
 1: 12,
 3: 13,
 5: 14,
 6: 16,
 16: 17,
 0: 22,
 9: 23,
 14: 24,
 15: 32,
 4: 34,
 11: 35,
 8: 36,
 7: 39}

In [87]:
correct_second_part_result_om = {list(second_part_indexes)[i]: second_part_result_om[i] for i in range(len(second_part_indexes))}
correct_second_part_result_om

{32: 22,
 6: 12,
 7: 1,
 8: 13,
 14: 34,
 15: 14,
 16: 16,
 17: 39,
 19: 36,
 20: 23,
 21: 0,
 26: 35,
 27: 2,
 28: 7,
 29: 24,
 30: 32,
 31: 17}

In [88]:
second_part_score = 0
for i in range(len(second_part_result_om)):
    second_part_score += second_part[i][second_part_result_om[i]]

second_part_score

239.5951569253049

In [89]:
first_part_score + second_part_score # win

490.02632721181067

In [90]:
merged_dict = {**correct_first_part_result_om, **correct_second_part_result_om}
get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, merged_dict)

490.0263272118108

In [132]:
def KMeans_distribution(df):
    kmeans = \
        KMeans(n_clusters=df.T.shape[0]//2, random_state=0, n_init="auto")\
        .fit(df.T)
    result_kmeans = kmeans.predict(df.T)
    
    list_of_first_part_indexes = []
    for i in np.unique(result_kmeans):
       list_of_first_part_indexes.append(np.where(result_kmeans == i)[0][0])

    first_part = df.T[list_of_first_part_indexes]
    first_part.shape
    
    first_part_result_om = optimize_team_project_distribution(first_part.T)
    first_part_result_om
    
    correct_first_part_result_om = {list_of_first_part_indexes[i]: first_part_result_om[i] for i in range(len(list_of_first_part_indexes))}
    correct_first_part_result_om
    
    indexes_rows = set(range(len(df.T)))
    
    second_part_indexes = indexes_rows.difference(set(list_of_first_part_indexes))
    
    second_part = df.T[list(second_part_indexes)]
    
    second_part_result_om = optimize_team_project_distribution(second_part.T)
    
    correct_second_part_result_om = {list(second_part_indexes)[i]: second_part_result_om[i] for i in range(len(second_part_indexes))}

    merged_dict = {**correct_first_part_result_om, **correct_second_part_result_om}

    return merged_dict

get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, KMeans_distribution(grade_of_optimality_of_a_projects_for_a_teams))

490.0263272118108

In [91]:
grade_of_optimality_of_a_teams_for_a_projects = grade_of_optimality_of_a_projects_for_a_teams.T

In [124]:
#5
import numpy as np
from scipy.optimize import linprog

def optimal_project_assignment(grade_of_optimality_of_a_projects_for_a_teams):
    # Преобразуем оценки в матрицу
    grades = np.array(grade_of_optimality_of_a_projects_for_a_teams)
    
    # Определяем количество команд и проектов
    num_teams = grades.shape[0]
    num_projects = grades.shape[1]
    
    # Формулируем задачу оптимизации
    # Матрица коэффициентов ограничений
    A_eq = np.zeros((num_teams + num_projects, num_teams * num_projects))
    
    # Вектор оценок
    c = -grades.flatten()  # Меняем знак, так как linprog решает задачу минимизации
    
    # Создаем словарь для сохранения результата
    assignment = {}
    
    if num_teams >= num_projects:
        # Если количество команд больше или равно числу проектов,
        # каждый проект будет назначен на свою команду
        for project_idx in range(num_projects):
            assignment[project_idx] = project_idx
    else:
        # Задаем ограничения равенства для каждого проекта
        for project_idx in range(num_projects):
            A_eq[num_teams + project_idx, project_idx::num_projects] = 1
        
        # Решаем задачу линейного программирования
        bounds = [(0, 1) for _ in range(num_teams * num_projects)]
        res = linprog(c, A_eq=A_eq, b_eq=np.ones(num_teams), bounds=bounds, method='highs')
        
        # Получаем результаты назначения команд на проекты
        x = np.round(res.x).astype(int)
        for team_idx, project_idx in enumerate(x):
            if project_idx == 1:
                assignment[team_idx] = np.argmax(grades[:, team_idx])
    
    return assignment

result_lp = optimal_project_assignment(grade_of_optimality_of_a_teams_for_a_projects.T)
print(result_lp)


{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33}


In [125]:
get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, result_lp)

11.384273408290856

In [94]:
#6
import numpy as np

def recommend_projects(grade_of_optimality_of_a_teams_for_a_projects):
    # Преобразуем оценки в матрицу
    grades = np.array(grade_of_optimality_of_a_teams_for_a_projects)
    
    # Применяем SVD для матрицы оценок
    U, S, Vt = np.linalg.svd(grades)
    
    # Задаем количество факторов (компонент), которые будут использованы для рекомендаций
    num_factors = min(grades.shape) - 1
    
    # Получаем матрицы U', S' и V'^T с использованием только первых num_factors факторов
    U_prime = U[:, :num_factors]
    S_prime = np.diag(S[:num_factors])
    Vt_prime = Vt[:num_factors, :]
    
    # Выполняем реконструкцию матрицы оценок
    grades_reconstructed = U_prime @ S_prime @ Vt_prime
    
    # Используем реконструированную матрицу оценок для рекомендации проектов для команд
    assignment = {}
    for team_idx in range(grades_reconstructed.shape[0]):
        assignment[team_idx] = np.argmax(grades_reconstructed[team_idx, :])
    
    return assignment

result_recommendation = recommend_projects(grade_of_optimality_of_a_teams_for_a_projects)
print(result_recommendation)


{0: 23, 1: 19, 2: 22, 3: 20, 4: 15, 5: 10, 6: 12, 7: 1, 8: 13, 9: 24, 10: 19, 11: 32, 12: 17, 13: 39, 14: 32, 15: 14, 16: 16, 17: 39, 18: 25, 19: 36, 20: 23, 21: 0, 22: 9, 23: 14, 24: 5, 25: 29, 26: 35, 27: 2, 28: 7, 29: 16, 30: 32, 31: 17, 32: 22, 33: 32}


In [95]:
get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, result_recommendation)

494.72537447117617

In [96]:
abba = grade_of_optimality_of_a_teams_for_a_projects[:,0:34]
abba.shape

(34, 34)

In [98]:
#7
def board_voting(grade_of_optimality_of_a_teams_for_a_projects):
    num_teams = len(grade_of_optimality_of_a_teams_for_a_projects)
    num_projects = len(grade_of_optimality_of_a_teams_for_a_projects[0])

    # Создаем список команд
    teams = list(range(num_teams))

    # Создаем список проектов
    projects = list(range(num_projects))

    # Создаем словарь для хранения результатов
    assignment = {}

    # Проходим по каждой команде
    for team in teams:
        # Получаем оценки для данной команды
        team_grades = grade_of_optimality_of_a_teams_for_a_projects[team]

        # Сортируем проекты по оценкам в убывающем порядке
        sorted_projects = sorted(projects, key=lambda project: team_grades[project], reverse=True)

        # Выбираем проект с наивысшим рангом, на котором команда может работать
        for project in sorted_projects:
            if project not in assignment.values():
                assignment[team] = project
                break

    return assignment

result_board = board_voting(grade_of_optimality_of_a_teams_for_a_projects)
print(result_board)


{0: 23, 1: 19, 2: 22, 3: 20, 4: 15, 5: 10, 6: 12, 7: 1, 8: 13, 9: 24, 10: 30, 11: 32, 12: 17, 13: 39, 14: 34, 15: 14, 16: 16, 17: 11, 18: 25, 19: 36, 20: 6, 21: 0, 22: 9, 23: 33, 24: 5, 25: 29, 26: 35, 27: 2, 28: 7, 29: 27, 30: 4, 31: 3, 32: 21, 33: 8}


In [99]:
get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, result_board)

450.90665574328494

In [122]:
def get_mean_score(iters, algo, is_T):
    score = 0.0
    for iter_i in range(iters):
        N_hard_skills = 200
        N_students = 100
        students_hard_skills = df_random((N_students, N_hard_skills), 0, 1)

        N_projects = 40
        projects_hard_skills = df_random((N_projects, N_hard_skills), 0, 1)

        N_managers = 20
        N_manager_hard_skills = 30
        managers_require_hard_skills = df_random((N_projects, N_manager_hard_skills), 0, 1)
        df_of_zeroes = df_random((N_projects, N_hard_skills - N_manager_hard_skills), 0, 0)
        managers_require_hard_skills = pd.concat([managers_require_hard_skills, df_of_zeroes], ignore_index=True, axis=1)

        # словарь соответстия проекта руководителю
        project_to_manager = {}
        for i in range(N_projects):
            project_to_manager[i] = i%N_managers

        student_grades_for_projects = np.random.rand(N_students, N_projects)*2-1

        grades_of_optimality_of_students_for_projects = get_grades_of_optimality_of_a_projects_for_a_students([q_1, q_2],
                                                          student_grades_for_projects,
                                                          coefficients_of_significance_of_hard_skills,
                                                          students_hard_skills,
                                                          projects_hard_skills,
                                                          project_to_manager)

        grade_of_optimality_of_a_projects_for_a_teams = get_grade_of_optimality_of_a_projects_for_a_teams(teams, grades_of_optimality_of_students_for_projects)

    
        if is_T:
            df = grade_of_optimality_of_a_projects_for_a_teams.T
        else:
            df = grade_of_optimality_of_a_projects_for_a_teams
        addition = get_score_of_distribution(grade_of_optimality_of_a_projects_for_a_teams, algo(df))
        score += addition
        print(f'\t{iter_i}: {addition}')
        
    return score/iters

In [134]:
iters = 10
mean_KMeans = get_mean_score(iters, KMeans_distribution, False)
print(f'4. KMeans and linear_sum_assignment mean score {mean_KMeans}')

mean_linear_sum_assignment = get_mean_score(iters, optimize_team_project_distribution, False)
print(f'1. linear_sum_assignment mean score {mean_linear_sum_assignment}')

mean_random = get_mean_score(iters, random_team_project_distribution, True)
print(f'2. random mean score {mean_random}')

mean_ga = get_mean_score(iters, GA, True)
print(f'3. genetic alghoritm mean score {mean_ga}')

# mean_optimal_project_assignment = get_mean_score(iters, optimal_project_assignment, False)
# print(f'5. optimal_project_assignment mean score {mean_optimal_project_assignment}')

mean_recommend_projects = get_mean_score(iters, recommend_projects, True)
print(f'6. recommend_projects mean score {mean_recommend_projects}')

mean_board_voting = get_mean_score(iters, board_voting, True)
print(f'7. board_voting mean score {mean_board_voting}')

	0: 480.907561558624
	1: 465.4001892758333
	2: 476.99786349086025
	3: 483.84861946755893
	4: 479.0603868271078
	5: 487.5721712378085
	6: 498.09585728815404
	7: 477.04498049795217
	8: 470.6866650703626
	9: 493.8535197074667
4. KMeans and linear_sum_assignment mean score 481.34678144217287
	0: 463.01738228421675
	1: 476.09456690367705
	2: 471.556688656117
	3: 451.8342539195792
	4: 443.5359082449329
	5: 466.8578428149006
	6: 468.4987820338923
	7: 455.7975538892451
	8: 449.8207634346706
	9: 469.31243076154016
1. linear_sum_assignment mean score 461.6326172942772
	0: 4.820446413017622
	1: 66.00832027700099
	2: 43.86069327118596
	3: 19.80583833112648
	4: 13.77675988854237
	5: -31.378805828442147
	6: 4.641217970285819
	7: -45.972744667952355
	8: -39.37375902131013
	9: 77.27983970893546
2. random mean score 11.346780634239002
	0: 59.498595972622056
	1: 54.37951018197522
	2: 52.963334874903694
	3: 14.601961292019135
	4: -2.156712444041231
	5: 35.202775186764406
	6: 48.96637211256017
	7: -36.973

In [135]:
iters = 100
mean_KMeans = get_mean_score(iters, KMeans_distribution, False)
print(f'4. KMeans and linear_sum_assignment mean score {mean_KMeans}')

mean_linear_sum_assignment = get_mean_score(iters, optimize_team_project_distribution, False)
print(f'1. linear_sum_assignment mean score {mean_linear_sum_assignment}')

mean_random = get_mean_score(iters, random_team_project_distribution, True)
print(f'2. random mean score {mean_random}')

mean_ga = get_mean_score(iters, GA, True)
print(f'3. genetic alghoritm mean score {mean_ga}')

# mean_optimal_project_assignment = get_mean_score(iters, optimal_project_assignment, False)
# print(f'5. optimal_project_assignment mean score {mean_optimal_project_assignment}')

mean_recommend_projects = get_mean_score(iters, recommend_projects, True)
print(f'6. recommend_projects mean score {mean_recommend_projects}')

mean_board_voting = get_mean_score(iters, board_voting, True)
print(f'7. board_voting mean score {mean_board_voting}')

	0: 479.0558265038967
	1: 484.3646955845183
	2: 473.59874005958073
	3: 444.33710748118665
	4: 492.04251358746296
	5: 482.0232940211956
	6: 472.1397294080578
	7: 453.78001694538284
	8: 490.6005844414481
	9: 473.41461538801417
	10: 438.1862696393926
	11: 483.6987215720322
	12: 482.42708983000574
	13: 495.07483682269606
	14: 453.60510074112574
	15: 482.42256932331185
	16: 459.77401377124943
	17: 472.82937840881306
	18: 478.8483061441046
	19: 498.43427135475383
	20: 460.1343131440007
	21: 503.1273860699249
	22: 485.18972321443
	23: 506.8702106066624
	24: 470.13729848000673
	25: 458.3425259863504
	26: 512.5644229454772
	27: 459.04607291930773
	28: 504.115050010853
	29: 471.0636026899082
	30: 491.3036259247441
	31: 466.5462715553602
	32: 481.58947839377726
	33: 504.22989860941857
	34: 465.9996901725161
	35: 481.0003274567098
	36: 493.29705189313006
	37: 464.091196599503
	38: 473.82182601503973
	39: 498.5671868506311
	40: 487.30738904231026
	41: 497.94526744878715
	42: 508.43784580309153
	43:

	44: 62.53722710522277
	45: 25.856033298080973
	46: 27.11908078196781
	47: 19.81375120300619
	48: 54.4087076593007
	49: 30.965269940479835
	50: 54.003626456134555
	51: 53.93785818700718
	52: -0.9442613524218342
	53: 68.87306824005931
	54: 39.37361772228451
	55: 61.06946141832619
	56: 66.14607453908071
	57: 14.66542087393686
	58: -18.01772370283851
	59: 32.010437843840336
	60: 33.27288817420334
	61: -28.575042846316464
	62: -3.4607350030020863
	63: 47.92996883882162
	64: 72.5956684433221
	65: 37.47514006500771
	66: 59.370751249591024
	67: 23.99196790934434
	68: -20.850053843720126
	69: 71.25039320980055
	70: 29.2944869150855
	71: 33.10186915864151
	72: 3.81602683495443
	73: 112.91116569953653
	74: 60.228040766678596
	75: 56.65647940869558
	76: 1.6043833621813324
	77: 91.43293302575249
	78: -11.241577359012073
	79: -3.3349004894701606
	80: 79.35548827369536
	81: -11.490480222618991
	82: 59.25123639238135
	83: 32.240931507105984
	84: 66.93434273218065
	85: 69.05203993512745
	86: 113.13548