In [60]:
# 조 구성 함수
from collections import defaultdict
import numpy as np

def create_groups(df):
    # 조를 저장할 딕셔너리 생성
    groups = defaultdict(list)

    # 조건에 따라 데이터프레임 정렬
    sorted_df = df
    #sorted_df = df.sort_values(by=['조장 희망 여부', '음주 호불호', '성별'], ascending=[False, True, True])

    # 조 구성
    group_number = 1
    for index, row in sorted_df.iterrows():
        added_to_group = False
        temp_group_number = 1

        while not added_to_group:
            # 현재 조에 후보자를 추가할 수 있는지 검사하고 조건을 만족하면 추가
            if check_conditions(groups[temp_group_number], row):
                groups[temp_group_number].append(row)
                added_to_group = True
            else:
                # 다음 조로 이동하거나, 모든 조를 검사한 경우 새로운 조를 생성
                temp_group_number += 1
                if temp_group_number > group_number:
                    group_number = temp_group_number

    return groups

In [11]:
# 음주 호불호 검사
def check_drinking_preference(current_group, candidate):
    for member in current_group:
        if member['음주 호불호'] == '호' and candidate['음주 호불호'] == '불호':
            return False
        if member['음주 호불호'] == '불호' and candidate['음주 호불호'] == '호':
            return False
    return True

# 나이 표준편차 검사
def check_age_std(current_group, candidate):
    ages = [member['나이'] for member in current_group]
    ages.append(candidate['나이'])
    if candidate['조원 나이대 희망'] == '비슷한 나이대 선호' and np.std(ages) > 1.5:
        return False
    return True

# 성비 검사
def check_gender_ratio(current_group, candidate):
    male_count = sum([1 for member in current_group if member['성별'] == '남'])
    female_count = sum([1 for member in current_group if member['성별'] == '여'])
    total_count = male_count + female_count

    if total_count >= 7:
        return False

    if candidate['성별'] == '남':
        male_count += 1
    else:
        female_count += 1

    if not (female_count <=4 and male_count <=4):
        return False

    return True

# 최소/최대 인원 검사
def check_min_max_members(current_group, candidate):
    if len(current_group) >= 7:
        return False
    return True

# 조장 여부 검사
def check_leader_preference(current_group, candidate):
    leaders = [member for member in current_group if member['조장 희망 여부'] == '희망']
    if (candidate['조장 희망 여부'] == '희망') and len(leaders) > 0: 
        return False
    return True

In [67]:
def check_conditions(current_group, candidate, drink = True, age = True, gender = True, num = True, leader = True):
    if drink == True:
        if not check_drinking_preference(current_group, candidate):
            return False
    if age == True:
        if not check_age_std(current_group, candidate):
            return False
    if gender == True:
        if not check_gender_ratio(current_group, candidate):
            return False
    if num == True:
        if not check_min_max_members(current_group, candidate):
            return False
    if leader == True:
        if not check_leader_preference(current_group, candidate):
            return False
    return True

In [61]:
# 결과 출력 함수
def print_result(groups):
    for group_number, members in groups.items():
        print(f"Group {group_number}:")
        for member in members:
            print(f"""이름: {member['이름']}, 나이: {member['나이']}, 음주 호불호: {member['음주 호불호']}, 나이대 선호: {member['조원 나이대 희망']}, 성별: {member['성별']}, 조장: {member['조장 희망 여부']}""")
        print()

# 결과를 DataFrame에 할당
def assign_groups_to_df(df, groups):
    df['Group'] = -1
    for group_number, members in groups.items():
        for member in members:
            idx = df[df['이름'] == member['이름']].index[0]
            df.at[idx, 'Group'] = group_number
    return df

In [96]:
import csv
import pandas as pd

# CSV 파일 읽기
def read_csv(file_name):
    df = pd.read_csv(file_name)
    return df

# 데이터 전처리
def preprocess_data(df):
    # 나이 str -> int 변환
    df['나이'] = df['나이'].astype(int)
    return df

file_name = 'data.csv'
df = read_csv(file_name)
df = preprocess_data(df)

import random


# 초기 인구 생성
def create_initial_population(df, population_size):
    population = []
    for _ in range(population_size):
        shuffled_df = df.sample(frac=1, random_state=random.randint(1, 1000)).reset_index(drop=True)
        groups = create_groups(shuffled_df)
        population.append(groups)
    return population

# 적합도 함수
def fitness_function(groups):
    total_score = 0
    weight_age_std = 1
    weight_gender_balance = 3
    weight_drinking = 5
    weight_leader = 1
    
    all_members = set()
    duplicated_members = set()

    for _, group in groups.items():
        group_size = len(group)
        
        # 나이 표준편차
        age_std = np.std([member['나이'] for member in group])

        # 성별 비율
        num_males = sum([member['성별'] == '남' for member in group])
        num_females = group_size - num_males
        gender_balance = abs(num_males - num_females)

        # 음주 호불호
        drinkers = sum([member['음주 호불호'] == '호' for member in group])
        non_drinkers = sum([member['음주 호불호'] == '불호' for member in group])
        drinking_issue = 0 if drinkers == 0 or non_drinkers == 0 else 1

        # 조장 희망자
        leaders = sum([member['조장 희망 여부'] in ['희망', '상관 없음'] for member in group])
        has_leader = 1 if leaders >= 1 else 0
        
        # 중복자
        for member in group:
            member_tuple = tuple(member.items())
            if member_tuple in all_members:
                duplicated_members.add(member_tuple)
            else:
                all_members.add(member_tuple)
        
        # 적합도 점수 계산
        total_score += (
            weight_age_std * (group_size - age_std)
            - weight_gender_balance * gender_balance
            - weight_drinking * drinking_issue
            + weight_leader * has_leader)
            
        # 중복된 인원에 대한 패널티 추가
        penalty = len(duplicated_members) * 100000
        total_score -= penalty


    return total_score


# 선택 함수
def selection_function(population):
    selected_population = []
    population_size = len(population)
    for _ in range(population_size):
        # 두 개의 해 선택
        candidate1, candidate2 = random.sample(population, 2)

        # 선택된 두 개의 해 중 더 나은 해 선택
        if fitness_function(candidate1) > fitness_function(candidate2):
            selected_population.append(candidate1)
        else:
            selected_population.append(candidate2)
    return selected_population


# 교차 함수
def crossover_function(selected_population):
    offspring_population = []
    for _ in range(len(selected_population) // 2):
        parent1 = random.choice(selected_population)
        parent2 = random.choice(selected_population)

        crossover_point = random.randint(1, len(parent1) - 1)
        offspring1 = {**dict(list(parent1.items())[:crossover_point]), **dict(list(parent2.items())[crossover_point:])}
        offspring2 = {**dict(list(parent2.items())[:crossover_point]), **dict(list(parent1.items())[crossover_point:])}

        offspring_population.extend([offspring1, offspring2])

    return offspring_population

# 변이 함수
def mutation_function(offspring_population, mutation_rate):
    mutated_population = []
    for offspring in offspring_population:
        for group_number, group in offspring.items():
            if len(group) < 2:
                continue

            if random.random() < mutation_rate:
                idx1, idx2 = random.sample(range(len(group)), 2)
                group[idx1], group[idx2] = group[idx2], group[idx1]
        mutated_population.append(offspring)
    return mutated_population

# 유전 알고리즘 함수
def genetic_algorithm(df, population_size = 50, generations=50, mutation_rate=0.2):
    population = create_initial_population(df, population_size)
    
    for gen in range(generations):
        selected_population = selection_function(population)
        offspring_population = crossover_function(selected_population)
        population = mutation_function(offspring_population, mutation_rate)
        
        # 최고의 적합도 점수 추적
        best_solution = max(population, key=fitness_function)
        best_fitness = fitness_function(best_solution)
        
        # 진척도 출력
        print(f"Generation {gen+1}/{generations}: Best fitness = {best_fitness}")
        
    return best_solution



# 유전 알고리즘 적용
solution = genetic_algorithm(df)
print_result(solution)


Generation 1/50: Best fitness = -199951.66413681261
Generation 2/50: Best fitness = -399955.9932930102
Generation 3/50: Best fitness = -199951.66413681261
Generation 4/50: Best fitness = -399963.0128365051
Generation 5/50: Best fitness = -399963.0128365051
Generation 6/50: Best fitness = -399963.0128365051
Generation 7/50: Best fitness = -99954.16413681263
Generation 8/50: Best fitness = -2199956.781048066
Generation 9/50: Best fitness = -599965.0128365051
Generation 10/50: Best fitness = -399963.0128365051
Generation 11/50: Best fitness = -399963.0128365051
Generation 12/50: Best fitness = -399963.0128365051
Generation 13/50: Best fitness = -399963.0128365051
Generation 14/50: Best fitness = -399963.0128365051
Generation 15/50: Best fitness = -399963.0128365051
Generation 16/50: Best fitness = -399963.0128365051
Generation 17/50: Best fitness = -399963.0128365051
Generation 18/50: Best fitness = -399963.0128365051
Generation 19/50: Best fitness = -399963.0128365051
Generation 20/50: B

In [97]:
check_solution(solution)

각 조별 인원 수:
조 1: 7명
조 2: 7명
조 3: 7명
조 4: 7명
조 5: 7명
조 6: 7명
조 7: 7명
조 8: 7명
조 9: 7명
조 10: 7명
조 11: 7명
조 12: 7명
조 13: 6명
조 14: 6명
조 15: 5명
총 인원수: 101
여러 조에 배치된 인원:
이하영: 2개 조
김수민: 2개 조
윤석준: 2개 조


True

In [27]:
def check_completed_group(current_group, drink = True, age = True, gender = True, leader = True):
    if drink == True:
        if not check_drinking_preference_current(current_group):
            return False
    if age == True:
        if not check_age_std_current(current_group):
            return False
    if gender == True:
        if not check_gender_ratio_current(current_group):
            return False
    if leader == True:
        if not check_leader_preference_current(current_group):
            return False
    return True

def check_leader_preference_current(current_group):
    leader_count = sum([1 for member in current_group if member['조장 희망 여부'] == '희망'])
    ok_count = sum([1 for member in current_group if member['조장 희망 여부'] == '상관 없음'])
    if leader_count + ok_count == 0: 
        return False
    return True

def check_gender_ratio_current(current_group):
    male_count = sum([1 for member in current_group if member['성별'] == '남'])
    female_count = sum([1 for member in current_group if member['성별'] == '여'])
    total_count = male_count + female_count

    if total_count >= 7:
        return False

    if not female_count <=4:
        return False
    
    if not male_count <=4:
        return False

    return True

def check_age_std_current(current_group):
    ages = [member['나이'] for member in current_group]
    wishes = [member['조원 나이대 희망'] for member in current_group]
    if ('비슷한 나이대 선호' in wishes) and np.std(ages) > 2:
        return False
    return True

def check_drinking_preference_current(current_group):
    
    # 그룹 내에서 음주 호불호가 충돌하는지 확인
    drinkers = [member for member in current_group if member['음주 호불호'] == '호']
    non_drinkers = [member for member in current_group if member['음주 호불호'] == '불호']
    if drinkers and non_drinkers:
        return False

    return True

In [40]:
def check_solution(solution):
    group_sizes = {}
    member_count = {}
    
    for group_number, group in solution.items():
        group_sizes[group_number] = len(group)
        
        for member in group:
            if member['이름'] in member_count:
                member_count[member['이름']] += 1
            else:
                member_count[member['이름']] = 1

    print("각 조별 인원 수:")
    total_size = 0
    for group_number, size in group_sizes.items():
        total_size += size
        print(f"조 {group_number}: {size}명")
        
    print(f"총 인원수: {total_size}")
    if total_size == 101:
        tot = True
    
    duplicated_members = {name: count for name, count in member_count.items() if count > 1}
    
    if duplicated_members:
        print("여러 조에 배치된 인원:")
        for name, count in duplicated_members.items():
            print(f"{name}: {count}개 조")
    else:
        print("여러 조에 배치된 인원이 없습니다.")
        dup = True
        
    if total_size == True and dup == True:
        return False
    
    else:
        return True