In [15]:
import random
import numpy as np
import matplotlib.pyplot as plt
from deap import base, creator, tools, algorithms

# Game configuration
HISTORY_LENGTH = 2           # Number of past moves to consider
GENOME_LENGTH = 2 ** (2 * HISTORY_LENGTH)  # 16 for HISTORY_LENGTH=2
NUM_ROUNDS = 100            # Number of rounds per IPD game

# Genetic algorithm parameters
POPULATION_SIZE = 300       # Number of individuals in population
MUTATION_RATE = 0.2         # Probability of mutation
CROSSOVER_RATE = 0.8        # Probability of crossover
TOURNAMENT_SIZE = 4         # Size of tournament selection
ELITISM_SIZE = 5            # Number of best individuals to keep
MAX_GENERATIONS = 150       # Number of generations to run

# IPD payoff matrix: (C,C)=3,3; (D,D)=1,1; (C,D)=0,5; (D,C)=5,0
PAYOFF = {
    (0, 0): (3, 3),  # Both cooperate
    (0, 1): (0, 5),  # Agent cooperates, opponent defects
    (1, 0): (5, 0),  # Agent defects, opponent cooperates
    (1, 1): (1, 1)   # Both defect
}

# Opponent strategies
def always_cooperate(_, __):
    """Vždy spolupracovať / Always cooperate."""
    return 0

def always_defect(_, __):
    """Vždy zradiť / Always defect."""
    return 1

def tit_for_tat(_, opp_hist):
    """Kopírovať posledný tah protihráča, na začiatku spolupracovať / Copy opponent's last move, cooperate first."""
    return opp_hist[-1] if opp_hist else 0

def random_strategy(_, __):
    """Náhodne spolupracovať alebo zradiť / Randomly cooperate or defect."""
    return random.randint(0, 1)

OPPONENTS = [
    ("AlwaysCooperate", always_cooperate),
    ("AlwaysDefect", always_defect),
    ("TitForTat", tit_for_tat),
    ("Random", random_strategy)
]

def history_to_index(history_self, history_opponent):
    """
    Premení posledné HISTORY_LENGTH tahy na binárny index.
    Converts the last HISTORY_LENGTH moves to a binary index.
    - Berie posledné dva tahy agenta a protihráča (ak nie sú, použije 0).
    - Tahy sa konvertujú na binárne číslo: [self_move1, opp_move1, self_move2, opp_move2].
    - Takes the last two moves of the agent and opponent (uses 0 if not available).
    - Converts moves to a binary number: [self_move1, opp_move1, self_move2, opp_move2].
    """
    index = 0
    for i in range(HISTORY_LENGTH):
        self_move = history_self[-(i+1)] if i < len(history_self) else 0
        opp_move = history_opponent[-(i+1)] if i < len(history_opponent) else 0
        index = (index << 1) | self_move
        index = (index << 1) | opp_move
    return index

def play_ipd(strategy_genome, opponent_strategy):
    """
    Simuluje hru väzňovho dilematu na NUM_ROUNDS kôl, vracia skóre agenta.
    Simulates a Prisoner's Dilemma game for NUM_ROUNDS rounds, returns agent's score.
    - Agentov tah je určený genomom na základe histórie (history_to_index).
    - Tah protihráča je určený funkciou opponent_strategy.
    - Skóre sa aktualizuje podľa matice PAYOFF.
    - Agent's move is determined by the genome based on history (history_to_index).
    - Opponent's move is determined by opponent_strategy.
    - Score is updated based on the PAYOFF matrix.
    """
    history_self = []
    history_opponent = []
    score = 0
    for _ in range(NUM_ROUNDS):
        state_index = history_to_index(history_self, history_opponent)
        move_self = strategy_genome[state_index]
        move_opponent = opponent_strategy(history_self, history_opponent)
        score += PAYOFF[(move_self, move_opponent)][0]
        history_self.append(move_self)
        history_opponent.append(move_opponent)
    return score

def evaluate_strategy(individual):
    """Vyhodnotí genom proti všetkým protihráčom / Evaluate genome against all opponents."""
    total_score = 0
    for _, opponent_strategy in OPPONENTS:
        total_score += play_ipd(individual, opponent_strategy)
    return (total_score,)

# Custom crossover: Uniform crossover
def uniform_crossover(ind1, ind2):
    """Vykoná uniformný kríženie medzi dvoma jedincami / Perform uniform crossover between two individuals."""
    for i in range(len(ind1)):
        if random.random() < 0.5:
            ind1[i], ind2[i] = ind2[i], ind1[i]
    return ind1, ind2

# Initialize DEAP
if "FitnessMax" in creator.__dict__:
    del creator.FitnessMax
if "Individual" in creator.__dict__:
    del creator.Individual

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("gene_maker", random.randint, 0, 1)
toolbox.register("individual_creator", tools.initRepeat, creator.Individual, 
                 toolbox.gene_maker, n=GENOME_LENGTH)
toolbox.register("population", tools.initRepeat, list, toolbox.individual_creator)
toolbox.register("evaluate", evaluate_strategy)
toolbox.register("mate", uniform_crossover)
toolbox.register("mutate", tools.mutFlipBit, indpb=MUTATION_RATE)
toolbox.register("select", tools.selTournament, tournsize=TOURNAMENT_SIZE)

def main():
    """Spustí genetický algoritmus a analyzuje výsledky / Run the genetic algorithm and analyze results."""
    # Create initial population
    population = toolbox.population(n=POPULATION_SIZE)
    
    # Initialize statistics and history
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("max", np.max)
    stats.register("min", np.min)
    
    hof = tools.HallOfFame(ELITISM_SIZE)
    fitness_history = []
    gene_freq_history = [[] for _ in range(GENOME_LENGTH)]
    
    # Evaluate initial population
    fitnesses = list(map(toolbox.evaluate, population))
    for ind, fit in zip(population, fitnesses):
        ind.fitness.values = fit
    
    # Main GA loop
    for gen in range(MAX_GENERATIONS):
        # Select elites
        elites = tools.selBest(population, ELITISM_SIZE)
        
        # Create offspring
        offspring = toolbox.select(population, len(population) - ELITISM_SIZE)
        offspring = list(map(toolbox.clone, offspring))
        
        # Apply crossover
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CROSSOVER_RATE:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        
        # Apply mutation
        for mutant in offspring:
            if random.random() < MUTATION_RATE:
                toolbox.mutate(mutant)
                del mutant.fitness.values
        
        # Evaluate invalid individuals
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        
        # Update population
        population = elites + offspring[:POPULATION_SIZE - ELITISM_SIZE]
        
        # Update hall of fame
        hof.update(population)
        
        # Record statistics
        record = stats.compile(population)
        fitness_history.append(record["max"])
        
        # Track gene frequencies
        for i in range(GENOME_LENGTH):
            freq = sum(ind[i] for ind in population) / POPULATION_SIZE
            gene_freq_history[i].append(freq)
        
        print(f"Gen {gen}: Max={record['max']:.0f}, Avg={record['avg']:.2f}, Min={record['min']:.0f}")
    
    # Get best individual
    best_individual = hof.items[0]
    
    # Print results
    print("\nBest strategy genome:", best_individual)
    print(f"Fitness: {best_individual.fitness.values[0]:.0f}")
    
    # Analyze performance against each opponent
    print("\nPerformance against opponents:")
    for name, opponent in OPPONENTS:
        score = play_ipd(best_individual, opponent)
        print(f"Against {name}: {score} points")
    
    # Define zrada function
    def zrada(moje_historie, protihracova_historie):
        """
        Vráti nasledujúci tah (0=spolupracovať, 1=zradiť) na základe vyevolvovaného genomu.
        Returns the next move (0=cooperate, 1=defect) based on the evolved genome.
        - Používa history_to_index na získanie indexu stavu z histórie tahov.
        - Vráti hodnotu z best_individual na tomto indexe.
        - Uses history_to_index to get the state index from move histories.
        - Returns the value from best_individual at that index.
        """
        state_index = history_to_index(moje_historie, protihracova_historie)
        return best_individual[state_index]
    
    # Demonstrate strategy against Random
    print("\nExample match against Random (0=Cooperate, 1=Defect):")
    my_hist, opp_hist = [], []
    for round_num in range(10):
        my_move = zrada(my_hist, opp_hist)
        opp_move = random_strategy(my_hist, opp_hist)
        my_hist.append(my_move)
        opp_hist.append(opp_move)
        print(f"Round {round_num+1}: Me={my_move}, Opponent={opp_move}")
    
    return best_individual, zrada

if __name__ == "__main__":
    best_individual, zrada_strategy = main()

Gen 0: Max=1412, Avg=1158.22, Min=720
Gen 1: Max=1432, Avg=1261.43, Min=747
Gen 2: Max=1458, Avg=1328.13, Min=839
Gen 3: Max=1458, Avg=1358.32, Min=735
Gen 4: Max=1458, Avg=1352.02, Min=765
Gen 5: Max=1458, Avg=1361.25, Min=762
Gen 6: Max=1458, Avg=1361.05, Min=738
Gen 7: Max=1458, Avg=1374.12, Min=729
Gen 8: Max=1458, Avg=1360.70, Min=732
Gen 9: Max=1458, Avg=1347.74, Min=759
Gen 10: Max=1476, Avg=1367.33, Min=765
Gen 11: Max=1476, Avg=1366.76, Min=753
Gen 12: Max=1476, Avg=1369.53, Min=784
Gen 13: Max=1476, Avg=1376.16, Min=859
Gen 14: Max=1476, Avg=1359.71, Min=741
Gen 15: Max=1476, Avg=1366.55, Min=810
Gen 16: Max=1476, Avg=1368.66, Min=741
Gen 17: Max=1476, Avg=1368.10, Min=860
Gen 18: Max=1476, Avg=1376.00, Min=747
Gen 19: Max=1476, Avg=1369.44, Min=821
Gen 20: Max=1476, Avg=1368.78, Min=735
Gen 21: Max=1476, Avg=1363.35, Min=762
Gen 22: Max=1476, Avg=1369.95, Min=762
Gen 23: Max=1476, Avg=1381.02, Min=768
Gen 24: Max=1476, Avg=1379.64, Min=720
Gen 25: Max=1476, Avg=1368.43, Min=