In [1]:
import random

# -------- Movie Dataset (with noisy/contradictory cases) --------
# Each record is (star, budget, marketing, actual), matching the order in
# which evaluate_rule() unpacks it. `actual` is the label: 1 = hit, 0 = flop.
# Units of the three feature columns are not stated in this file.
movies = [
    (9, 80, 60, 1),
    (8, 50, 20, 1),
    (3, 20, 10, 0),
    (5, 10, 5, 0),
    (7, 70, 40, 1),
    (4, 15, 5, 0),
    (6, 25, 10, 0),
    (8, 40, 55, 1),
    (2, 10, 5, 0),
    (9, 90, 70, 1),
    # Added tricky cases (deliberately noisy, so 100% accuracy is unreachable):
    (9, 30, 10, 0), # Strong star but flop
    (5, 60, 50, 1), # Average star but hit
    (7, 20, 5, 0),  # Medium star but flop
    (9, 80, 60, 0), # Contradiction: same features as the first record, but flop
]
10

# -------- Helpers --------
def random_rule():
    """Draw a random rule as a ``(star, budget, marketing)`` threshold tuple.

    The star threshold is a uniform integer in [0, 10]; the budget and
    marketing thresholds are uniform integers in [0, 100]. The three values
    are drawn in that order from the module-level ``random`` generator.
    """
    star_threshold = random.randint(0, 10)
    budget_threshold = random.randint(0, 100)
    marketing_threshold = random.randint(0, 100)
    return star_threshold, budget_threshold, marketing_threshold

def evaluate_rule(rule, dataset):
    """Return the fraction of records in *dataset* that *rule* classifies correctly.

    A record is predicted to be a hit (1) only when its star, budget and
    marketing values are all strictly greater than the corresponding
    thresholds in *rule*; otherwise it is predicted to be a flop (0).

    Args:
        rule: Three thresholds ``(star, budget, marketing)``.
        dataset: Iterable of ``(star, budget, marketing, actual)`` records,
            where ``actual`` is the label the prediction is compared with.
            Any iterable works, including one-shot iterators.

    Returns:
        The accuracy as a float in ``[0.0, 1.0]``. An empty dataset yields
        ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    star_min, budget_min, marketing_min = rule
    total = 0
    correct = 0
    # Count records while iterating so that len() is never required.
    for star, budget, marketing, actual in dataset:
        total += 1
        is_hit = star > star_min and budget > budget_min and marketing > marketing_min
        predicted = 1 if is_hit else 0
        if predicted == actual:
            correct += 1
    if total == 0:
        # Accuracy is undefined without data; report the neutral worst score.
        return 0.0
    return correct / total

def fitness(rule):
    """Score *rule* by its accuracy on the module-level ``movies`` dataset.

    Delegates to ``evaluate_rule``; higher is better.
    """
    accuracy = evaluate_rule(rule, movies)
    return accuracy

# -------- GA Operators --------
def mutate(rule):
    """Return a new rule with one randomly chosen threshold re-drawn.

    One of the three positions is picked uniformly, then replaced with a
    fresh uniform integer from that position's range: [0, 10] for the star
    threshold, [0, 100] for budget and marketing. The new value may equal
    the old one. *rule* itself is not modified.
    """
    star, budget, marketing = rule
    genes = [star, budget, marketing]
    upper_bounds = (10, 100, 100)  # inclusive maximum for each position

    position = random.choice((0, 1, 2))
    genes[position] = random.randint(0, upper_bounds[position])
    return tuple(genes)

def crossover(r1, r2):
    """Swap one randomly chosen position between two rules.

    A position in {0, 1, 2} is picked uniformly and the values at that
    position are exchanged. Returns the two resulting children as tuples;
    the parents are left unchanged.
    """
    swap_at = random.choice((0, 1, 2))
    parent_a = tuple(r1)
    parent_b = tuple(r2)

    # Rebuild each child around the exchanged gene instead of swapping in place.
    child_a = parent_a[:swap_at] + (parent_b[swap_at],) + parent_a[swap_at + 1:]
    child_b = parent_b[:swap_at] + (parent_a[swap_at],) + parent_b[swap_at + 1:]
    return child_a, child_b

# -------- Gene Expression Algorithm --------
def GeneExpressionAlgorithm(pop_size=30, generations=40, Pc=0.7, Pm=0.2):
    """Evolve threshold rules and return the fittest one found.

    Starts from ``pop_size`` rules produced by ``random_rule()`` and repeats
    the following for ``generations`` rounds:

    1. Rank the population by ``fitness`` (best first).
    2. Copy the top-ranked rule into the next generation unchanged (elitism).
    3. Fill the remaining slots with children of two parents, each parent
       being the fitter of two distinct randomly sampled rules.
    4. With probability ``Pc`` the parents go through ``crossover``;
       otherwise they are carried over as-is.
    5. Each child is independently passed to ``mutate`` with probability
       ``Pm``.

    Args:
        pop_size: Number of rules kept in every generation.
        generations: Number of evolution rounds to run.
        Pc: Crossover probability per parent pair.
        Pm: Mutation probability per child.

    Returns:
        ``(best_rule, best_acc)``: the highest-fitness rule in the final
        population and its fitness value.
    """

    def pick_parent(ranked):
        # Binary tournament: sample two distinct entries, keep the fitter rule.
        contenders = random.sample(ranked, 2)
        return max(contenders, key=lambda entry: entry[1])[0]

    population = [random_rule() for _ in range(pop_size)]

    for _round in range(generations):
        ranked = sorted(
            ((candidate, fitness(candidate)) for candidate in population),
            key=lambda entry: entry[1],
            reverse=True,
        )

        # Elitism: the current best rule always survives.
        offspring = [ranked[0][0]]

        while len(offspring) < pop_size:
            parent_a = pick_parent(ranked)
            parent_b = pick_parent(ranked)

            children = (parent_a, parent_b)
            if random.random() < Pc:
                children = crossover(parent_a, parent_b)

            finished = []
            for child in children:
                if random.random() < Pm:
                    child = mutate(child)
                finished.append(child)

            # Take both children when there is room, otherwise only the first.
            room = pop_size - len(offspring)
            offspring.extend(finished[:room])

        population = offspring

    champion = max(population, key=fitness)
    return champion, fitness(champion)

# -------- Run Example --------
if __name__ == "__main__":
    # Run the search with its default settings and report the winning rule.
    # Results vary between runs because the random generator is not seeded.
    best_rule, best_acc = GeneExpressionAlgorithm()
    print("Best Rule (star, budget, marketing):", best_rule)
    # best_acc is the fraction of `movies` records the rule classifies correctly.
    print("Accuracy on dataset:", best_acc)


Best Rule (star, budget, marketing): (3, 10, 13)
Accuracy on dataset: 0.9285714285714286
