In [None]:
!pip install deap

Collecting deap
  Downloading deap-1.4.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading deap-1.4.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/135.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.6/135.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deap
Successfully installed deap-1.4.3


In [None]:
!pip install deap joblib



In [None]:
import random
import operator
import numpy as np
import pandas as pd
import pickle
import os
from sklearn.model_selection import train_test_split
from collections import defaultdict
from deap import base, creator, tools, gp
from deap import algorithms
from multiprocessing import Pool
import logging
import time

# Setup logging to ensure output in Colab.
# force=True replaces any handlers already attached to the root logger; without it
# basicConfig() does nothing when the root logger is already configured, so the
# INFO messages below would never be shown.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()],
    force=True,
)

# Load MovieLens dataset
def load_data(movies_path='movies.csv', ratings_path='ratings.csv'):
    """Read the movies and ratings CSV files into DataFrames.

    Args:
        movies_path: Path of the movies CSV.
        ratings_path: Path of the ratings CSV.

    Returns:
        Tuple ``(movies, ratings)`` of DataFrames.

    Raises:
        FileNotFoundError: If either file is missing (logged, then re-raised).
        Exception: Any other read failure is logged and re-raised unchanged.
    """
    logging.info("Loading data...")
    try:
        movies = pd.read_csv(movies_path)
        ratings = pd.read_csv(ratings_path)
        print(f"Loaded {len(movies)} movies and {len(ratings)} ratings")
    except Exception as e:
        # One handler, two messages: a missing file gets its own wording.
        if isinstance(e, FileNotFoundError):
            logging.error(f"File not found: {e}")
        else:
            logging.error(f"Error loading data: {e}")
        raise
    return movies, ratings

# Split data per user into train and test sets
def user_train_test_split(ratings, test_size=0.2, cache_file='/content/train_test_split.pkl'):
    """Split ratings into train/test sets user by user, with an on-disk cache.

    Users with fewer than 5 ratings go entirely to the training set; every other
    user's ratings are split with ``train_test_split`` (fixed ``random_state=42``).

    Args:
        ratings: DataFrame with at least a 'userId' column.
        test_size: Fraction (or count) of each user's ratings held out for testing.
        cache_file: Pickle path. If it exists, its contents are returned without
            looking at ``ratings`` or ``test_size``.

    Returns:
        Tuple ``(train_df, test_df)``. Both frames always carry the columns of
        ``ratings``, even when one of them is empty.
    """
    if os.path.exists(cache_file):
        logging.info("Loading train/test split from cache...")
        # NOTE(review): the cache is keyed by path only, so a file written for
        # different ratings or a different test_size is returned as-is; delete it
        # when inputs change. pickle.load can execute arbitrary code, so only
        # point cache_file at files this notebook wrote itself.
        with open(cache_file, 'rb') as f:
            train_df, test_df = pickle.load(f)
        print(f"Loaded cached train/test split: {len(train_df)} train, {len(test_df)} test")
        return train_df, test_df

    logging.info("Splitting data...")
    train_parts = []
    test_parts = []
    for _, group in ratings.groupby('userId'):
        if len(group) < 5:
            # Too few ratings to hold any out.
            train_parts.append(group)
            continue
        train, test = train_test_split(group, test_size=test_size, random_state=42)
        train_parts.append(train)
        test_parts.append(test)

    # An empty side still needs the rating columns: callers index test_df['userId'].
    empty = ratings.iloc[0:0]
    train_df = pd.concat(train_parts, ignore_index=True) if train_parts else empty.copy()
    test_df = pd.concat(test_parts, ignore_index=True) if test_parts else empty.copy()
    print(f"Split data: {len(train_df)} train, {len(test_df)} test")

    try:
        with open(cache_file, 'wb') as f:
            pickle.dump((train_df, test_df), f)
        logging.info("Cached train/test split")
    except Exception as e:
        # Caching is best-effort; the split itself is still returned.
        logging.warning(f"Failed to cache split: {e}")

    return train_df, test_df

# Build feature maps including genre information
def build_feature_maps(train_df, movies_df, cache_file='/content/feature_maps.pkl'):
    """Build (or load from cache) the lookup tables used to score candidate movies.

    Args:
        train_df: Training ratings with 'userId', 'movieId' and 'rating' columns.
        movies_df: Movie metadata with 'movieId' and 'genres' columns. Genres may
            be '|'-separated strings, lists, or missing. The frame is not modified.
        cache_file: Pickle path. If it exists, its contents are returned without
            looking at the other arguments.

    Returns:
        Tuple ``(user_avg, item_avg, item_count, item_genres,
        user_item_genre_scores, candidate_items_per_user)``:
        per-user mean rating, per-movie mean rating, per-movie rating count,
        movieId -> list of genres, user -> {movieId -> mean of the user's
        per-genre average ratings over the movie's genres}, and
        user -> list of rated-by-someone movies the user has not rated.
    """
    if os.path.exists(cache_file):
        logging.info("Loading feature maps from cache...")
        # NOTE(review): the cache is keyed by path only, so stale maps are returned
        # if the inputs changed. pickle.load can execute arbitrary code; only use
        # cache files written by this notebook.
        with open(cache_file, 'rb') as f:
            feature_maps = pickle.load(f)
        print(f"Loaded cached feature maps")
        return feature_maps

    def _as_genre_list(value):
        # Accept already-split lists, raw 'A|B' strings, and missing values alike.
        if isinstance(value, list):
            return value
        if isinstance(value, str):
            return value.split('|')
        return ['Unknown']

    logging.info("Building feature maps...")
    user_avg = train_df.groupby('userId')['rating'].mean().to_dict()
    item_avg = train_df.groupby('movieId')['rating'].mean().to_dict()
    item_count = train_df.groupby('movieId')['rating'].count().to_dict()

    # Split genres on a private frame so the caller's movies_df keeps its original
    # column (mutating it made a second uncached call fail and changed how genres
    # print later depending on whether the cache was hit).
    movie_genres = movies_df[['movieId']].assign(genres=movies_df['genres'].apply(_as_genre_list))
    item_genres = movie_genres.set_index('movieId')['genres'].to_dict()

    ratings_with_genres = train_df.merge(movie_genres, on='movieId', how='left')
    # Ratings for movies absent from movies_df come back as NaN after the left join.
    ratings_with_genres['genres'] = ratings_with_genres['genres'].apply(_as_genre_list)
    user_genre_prefs = ratings_with_genres.explode('genres').groupby(['userId', 'genres'])['rating'].mean().unstack(fill_value=0)

    # Plain dicts ({user: {genre: mean rating}}) make the users x items loop below
    # a matter of dict lookups instead of pandas indexing.
    prefs_by_user = user_genre_prefs.to_dict(orient='index')

    users = train_df['userId'].unique()
    items = list(item_avg.keys())
    genres_per_item = [item_genres.get(item, ['Unknown']) for item in items]

    user_item_genre_scores = {}
    for user in users:
        prefs = prefs_by_user.get(user, {})
        user_item_genre_scores[user] = {
            item: sum(prefs.get(genre, 0) for genre in genres) / (len(genres) if genres else 1)
            for item, genres in zip(items, genres_per_item)
        }

    # One pass over the ratings instead of one boolean mask per user.
    seen_by_user = {}
    for uid, movie_id in zip(train_df['userId'], train_df['movieId']):
        seen_by_user.setdefault(uid, set()).add(movie_id)

    all_items = set(item_avg.keys())
    candidate_items_per_user = {
        user: list(all_items - seen_by_user.get(user, set())) for user in users
    }

    feature_maps = (user_avg, item_avg, item_count, item_genres, user_item_genre_scores, candidate_items_per_user)
    print(f"Built feature maps: {len(user_avg)} users, {len(item_avg)} items")

    try:
        with open(cache_file, 'wb') as f:
            pickle.dump(feature_maps, f)
        logging.info("Cached feature maps")
    except Exception as e:
        # Caching is best-effort; the freshly built maps are still returned.
        logging.warning(f"Failed to cache feature maps: {e}")

    return feature_maps

# Recommend top-N items for a user using a GP individual (vectorized)
def recommend(individual, user, candidate_items, user_avg, item_avg, item_count, user_item_genre_scores, N=10):
    """Rank a user's candidate items with a GP expression and return the top N.

    Args:
        individual: GP tree compiled via the module-level ``toolbox.compile``.
        user: User id to recommend for.
        candidate_items: Sequence of item ids to rank.
        user_avg: user -> mean rating (default 3 when the user is unknown).
        item_avg: item -> mean rating (default 3 when the item is unknown).
        item_count: item -> number of ratings (default 1).
        user_item_genre_scores: user -> {item -> genre affinity} (default 0).
        N: Number of items to return.

    Returns:
        List of at most N item ids, highest score first. Items whose score is
        NaN are ranked last.
    """
    if not candidate_items:
        return []

    func = toolbox.compile(expr=individual)
    n_items = len(candidate_items)
    genre_scores = user_item_genre_scores.get(user, {})

    # Every feature is a float array: the original integer i_count array could
    # silently wrap around int64 when an expression multiplies it repeatedly.
    u_avg = np.full(n_items, user_avg.get(user, 3), dtype=float)
    i_avg = np.array([item_avg.get(item, 3) for item in candidate_items], dtype=float)
    i_count = np.array([item_count.get(item, 1) for item in candidate_items], dtype=float)
    g_score = np.array([genre_scores.get(item, 0) for item in candidate_items], dtype=float)

    # Overflow / invalid results are dealt with explicitly below, so the numpy
    # warnings they would raise are suppressed here.
    with np.errstate(over='ignore', invalid='ignore', divide='ignore'):
        try:
            scores = func(u_avg, i_avg, i_count, g_score)
        except Exception as e:
            logging.warning(f"Vectorized scoring failed: {e}, falling back to loop")
            scores = np.array([func(u_avg[i], i_avg[i], i_count[i], g_score[i]) for i in range(n_items)])

    scores = np.asarray(scores, dtype=float)
    if scores.ndim == 0:
        # A scalar result carries no ranking information; give every item the same score.
        scores = np.full(n_items, float(scores))
    # argsort places NaN last, so the reversed order would rank NaN items first.
    scores = np.where(np.isnan(scores), -np.inf, scores)

    top_indices = np.argsort(scores)[::-1][:N]
    return [candidate_items[i] for i in top_indices]

# Fitness evaluation for a single individual, returning metrics
def evaluate_individual(individual, test_users_subset, train_df, test_df, user_avg, item_avg, item_count, user_item_genre_scores, candidate_items_per_user, top_n=10):
    """Score one GP individual by mean per-user F1 of its top-N recommendations.

    Args:
        individual: GP tree passed on to ``recommend``.
        test_users_subset: User ids to evaluate.
        train_df: Unused; kept so existing callers keep working.
        test_df: Held-out ratings with 'userId' and 'movieId' columns.
        user_avg, item_avg, item_count, user_item_genre_scores: Feature maps
            forwarded to ``recommend``.
        candidate_items_per_user: user -> list of items eligible for recommendation.
        top_n: Length of the recommendation list; precision is ``hits / top_n``.

    Returns:
        Tuple ``(avg_f1, user_metrics)`` where ``user_metrics`` maps each evaluated
        user to ``{'precision', 'recall', 'f1'}``. Users without candidates or
        without held-out items are skipped; ``avg_f1`` is 0 if nobody was evaluated.
    """
    user_metrics = {}
    if len(test_users_subset) == 0:
        return 0, user_metrics

    # Group held-out items once instead of masking test_df for every user.
    held_out = {}
    for uid, movie_id in zip(test_df['userId'], test_df['movieId']):
        held_out.setdefault(uid, set()).add(movie_id)

    f1_scores = []
    for user in test_users_subset:
        test_items = held_out.get(user)
        candidate_items = candidate_items_per_user.get(user, [])
        if not candidate_items or not test_items:
            continue
        recs = recommend(individual, user, candidate_items, user_avg, item_avg, item_count, user_item_genre_scores, N=top_n)
        hits = len(set(recs) & test_items)
        precision = hits / top_n
        recall = hits / len(test_items)
        # The epsilon keeps F1 defined (as 0) when there are no hits.
        f1 = (2 * precision * recall) / (precision + recall + 1e-8)
        f1_scores.append(f1)
        user_metrics[user] = {'precision': precision, 'recall': recall, 'f1': f1}

    avg_f1 = np.mean(f1_scores) if f1_scores else 0
    return avg_f1, user_metrics

# Wrapper for parallel evaluation
def evaluate(individual):
    """Evaluate ``individual`` against the notebook's module-level data.

    Single-argument wrapper around ``evaluate_individual``: the test users,
    train/test frames and feature maps are read from globals.

    Returns:
        The ``(avg_f1, user_metrics)`` tuple produced by ``evaluate_individual``.
    """
    return evaluate_individual(
        individual,
        test_users_subset=test_users_subset,
        train_df=train_df,
        test_df=test_df,
        user_avg=user_avg,
        item_avg=item_avg,
        item_count=item_count,
        user_item_genre_scores=user_item_genre_scores,
        candidate_items_per_user=candidate_items_per_user,
    )

# Setup GP
# Evolved expressions take four inputs, named after the features that
# recommend() passes in: user mean, item mean, item rating count, genre score.
pset = gp.PrimitiveSet("MAIN", 4)
pset.renameArguments(ARG0='u_avg', ARG1='i_avg', ARG2='i_count', ARG3='g_score')

# Function set (numpy ufuncs, so expressions evaluate on whole arrays).
pset.addPrimitive(np.add, 2)
pset.addPrimitive(np.subtract, 2)
pset.addPrimitive(np.multiply, 2)
# Division with the denominator shifted by 1e-5 so an exact zero does not divide by zero.
pset.addPrimitive(lambda x, y: np.divide(x, y + 1e-5), 2, name="safe_div")
pset.addPrimitive(np.tanh, 1)
pset.addPrimitive(np.abs, 1)

# Guarded with hasattr so re-running this cell does not call creator.create
# again for a name that already exists.
if not hasattr(creator, 'FitnessMax'):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))  # single objective, maximised
if not hasattr(creator, 'Individual'):
    creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Tree construction
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=3)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)

# Evaluation and variation operators
toolbox.register("evaluate", evaluate)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr, pset=pset)

# Cap trees produced by crossover and mutation at a size (measured with len) of 17.
toolbox.decorate("mate", gp.staticLimit(key=len, max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=len, max_value=17))

# Early stopping callback
print("Defining EarlyStopping class...")
class EarlyStopping:
    """Callable that signals when the best fitness has stopped improving.

    Call the instance once per generation. It returns True once ``patience``
    calls have passed since the hall-of-fame leader's fitness last increased.
    """

    def __init__(self, patience):
        """Start counting with no generations seen and no best fitness yet."""
        self.patience = patience
        self.gen = 0                  # number of calls so far
        self.best_gen = 0             # call index at which best_fitness was last raised
        self.best_fitness = -np.inf
        print(f"EarlyStopping initialized with patience={self.patience}")

    def __call__(self, population, toolbox, halloffame):
        """Register one generation; return True if evolution should stop.

        Only ``halloffame[0].fitness.values[0]`` is read; ``population`` and
        ``toolbox`` are accepted but not used.
        """
        self.gen += 1
        current_best = halloffame[0].fitness.values[0]
        if current_best > self.best_fitness:
            self.best_fitness, self.best_gen = current_best, self.gen

        stalled_for = self.gen - self.best_gen
        if stalled_for < self.patience:
            return False

        logging.info(f"Early stopping at generation {self.gen}: no improvement for {self.patience} generations")
        return True

# Custom evolutionary algorithm with early stopping
def custom_eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None, halloffame=None, verbose=False, patience=3):
    """Simple generational GA loop with early stopping.

    ``toolbox.evaluate`` must return ``(fitness, user_metrics)``; the fitness is
    stored on the individual and the metrics on ``ind.user_metrics``.

    Args:
        population: List of individuals; replaced in place each generation.
        toolbox: Toolbox providing ``map``, ``evaluate``, ``select``, ``clone``,
            ``mate`` and ``mutate``.
        cxpb: Probability of mating each consecutive pair of offspring.
        mutpb: Probability of mutating each offspring.
        ngen: Maximum number of generations to evolve. The initial population
            is logged as generation 0 and is not counted.
        stats: Optional statistics object compiled on the population each generation.
        halloffame: Optional hall of fame, updated every generation.
        verbose: Print the logbook line of every generation.
        patience: Generations without improvement of the best fitness before stopping.

    Returns:
        Tuple ``(population, logbook)``.
    """
    def _evaluate_invalid(individuals):
        # Evaluate only individuals whose fitness was invalidated; return how many.
        invalid_ind = [ind for ind in individuals if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.user_metrics = fit[1]
        return len(invalid_ind)

    def _log(gen, nevals):
        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, nevals=nevals, **record)
        if verbose:
            print(logbook.stream)

    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])

    # Generation 0: the initial population.
    nevals = _evaluate_invalid(population)
    if halloffame is not None:
        halloffame.update(population)
    _log(0, nevals)

    early_stopping = EarlyStopping(patience=patience)

    # Exactly ngen rounds of selection + variation (previously ngen + 1).
    for gen in range(1, ngen + 1):
        offspring = toolbox.select(population, len(population))
        offspring = [toolbox.clone(ind) for ind in offspring]

        for i in range(1, len(offspring), 2):
            if random.random() < cxpb:
                offspring[i-1], offspring[i] = toolbox.mate(offspring[i-1], offspring[i])
                del offspring[i-1].fitness.values, offspring[i].fitness.values
        for i in range(len(offspring)):
            if random.random() < mutpb:
                offspring[i], = toolbox.mutate(offspring[i])
                del offspring[i].fitness.values

        # Count real evaluations: an individual that was both mated and mutated
        # is evaluated once, not twice.
        nevals = _evaluate_invalid(offspring)

        if halloffame is not None:
            halloffame.update(offspring)

        population[:] = offspring
        _log(gen, nevals)

        # EarlyStopping reads entry 0 of what it is given; without a usable hall
        # of fame, hand it the best individual of the current population.
        if halloffame is not None and len(halloffame) > 0:
            leaders = halloffame
        elif population:
            leaders = [max(population, key=lambda ind: ind.fitness.values[0])]
        else:
            leaders = None

        if leaders is not None and early_stopping(population, toolbox, leaders):
            break

    return population, logbook

# Main execution
print("Starting execution...")
start_time = time.time()
logging.info("Loading data...")
movies_df, ratings_df = load_data()

logging.info("Splitting data...")
train_df, test_df = user_train_test_split(ratings_df)

logging.info("Building feature maps...")
user_avg, item_avg, item_count, item_genres, user_item_genre_scores, candidate_items_per_user = build_feature_maps(train_df, movies_df)

logging.info("Preparing users...")
train_users = train_df['userId'].unique()
test_users = test_df['userId'].unique()
# Seed Python's RNG before the population is created so the run is repeatable.
random.seed(42)
# tolist() yields plain Python ints, so printed ids read as 1, 2, ... rather than np.int64(1), ...
test_users_subset = test_users.tolist()
logging.info(f"Using {len(test_users_subset)} test users for evaluation")
print(f"Using {len(test_users_subset)} test users for evaluation")
# Show a preview only; printing every id floods the cell output.
print(f"Test user IDs (first 10): {sorted(test_users_subset)[:10]}")

# Setup parallel evaluation
logging.info("Setting up parallel evaluation...")
# The pool is created after the data globals exist; evaluate() reads them in the workers.
pool = Pool()
toolbox.register("map", pool.map)

try:
    pop = toolbox.population(n=25)
    hof = tools.HallOfFame(1)

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("max", np.max)

    logging.info("Starting GP evolution...")
    print("Starting GP evolution...")
    pop, log = custom_eaSimple(
        pop, toolbox, cxpb=0.5, mutpb=0.3, ngen=8,
        stats=stats, halloffame=hof, verbose=True
    )
finally:
    # Cleanup: always release the worker processes, even if evolution fails,
    # and stop the toolbox from pointing at the closed pool.
    pool.close()
    pool.join()
    toolbox.register("map", map)

# Output results
logging.info(f"Best GP Individual: {hof[0]}")
logging.info(f"Best F1 Score: {hof[0].fitness.values[0]:.4f}")
logging.info(f"Total runtime: {time.time() - start_time:.2f} seconds")
print(f"Best GP Individual: {hof[0]}")
print(f"Best F1 Score: {hof[0].fitness.values[0]:.4f}")
print(f"Total runtime: {time.time() - start_time:.2f} seconds")

Defining EarlyStopping class...
Starting execution...
Loaded 9742 movies and 100836 ratings
Loaded cached train/test split: 80419 train, 20417 test
Loaded cached feature maps
Using 610 test users for evaluation
Test user IDs: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29), np.int64(30), np.int64(31), np.int64(32), np.int64(33), np.int64(34), np.int64(35), np.int64(36), np.int64(37), np.int64(38), np.int64(39), np.int64(40), np.int64(41), np.int64(42), np.int64(43), np.int64(44), np.int64(45), np.int64(46), np.int64(47), np.int64(48), np.int64(49), np.int64(50), np.int64(51), np.int64(52), np.int64(53), np.int64(54), np.int64(55), np.int64(56)

In [None]:
# Generate and display recommended movies for a fixed user
fixed_user_id = 1  # Change to 2 or another ID if desired
print(f"\nRecommended Movies for User {fixed_user_id}:")
if fixed_user_id not in test_users_subset:
    print(f"Error: User {fixed_user_id} is not in test_users_subset. Choose from: {sorted(test_users_subset)}")
else:
    candidate_items = candidate_items_per_user.get(fixed_user_id, [])
    if not candidate_items:
        print(f"User {fixed_user_id}: No candidate items available")
    else:
        recs = recommend(hof[0], fixed_user_id, candidate_items, user_avg, item_avg, item_count, user_item_genre_scores, N=10)
        for i, movie_id in enumerate(recs, 1):
            movie_info = movies_df[movies_df['movieId'] == movie_id]
            if not movie_info.empty:
                title = movie_info['title'].iloc[0]
                genres = movie_info['genres'].iloc[0]
                # movies_df may hold genres as 'A|B' strings or as lists (depending on
                # whether the feature maps were built or loaded from cache); print both the same way.
                if isinstance(genres, (list, tuple)):
                    genres = '|'.join(genres)
                print(f"{i}. {title} [{genres}]")
            else:
                print(f"{i}. Movie ID {movie_id} [Unknown]")
print()

# Display evaluation metrics
print("\nEvaluation Metrics:")
avg_f1, user_metrics = evaluate(hof[0])  # Re-evaluate best individual
if user_metrics:
    avg_precision = np.mean([m['precision'] for m in user_metrics.values()])
    avg_recall = np.mean([m['recall'] for m in user_metrics.values()])
else:
    # No user could be evaluated; np.mean([]) would give NaN plus a RuntimeWarning.
    avg_precision = 0.0
    avg_recall = 0.0
print(f"Average Precision (All Test Users): {avg_precision:.4f}")
print(f"Average Recall (All Test Users): {avg_recall:.4f}")
print(f"Average F1 Score (All Test Users): {avg_f1:.4f}")
print(f"Metrics for User {fixed_user_id}:")
if fixed_user_id in user_metrics:
    metrics = user_metrics[fixed_user_id]
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")
else:
    print(f"No metrics available for User {fixed_user_id}. Ensure user is in test set.")


Recommended Movies for User 1:
1. Shawshank Redemption, The (1994) [Crime|Drama]
2. Pulp Fiction (1994) [Comedy|Crime|Drama|Thriller]
3. Star Wars: Episode IV - A New Hope (1977) [Action|Adventure|Sci-Fi]
4. Terminator 2: Judgment Day (1991) [Action|Sci-Fi]
5. Lord of the Rings: The Fellowship of the Ring, The (2001) [Adventure|Fantasy]
6. Apollo 13 (1995) [Adventure|Drama|IMAX]
7. Godfather, The (1972) [Crime|Drama]
8. Lord of the Rings: The Return of the King, The (2003) [Action|Adventure|Drama|Fantasy]
9. Lord of the Rings: The Two Towers, The (2002) [Adventure|Fantasy]
10. Twelve Monkeys (a.k.a. 12 Monkeys) (1995) [Mystery|Sci-Fi|Thriller]


Evaluation Metrics:
Average Precision (All Test Users): 0.1656
Average Recall (All Test Users): 0.0730
Average F1 Score (All Test Users): 0.0827
Metrics for User 1:
Precision: 0.2000
Recall: 0.0426
F1 Score: 0.0702
