From d567f45d3a447a5b866dfeb7198a9a5b256173c0 Mon Sep 17 00:00:00 2001
From: lsabor
Date: Wed, 28 Aug 2024 18:34:07 -0700
Subject: [PATCH 1/4] improve print statements for migrator

---
 .../management/commands/migrate_old_db.py | 1 +
 migrator/services/migrate_comments.py | 20 +++++++-
 migrator/services/migrate_forecasts.py | 8 ++--
 migrator/services/migrate_permissions.py | 3 +-
 migrator/services/migrate_projects.py | 11 +++--
 migrator/services/migrate_questions.py | 47 +++++++++++++++----
 migrator/services/migrate_scoring.py | 8 ++--
 migrator/services/migrate_users.py | 46 +++++++++++++---
 migrator/services/migrate_votes.py | 42 ++++++++++++-----
 .../management/commands/build_forecasts.py | 20 ++++----
 10 files changed, 154 insertions(+), 52 deletions(-)

diff --git a/migrator/management/commands/migrate_old_db.py b/migrator/management/commands/migrate_old_db.py
index c974f541e0..78dd3fbec7 100644
--- a/migrator/management/commands/migrate_old_db.py
+++ b/migrator/management/commands/migrate_old_db.py
@@ -62,6 +62,7 @@ def handle(self, *args, site_ids=None, **options):
         migrate_users()
         print("Migrated users")
         migrate_fab_credits()
+        print("Migrated fab credits")
         migrate_questions(site_ids=site_ids)
         print("Migrated questions")
         migrate_projects(site_ids=site_ids)
diff --git a/migrator/services/migrate_comments.py b/migrator/services/migrate_comments.py
index c1d84b6664..21ed7d2b61 100644
--- a/migrator/services/migrate_comments.py
+++ b/migrator/services/migrate_comments.py
@@ -2,6 +2,7 @@
 from migrator.utils import paginated_query
 from posts.models import Post
 from questions.models import Forecast
+from django.utils import timezone


 def create_comment_vote(vote_obj):
@@ -67,16 +68,31 @@ def migrate_comments():
     comments = []
     post_ids = Post.objects.values_list("id", flat=True)

-    for comment in paginated_query(
-        """
+    start = timezone.now()
+    for i, comment in enumerate(
+        paginated_query(
+            """
         SELECT c.*
         FROM metac_question_comment c
         where c.author_id is not null
         and c.id is not null
         order by c.id
     ;"""
+        ),
+        1,
     ):
+        print(
+            f"\033[Kmigrating comments: {i}. "
+            f"dur:{str(timezone.now() - start).split('.')[0]} ",
+            end="\r",
+        )
         if comment["question_id"] in post_ids:
             comments.append(create_comment(comment))
+    print(
+        f"\033[Kmigrating comments: {i}. "
+        f"dur:{str(timezone.now() - start).split('.')[0]} ",
+    )
+    print("bulk creating...", end="\r")

     Comment.objects.bulk_create(comments)
+    print("bulk creating... 
DONE") diff --git a/migrator/services/migrate_forecasts.py b/migrator/services/migrate_forecasts.py index eea3d8b0b8..a54b54e5e2 100644 --- a/migrator/services/migrate_forecasts.py +++ b/migrator/services/migrate_forecasts.py @@ -181,7 +181,7 @@ def migrate_metaculus_predictions(): for i, mp_data in enumerate(mp_histories, 1): print( f"Migrating continuous metaculus prediction {i}/{c} " - f"dur:{str(timezone.now() - start).split(".")[0]} " + f"dur:{str(timezone.now() - start).split('.')[0]} " f"remaining:{str((timezone.now() - start) / i * (c - i)).split(".")[0]}", end="\r", ) @@ -244,7 +244,7 @@ def migrate_metaculus_predictions(): AggregateForecast.objects.bulk_create(forecasts) print( f"\033[KMigrating continuous metaculus prediction {i}/{c} " - f"dur:{str(timezone.now() - start).split(".")[0]} " + f"dur:{str(timezone.now() - start).split('.')[0]} " ) # binary questions @@ -262,7 +262,7 @@ def migrate_metaculus_predictions(): for i, q_data in enumerate(question_histories, 1): print( f"Migrating binary metaculus prediction {i}/{c} " - f"dur:{str(timezone.now() - start).split(".")[0]} " + f"dur:{str(timezone.now() - start).split('.')[0]} " f"remaining:{str((timezone.now() - start) / i * (c - i)).split(".")[0]}", end="\r", ) @@ -300,5 +300,5 @@ def migrate_metaculus_predictions(): AggregateForecast.objects.bulk_create(forecasts) print( f"\033[KMigrating binary metaculus prediction {i}/{c} " - f"dur:{str(timezone.now() - start).split(".")[0]} " + f"dur:{str(timezone.now() - start).split('.')[0]} " ) diff --git a/migrator/services/migrate_permissions.py b/migrator/services/migrate_permissions.py index c85f1fa5e7..19983e1334 100644 --- a/migrator/services/migrate_permissions.py +++ b/migrator/services/migrate_permissions.py @@ -138,7 +138,8 @@ def migrate_common_permissions(site_ids: list): ): print( f"QuestionProjectPermission.permission affected " - f"project: {user_project_perm_obj['project_id']}" + f"project: {user_project_perm_obj['project_id']}", + end="\r", ) question_permission = convert_question_permissions(question_permission_code) diff --git a/migrator/services/migrate_projects.py b/migrator/services/migrate_projects.py index 140ef86baa..38cc4eeb80 100644 --- a/migrator/services/migrate_projects.py +++ b/migrator/services/migrate_projects.py @@ -177,9 +177,6 @@ def migrate_topics(question_ids: list[int], q_p_m2m_cls): ) ) - print("related_category_ids", related_category_ids) - print("related_tag_ids", related_tag_ids) - print("related_project_ids", related_project_ids) # # Aggregating all M2M tables which were related to the questions @@ -219,7 +216,13 @@ def migrate_topics(question_ids: list[int], q_p_m2m_cls): # Some topics contain inline question ids in topic.question_ids column m2m_queries.append([{"question_id": x} for x in topic_obj["question_ids"]]) - print("inline_question_ids", topic_obj["question_ids"]) + print( + f"related_categories:{related_category_ids}" + f"related_tags:{related_tag_ids} " + f"related_projects:{related_project_ids} " + f"inline_questions:{topic_obj["question_ids"]}", + end="\r", + ) m2m_objects = [] for m2m in itertools.chain(*m2m_queries): diff --git a/migrator/services/migrate_questions.py b/migrator/services/migrate_questions.py index 8604ee8ed8..a11709b3ec 100644 --- a/migrator/services/migrate_questions.py +++ b/migrator/services/migrate_questions.py @@ -73,7 +73,7 @@ def create_question(question: dict, **kwargs) -> Question: created_at=question["created_time"], edited_at=question["edited_time"], open_time=question["publish_time"], - 
cp_reveal_time=question["cp_reveal_time"], + cp_reveal_time=question["publish_time"], scheduled_close_time=( question["close_time"] if question["close_time"] @@ -162,8 +162,10 @@ def migrate_questions__simple(site_ids: list[int] = None): posts = [] site_ids = site_ids or [] - for old_question in paginated_query( - """ + start = timezone.now() + for i, old_question in enumerate( + paginated_query( + """ SELECT q.*, ARRAY_AGG(o.label ORDER BY o.id) AS option_labels FROM metac_question_question q JOIN (SELECT q.* @@ -179,14 +181,32 @@ def migrate_questions__simple(site_ids: list[int] = None): LEFT JOIN metac_question_option o ON q.id = o.question_id GROUP BY q.id;""", - [tuple(site_ids)], + [tuple(site_ids)], + ), + 1, ): + print( + f"\033[Kmigrating questions/posts: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + end="\r", + ) question = create_question(old_question) if question is not None: questions.append(question) posts.append(create_post(old_question, question_id=old_question["id"])) + print( + f"\033[Kmigrating questions/posts: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} " + "bulk creating...", + end="\r", + ) Question.objects.bulk_create(questions) Post.objects.bulk_create(posts) + print( + f"\033[Kmigrating questions/posts: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} " + "bulk creating... DONE", + ) # @@ -198,8 +218,10 @@ def migrate_questions__composite(site_ids: list[int] = None): old_groups = {} site_ids = site_ids or [] - for old_question in paginated_query( - """SELECT q.*, + start = timezone.now() + for i, old_question in enumerate( + paginated_query( + """SELECT q.*, qc.parent_id as condition_id, qc.unconditional_question_id as condition_child_id, qc.resolution as qc_resolution @@ -222,9 +244,16 @@ def migrate_questions__composite(site_ids: list[int] = None): metac_question_conditional qc ON qc.child_id = q.id -- Ensure parents go first ORDER BY group_id DESC;""", - [tuple(site_ids)], - itersize=10000, + [tuple(site_ids)], + itersize=10000, + ), + i, ): + print( + f"\033[Kprocessing questions/posts: {i}. 
" + f"dur:{str(timezone.now() - start).split('.')[0]} ", + end="\r", + ) group_id = old_question["group_id"] # If root @@ -589,6 +618,6 @@ def migrate_post_snapshots_forecasts(): bulk_update = [] if not (processed % 25_000): - print(f"Updated PostUserSnapshot.last_forecast_date: {processed}") + print(f"Updated PostUserSnapshot.last_forecast_date: {processed}", end="\r") PostUserSnapshot.objects.bulk_update(bulk_update, fields=["last_forecast_date"]) diff --git a/migrator/services/migrate_scoring.py b/migrator/services/migrate_scoring.py index 1a6dc39e8c..abef3d4a23 100644 --- a/migrator/services/migrate_scoring.py +++ b/migrator/services/migrate_scoring.py @@ -98,8 +98,8 @@ def score_questions(qty: int | None = None, start_id: int = 0): f = question.user_forecasts.count() print( f"\033[Kscoring question {i:>4}/{c} ID:{question.id:<4} forecasts:{f:<4} " - f"dur:{str(timezone.now() - start).split(".")[0]} " - f"est:{str((timezone.now() - start) / i * c).split(".")[0]} " + f"dur:{str(timezone.now() - start).split('.')[0]} " + f"remaining:{str((timezone.now() - start) / i * (c - i)).split(".")[0]} " f"scoring: {','.join(score_types)}...", end="\r", ) @@ -110,7 +110,7 @@ def score_questions(qty: int | None = None, start_id: int = 0): ) print( f"\033[Kscoring question {i:>4}/{c} ID:{question.id:<4} forecasts:{f:<4} " - f"dur:{str(timezone.now() - start).split(".")[0]} " - f"est:{str((timezone.now() - start) / i * (c - i)).split(".")[0]} " + f"dur:{str(timezone.now() - start).split('.')[0]} " + f"remaining:{str((timezone.now() - start) / i * (c - i)).split(".")[0]} " f"scoring: {','.join(score_types)}... DONE", ) diff --git a/migrator/services/migrate_users.py b/migrator/services/migrate_users.py index 449661b704..776e2c9a19 100644 --- a/migrator/services/migrate_users.py +++ b/migrator/services/migrate_users.py @@ -1,5 +1,6 @@ from social_django.models import Association, Code, Nonce, Partial, UserSocialAuth from rest_framework.authtoken.models import Token +from django.utils import timezone from migrator.utils import paginated_query from users.models import User @@ -50,22 +51,55 @@ def migrate_social_auth(): def migrate_users(): + start = timezone.now() users = [] - for user_obj in paginated_query( + for i, user_obj in enumerate(paginated_query( "SELECT u.*, p.bio_text, p.website " "FROM metac_account_user u " "LEFT JOIN metac_account_userprofile p ON p.user_id = u.id" - ): + ), 1): + print( + f"\033[Kmigrating users: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + end="\r", + ) users.append(create_user(user_obj)) + print( + f"\033[Kmigrated users: {i}. " + f"dur:{str(timezone.now() - start).split(".")[0]}. " + "bulk creating...", + end="\r", + ) User.objects.bulk_create(users, batch_size=10_000) + print( + f"\033[Kmigrated users: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} " + "bulk creating... DONE" + ) # Migrating existing bot/user tokens - tokens = [ - Token(**token_obj) - for token_obj in paginated_query("SELECT * FROM authtoken_token") - ] + start = timezone.now() + tokens = [] + for i, token_obj in enumerate(paginated_query("SELECT * FROM authtoken_token"), 1): + tokens.append(Token(**token_obj)) + print( + f"\033[Kmigrating tokens: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + end="\r", + ) + print( + f"\033[Kmigrating tokens: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} " + "bulk creating...", + end="\r", + ) Token.objects.bulk_create(tokens, batch_size=5_000) + print( + f"\033[Kmigrating tokens: {i}. 
" + f"dur:{str(timezone.now() - start).split('.')[0]} " + "bulk creating... DONE" + ) # Social migrations migrate_social_auth() diff --git a/migrator/services/migrate_votes.py b/migrator/services/migrate_votes.py index 967737a75a..3a0513c0d9 100644 --- a/migrator/services/migrate_votes.py +++ b/migrator/services/migrate_votes.py @@ -1,5 +1,6 @@ from migrator.utils import paginated_query from posts.models import Vote, Post +from django.utils import timezone def create_vote(vote_obj, direction: int): @@ -12,24 +13,41 @@ def create_vote(vote_obj, direction: int): def migrate_votes(): post_ids = Post.objects.values_list("id", flat=True) + start = timezone.now() vote_instances = [] # Migrating Upvotes - vote_instances += [ - create_vote(obj, 1) - for obj in paginated_query( - "SELECT * FROM metac_question_question_votes_up", + for i, obj in enumerate( + paginated_query("SELECT * FROM metac_question_question_votes_up"), 1 + ): + print( + f"\033[Kmigrating upvotes: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + end="\r", ) - if obj["question_id"] in post_ids - ] + if obj["question_id"] in post_ids: + vote_instances.append(create_vote(obj, 1)) + print( + f"\033[Kmigrating upvotes: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + ) # Migrating Downvotes - vote_instances += [ - create_vote(obj, -1) - for obj in paginated_query( - "SELECT * FROM metac_question_question_votes_down", + for i, obj in enumerate( + paginated_query("SELECT * FROM metac_question_question_votes_down"), 1 + ): + print( + f"\033[Kmigrating downvotes: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + end="\r", ) - if obj["question_id"] in post_ids - ] + if obj["question_id"] in post_ids: + vote_instances.append(create_vote(obj, -1)) + print( + f"\033[Kmigrating downvotes: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} " + ) + print("bulk creating...", end="\r") Vote.objects.bulk_create(vote_instances, ignore_conflicts=True) + print("bulk creating... DONE") diff --git a/questions/management/commands/build_forecasts.py b/questions/management/commands/build_forecasts.py index d6f1cc95c3..fab012d2dd 100644 --- a/questions/management/commands/build_forecasts.py +++ b/questions/management/commands/build_forecasts.py @@ -14,11 +14,11 @@ class Command(BaseCommand): def handle(self, *args, **options): qs = Question.objects.all().order_by("id").prefetch_related("user_forecasts") - total = qs.count() - processed = 0 + c = qs.count() + i = 0 tm = time.time() - print(f"Building CP. Found {total} questions with forecasts to process.") + print(f"Building CP. 
Found {c} questions with forecasts to process.") for question in qs.iterator(chunk_size=100): @@ -29,15 +29,15 @@ def handle(self, *args, **options): "Failed to generate forecast for question %s", question.id ) - processed += 1 + i += 1 print( - f"Processed {int(processed / total * 100)}% ({processed}/{total}) " - f"Dur:{round(time.time() - tm)}s " - f"Est:{round((time.time() - tm) / processed * (total - processed))}s", + f"Processed {int(i / c * 100)}% ({i}/{c}) " + f"dur:{round(time.time() - tm)}s " + f"remaining:{round((time.time() - tm) / i * (c - i))}s", end="\r", ) print( - f"Processed {int(processed / total * 100)}% ({processed}/{total}) " - f"Dur:{round(time.time() - tm)}s " - f"Est:{round((time.time() - tm) / processed * (total - processed))}s" + f"Processed {int(i / c * 100)}% ({i}/{c}) " + f"dur:{round(time.time() - tm)}s " + f"remaining:{round((time.time() - tm) / i * (c - i))}s" ) From d1809dd8ff5ee0245a39e74c5df12dd532db2cac Mon Sep 17 00:00:00 2001 From: lsabor Date: Wed, 28 Aug 2024 20:20:46 -0700 Subject: [PATCH 2/4] more print statement tweaks for migrations --- migrator/services/migrate_leaderboards.py | 10 ++++++++-- migrator/services/post_migrate.py | 2 +- posts/jobs.py | 7 ++++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/migrator/services/migrate_leaderboards.py b/migrator/services/migrate_leaderboards.py index 5914a22ea5..8b97aff864 100644 --- a/migrator/services/migrate_leaderboards.py +++ b/migrator/services/migrate_leaderboards.py @@ -52,6 +52,7 @@ def populate_global_leaderboards(): for i, leaderboard in enumerate(global_leaderboards, 1): print("populating:", i, "/", c, leaderboard.name, end="\r") update_project_leaderboard(main_site_project, leaderboard) + entries = LeaderboardEntry.objects.filter(leaderboard=leaderboard).count() print( "populating:", i, @@ -59,8 +60,9 @@ def populate_global_leaderboards(): c, leaderboard.name, "(created", - LeaderboardEntry.objects.filter(leaderboard=leaderboard).count(), + entries, "entries)", + end="\r" if entries == 0 else "\n", ) @@ -73,6 +75,9 @@ def populate_project_leaderboards(): print("populating:", i, "/", c, project.name, end="\r") for leaderboard in project.leaderboards.all(): update_project_leaderboard(project, leaderboard) + entries = ( + LeaderboardEntry.objects.filter(leaderboard__project=project).count(), + ) print( "populating:", i, @@ -80,6 +85,7 @@ def populate_project_leaderboards(): c, project.name, "(created", - LeaderboardEntry.objects.filter(leaderboard__project=project).count(), + entries, "entries)", + end="\r" if entries == 0 else "\n", ) diff --git a/migrator/services/post_migrate.py b/migrator/services/post_migrate.py index 7c43ee26e8..b480383eeb 100644 --- a/migrator/services/post_migrate.py +++ b/migrator/services/post_migrate.py @@ -29,6 +29,6 @@ def post_migrate_calculate_divergence(): logger.exception(f"Error running calculate_divergence for post {post.id}") if not idx % 250: - print(f"Processed {idx + 1}/{posts_total} posts") + print(f"Processed {idx + 1}/{posts_total} posts", end="\r") print("Finished calculate_divergence") diff --git a/posts/jobs.py b/posts/jobs.py index b00beaa11c..9e01bee0ef 100644 --- a/posts/jobs.py +++ b/posts/jobs.py @@ -59,7 +59,7 @@ def job_compute_movement(): posts = [] - for idx, post in enumerate(qs.iterator(100)): + for i, post in enumerate(qs.iterator(100), 1): try: post.movement = compute_movement(post) except: @@ -72,7 +72,8 @@ def job_compute_movement(): Post.objects.bulk_update(posts, fields=["movement"]) posts = [] - if not idx % 100: 
- logger.info(f"Processed {idx + 1}/{total}. ") + print(f"compute movement: {i}/{total}", end="\r") + print("bulk updating...", end="\r") Post.objects.bulk_update(posts, fields=["movement"]) + print("bulk updating... DONE") From c3c771b27331796ea4437f39cbd9eb38071210b1 Mon Sep 17 00:00:00 2001 From: lsabor Date: Wed, 28 Aug 2024 23:14:50 -0700 Subject: [PATCH 3/4] continued incremental changes --- .../management/commands/migrate_old_db.py | 67 ++++++++++++------- migrator/services/migrate_comments.py | 25 +++++-- migrator/services/migrate_leaderboards.py | 8 +-- migrator/services/migrate_permissions.py | 11 +++ migrator/services/migrate_projects.py | 2 +- migrator/services/migrate_questions.py | 33 ++++++++- migrator/services/migrate_scoring.py | 19 +++++- posts/views.py | 2 +- 8 files changed, 127 insertions(+), 40 deletions(-) diff --git a/migrator/management/commands/migrate_old_db.py b/migrator/management/commands/migrate_old_db.py index 78dd3fbec7..962410d5ec 100644 --- a/migrator/management/commands/migrate_old_db.py +++ b/migrator/management/commands/migrate_old_db.py @@ -1,6 +1,8 @@ from django.core.management import call_command from django.core.management.base import BaseCommand from django.db import connection +from django.utils import timezone +from datetime import datetime, timedelta from migrator.services.migrate_comments import migrate_comments, migrate_comment_votes from migrator.services.migrate_forecasts import ( @@ -29,6 +31,17 @@ from scoring.models import populate_medal_exclusion_records +def print_duration(text, task_start, global_start) -> datetime: + print( + "\033[92m" + f"{text} ---" + f"Task Duration:{str(timezone.now() - task_start).split('.')[0]}, " + f"Total Runtime:{str(timezone.now() - global_start).split('.')[0]}" + "\033[0m" + ) + return timezone.now() + + class Command(BaseCommand): help = """ Migrates old database data to the new one @@ -51,6 +64,8 @@ def add_arguments(self, parser): ) def handle(self, *args, site_ids=None, **options): + start = timezone.now() + task_start = timezone.now() site_ids = [int(x) for x in site_ids.split(",")] with connection.cursor() as cursor: cursor.execute("DROP SCHEMA public CASCADE;") @@ -60,56 +75,60 @@ def handle(self, *args, site_ids=None, **options): # main model migration migrate_users() - print("Migrated users") + task_start = print_duration("Migrated users", task_start, start) migrate_fab_credits() - print("Migrated fab credits") + task_start = print_duration("Migrated fab credits", task_start, start) migrate_questions(site_ids=site_ids) - print("Migrated questions") + task_start = print_duration("Migrated questions", task_start, start) migrate_projects(site_ids=site_ids) - print("Migrated projects") + task_start = print_duration("Migrated projects", task_start, start) migrate_votes() - print("Migrated votes") + task_start = print_duration("Migrated votes", task_start, start) migrate_comments() - print("Migrated comments") + task_start = print_duration("Migrated comments", task_start, start) migrate_comment_votes() - print("Migrated comment votes") + task_start = print_duration("Migrated comment votes", task_start, start) migrate_permissions(site_ids=site_ids) - print("Migrated permissions") + task_start = print_duration("Migrated permissions", task_start, start) migrate_forecasts() - print("Migrated forecasts") + task_start = print_duration("Migrated forecasts", task_start, start) migrate_metaculus_predictions() - print("Migrated Metaculus predictions") + task_start = print_duration("Migrated Metaculus 
predictions", task_start, start)
         migrate_mailgun_notification_preferences()
-        print("Migrated user notification preferences")
+        task_start = print_duration(
+            "Migrated user notification preferences", task_start, start
+        )

         # TODO: enable on prod release!
-        print("\033[93mPost Subscriptions/Following migration is disabled!\033[0m")
+        print(f"\033[93mPost Subscriptions/Following migration is disabled!\033[0m")
         # migrate_subscriptions(site_ids=site_ids)
-        # print("Migrated post subscriptions")
+        # task_start = print_duration("Migrated post subscriptions", task_start, start)

-        # scoring
+        scoring
         migrate_archived_scores()
-        print("Migrated archived scores")
+        task_start = print_duration("Migrated archived scores", task_start, start)
         score_questions(start_id=options["start_score_questions_with_id"])
-        print("Scored questions")
+        task_start = print_duration("Scored questions", task_start, start)
         populate_medal_exclusion_records()
-        print("Populated medal exclusion records")
+        task_start = print_duration(
+            "Populated medal exclusion records", task_start, start
+        )
         create_global_leaderboards()
-        print("Created global leaderboards")
+        task_start = print_duration("Created global leaderboards", task_start, start)
         populate_global_leaderboards()
-        print("Populated global leaderboards")
+        task_start = print_duration("Populated global leaderboards", task_start, start)
         populate_project_leaderboards()
-        print("Populated project leaderboards")
+        task_start = print_duration("Populated project leaderboards", task_start, start)

         # stats on questions
-        print("Running calculate divergence")
         post_migrate_calculate_divergence()
+        task_start = print_duration("calculated divergence", task_start, start)
-        print("Running calculate movement")
         job_compute_movement()
+        task_start = print_duration("calculated movement", task_start, start)
-        print("Running build forecasts")
         call_command("build_forecasts")
+        task_start = print_duration("built forecasts", task_start, start)
-        print("Running compute hotness")
         compute_hotness()
+        task_start = print_duration("computed hotness", task_start, start)

         # Reset sql sequences
         reset_sequence()
diff --git a/migrator/services/migrate_comments.py b/migrator/services/migrate_comments.py
index 21ed7d2b61..40ba6255cd 100644
--- a/migrator/services/migrate_comments.py
+++ b/migrator/services/migrate_comments.py
@@ -53,15 +53,30 @@ def migrate_comment_votes():
     comment_ids = Comment.objects.values_list("id", flat=True)
     vote_instances = []

-    vote_instances += [
-        create_comment_vote(obj)
-        for obj in paginated_query("SELECT * FROM metac_question_comment_likes")
-        if obj["comment_id"] in comment_ids
-    ]
+    start = timezone.now()
+    for i, obj in enumerate(paginated_query("SELECT * FROM metac_question_comment_likes"), 1):
+        print(
+            f"\033[Kmigrating comment votes: {i}. "
+            f"dur:{str(timezone.now() - start).split('.')[0]} ",
+            end="\r",
+        )
+        if obj["comment_id"] in comment_ids:
+            vote_instances.append(create_comment_vote(obj))
+    print(
+        f"\033[Kmigrating comment votes: {i}. "
+        f"dur:{str(timezone.now() - start).split('.')[0]} ",
+        "bulk creating...",
+        end="\r",
+    )

     CommentVote.objects.bulk_create(
         vote_instances, ignore_conflicts=True, batch_size=1_000
     )
+    print(
+        f"\033[Kmigrating comment votes: {i}. "
+        f"dur:{str(timezone.now() - start).split('.')[0]} ",
+        "bulk creating... 
DONE", + ) def migrate_comments(): diff --git a/migrator/services/migrate_leaderboards.py b/migrator/services/migrate_leaderboards.py index 8b97aff864..713c85ff62 100644 --- a/migrator/services/migrate_leaderboards.py +++ b/migrator/services/migrate_leaderboards.py @@ -54,7 +54,7 @@ def populate_global_leaderboards(): update_project_leaderboard(main_site_project, leaderboard) entries = LeaderboardEntry.objects.filter(leaderboard=leaderboard).count() print( - "populating:", + "\033[Kpopulating:", i, "/", c, @@ -75,11 +75,9 @@ def populate_project_leaderboards(): print("populating:", i, "/", c, project.name, end="\r") for leaderboard in project.leaderboards.all(): update_project_leaderboard(project, leaderboard) - entries = ( - LeaderboardEntry.objects.filter(leaderboard__project=project).count(), - ) + entries = LeaderboardEntry.objects.filter(leaderboard__project=project).count() print( - "populating:", + "\033[Kpopulating:", i, "/", c, diff --git a/migrator/services/migrate_permissions.py b/migrator/services/migrate_permissions.py index 19983e1334..855ca7da10 100644 --- a/migrator/services/migrate_permissions.py +++ b/migrator/services/migrate_permissions.py @@ -153,9 +153,20 @@ def migrate_common_permissions(site_ids: list): ) ) + print( + f"QuestionProjectPermission.permission affected " + f"project: {user_project_perm_obj['project_id']}", + "bulk creating...", + end="\r", + ) ProjectUserPermission.objects.bulk_create( user_project_perms, batch_size=50_000, ignore_conflicts=True ) + print( + f"QuestionProjectPermission.permission affected " + f"project: {user_project_perm_obj['project_id']}", + "bulk creating... DONE", + ) print( f"Missed projects: {len(total_missed_project_ids)} " f"of the following OLD types: {total_missed_project_types}" diff --git a/migrator/services/migrate_projects.py b/migrator/services/migrate_projects.py index 38cc4eeb80..d678c87702 100644 --- a/migrator/services/migrate_projects.py +++ b/migrator/services/migrate_projects.py @@ -217,7 +217,7 @@ def migrate_topics(question_ids: list[int], q_p_m2m_cls): # Some topics contain inline question ids in topic.question_ids column m2m_queries.append([{"question_id": x} for x in topic_obj["question_ids"]]) print( - f"related_categories:{related_category_ids}" + f"related_categories:{related_category_ids} " f"related_tags:{related_tag_ids} " f"related_projects:{related_project_ids} " f"inline_questions:{topic_obj["question_ids"]}", diff --git a/migrator/services/migrate_questions.py b/migrator/services/migrate_questions.py index a11709b3ec..ffb69f6075 100644 --- a/migrator/services/migrate_questions.py +++ b/migrator/services/migrate_questions.py @@ -247,7 +247,7 @@ def migrate_questions__composite(site_ids: list[int] = None): [tuple(site_ids)], itersize=10000, ), - i, + 1, ): print( f"\033[Kprocessing questions/posts: {i}. " @@ -264,6 +264,10 @@ def migrate_questions__composite(site_ids: list[int] = None): # Some child questions might not have a parent, so we need to exclude such questions if group_id in old_groups: old_groups[group_id]["children"].append(old_question) + print( + f"\033[Kprocessing questions/posts: {i}. 
" + f"dur:{str(timezone.now() - start).split('.')[0]} ", + ) print("Migrating notebooks") migrate_questions__notebook(list(old_groups.values())) @@ -553,9 +557,15 @@ def migrate_post_user_snapshots(): post_ids = Post.objects.values_list("id", flat=True) snapshots = [] - for snapshot_obj in paginated_query( - "SELECT * FROM metac_question_questionsnapshot" + start = timezone.now() + for i, snapshot_obj in enumerate( + paginated_query("SELECT * FROM metac_question_questionsnapshot"), 1 ): + print( + f"\033[Kmigrating post user snapshots: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + end="\r", + ) if snapshot_obj["question_id"] not in post_ids: continue @@ -569,10 +579,27 @@ def migrate_post_user_snapshots(): ) if len(snapshots) >= 5_000: + print( + f"\033[Kmigrating post user snapshots: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + "Bulk creating...", + end="\r", + ) PostUserSnapshot.objects.bulk_create(snapshots) snapshots = [] + print( + f"\033[Kmigrating post user snapshots: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + "Bulk creating...", + end="\r", + ) PostUserSnapshot.objects.bulk_create(snapshots) + print( + f"\033[Kmigrating post user snapshots: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + "Bulk creating... DONE", + ) def migrate_post_snapshots_forecasts(): diff --git a/migrator/services/migrate_scoring.py b/migrator/services/migrate_scoring.py index abef3d4a23..dd6bec54d9 100644 --- a/migrator/services/migrate_scoring.py +++ b/migrator/services/migrate_scoring.py @@ -27,7 +27,13 @@ def migrate_archived_scores(): query_string = "SELECT * FROM metac_question_comboprediction" archived_scores = [] - for comboprediction in paginated_query(query_string): + start = timezone.now() + for i, comboprediction in enumerate(paginated_query(query_string), 1): + print( + f"\033[Kmigrating archived scores: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} ", + end="\r", + ) if (comboprediction["question_id"] not in question_dict) or ( comboprediction["log_score"] is None ): @@ -46,8 +52,19 @@ def migrate_archived_scores(): score_type=Score.ScoreTypes.RELATIVE_LEGACY, ) ) + print( + f"\033[Kmigrating archived scores: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} " + "bulk creating...", + end="\r", + ) ArchivedScore.objects.all().delete() ArchivedScore.objects.bulk_create(archived_scores) + print( + f"\033[Kmigrating archived scores: {i}. " + f"dur:{str(timezone.now() - start).split('.')[0]} " + "bulk creating... 
DONE", + ) def score_questions(qty: int | None = None, start_id: int = 0): diff --git a/posts/views.py b/posts/views.py index b53e2048f9..266a32dceb 100644 --- a/posts/views.py +++ b/posts/views.py @@ -469,6 +469,6 @@ def post_related_articles_api_view(request: Request, pk): ObjectPermission.can_view(permission, raise_exception=True) # Retrieve cached articles - articles = get_post_get_similar_articles(post) + articles = [] # get_post_get_similar_articles(post) return Response(PostRelatedArticleSerializer(articles, many=True).data) From 80d4efa9574cdea9d5e43194dcdccd2c2a6e6d12 Mon Sep 17 00:00:00 2001 From: lsabor Date: Wed, 28 Aug 2024 23:25:36 -0700 Subject: [PATCH 4/4] more tweaks --- migrator/management/commands/migrate_old_db.py | 2 +- posts/views.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/migrator/management/commands/migrate_old_db.py b/migrator/management/commands/migrate_old_db.py index 962410d5ec..120b7076c2 100644 --- a/migrator/management/commands/migrate_old_db.py +++ b/migrator/management/commands/migrate_old_db.py @@ -104,7 +104,7 @@ def handle(self, *args, site_ids=None, **options): # migrate_subscriptions(site_ids=site_ids) # task_start = print_duration("Migrated post subscriptions", task_start, start) - scoring + # scoring migrate_archived_scores() task_start = print_duration("Migrated archived scores", task_start, start) score_questions(start_id=options["start_score_questions_with_id"]) diff --git a/posts/views.py b/posts/views.py index 266a32dceb..b53e2048f9 100644 --- a/posts/views.py +++ b/posts/views.py @@ -469,6 +469,6 @@ def post_related_articles_api_view(request: Request, pk): ObjectPermission.can_view(permission, raise_exception=True) # Retrieve cached articles - articles = [] # get_post_get_similar_articles(post) + articles = get_post_get_similar_articles(post) return Response(PostRelatedArticleSerializer(articles, many=True).data)