Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions modules/post_slug_update_batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Run the commands below from inside `python manage.py shell`.
"""
import threading
import asyncio
from modules.post_slug_update_batch import main  # import main() from the module that defines it

def run_async_main():
    asyncio.run(main())

# Run in a separate thread
thread = threading.Thread(target=run_async_main)
thread.start()
thread.join()  # wait until the thread finishes
"""

import asyncio

import aiohttp
import environ
from aiohttp.client import ClientSession
from asgiref.sync import sync_to_async

from modules.token_encryption.aes_encryption import AESEncryption
from posts.models import Post
from scraping.apis import fetch_all_velog_posts, fetch_velog_user_chk
from users.models import User

# Environment reader; update_user_posts looks up the AES_KEY_<n> values through it.
env = environ.Env()
# NOTE(review): KEYS is not read by any executable code in this file. If it is
# meant as a local fallback for the AES keys, keep real key material out of
# source control.
KEYS = [
    "...",
]


async def update_user_posts(session: ClientSession, user: User) -> None:
    """Backfill ``Post.slug`` for the stored posts of one user.

    Decrypts the user's stored Velog tokens, resolves the Velog username,
    fetches the user's posts and copies each fetched ``url_slug`` onto the
    ``Post`` row that has the same ``post_uuid``. Fetched posts without a
    matching row are skipped. Users whose token check yields no username
    are skipped as a whole.

    Args:
        session: aiohttp session shared by all Velog API calls.
        user: the user whose posts are updated.
    """
    # The AES key is chosen from the environment by the user's group id.
    aes_key_index = (user.group_id % 100) % 10
    aes_encryption = AESEncryption(env(f"AES_KEY_{aes_key_index}").encode())
    access_token = aes_encryption.decrypt(user.access_token)
    refresh_token = aes_encryption.decrypt(user.refresh_token)

    _, user_data = await fetch_velog_user_chk(
        session, access_token, refresh_token
    )
    # The token check may return nothing usable (the scraper guards the same
    # call with a falsy check), so do not subscript it blindly.
    # NOTE(review): presumably this happens for expired tokens - confirm.
    try:
        username = user_data["data"]["currentUser"]["username"]  # type: ignore
    except (KeyError, TypeError):
        return

    fetched_posts = await fetch_all_velog_posts(
        session, username, access_token, refresh_token
    )

    for post in fetched_posts:
        # Only the lookup can raise DoesNotExist; keep the try body minimal.
        try:
            target_post = await Post.objects.aget(
                user=user,
                post_uuid=post["id"],
            )
        except Post.DoesNotExist:
            continue
        target_post.slug = post["url_slug"]
        await target_post.asave()


async def main() -> None:
    """Run the slug backfill for every user concurrently.

    All users are loaded up front and processed over one shared aiohttp
    session. A failure for one user is logged and does not stop the others.
    """
    import logging

    logger = logging.getLogger(__name__)

    users = await sync_to_async(list)(User.objects.all())
    async with aiohttp.ClientSession() as session:
        # return_exceptions=True: without it the first failing user would
        # raise out of this block and close the session while the remaining
        # users' coroutines are still using it.
        results = await asyncio.gather(
            *(update_user_posts(session, user) for user in users),
            return_exceptions=True,
        )

    for user, result in zip(users, results):
        if isinstance(result, BaseException):
            logger.error(
                "slug update failed for user pk=%s", user.pk, exc_info=result
            )


# NOTE(review): the module header describes running this from
# `python manage.py shell`; direct execution presumably needs Django to be
# configured before the model imports above - confirm.
if __name__ == "__main__":
    asyncio.run(main())
68 changes: 66 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions posts/migrations/0004_post_slug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 5.1.4 on 2025-02-06 01:45

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the optional ``slug`` column to the ``post`` model."""

    dependencies = [("posts", "0003_post_released_at")]

    operations = [
        migrations.AddField(
            model_name="post",
            name="slug",
            field=models.CharField(
                verbose_name="게시글 슬러그",
                max_length=500,
                null=True,
                blank=True,
            ),
        ),
    ]
6 changes: 6 additions & 0 deletions posts/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ class Post(TimeStampedModel):
title = models.CharField(
blank=False, null=False, max_length=255, verbose_name="제목"
)
slug = models.CharField(
blank=True,
null=True,
max_length=500,
verbose_name="게시글 슬러그",
)
released_at = models.DateTimeField(
blank=False, null=True, verbose_name="게시글 업로드 일시"
)
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ django-timescaledb = "^0.2.13"
psycopg = { extras = ["binary"], version = "^3.2.3" }
aiohttp = "^3.11.10"
aiohttp-retry = "^2.9.1"
uvicorn = "^0.34.0"
gunicorn = "^23.0.0"


[tool.poetry.group.dev.dependencies]
Expand Down
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ asttokens==3.0.0
attrs==24.3.0
cffi==1.17.1
cfgv==3.4.0
click==8.1.8
coverage==7.6.9
cryptography==43.0.3
decorator==5.1.1
Expand All @@ -20,6 +21,8 @@ django-timescaledb==0.2.13
executing==2.1.0
filelock==3.16.1
frozenlist==1.5.0
gunicorn==23.0.0
h11==0.14.0
identify==2.6.3
idna==3.10
iniconfig==2.0.0
Expand Down Expand Up @@ -55,6 +58,7 @@ sqlparse==0.5.3
stack-data==0.6.3
traitlets==5.14.3
typing_extensions==4.12.2
uvicorn==0.34.0
virtualenv==20.28.0
wcwidth==0.2.13
yarl==1.18.3
8 changes: 8 additions & 0 deletions run-kill.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash

# Production-only script.
# chmod +x run-kill.sh
#
# Stops every process whose command line contains "gunicorn".
# NOTE: -f matches the full command line, so unrelated commands that merely
# mention "gunicorn" (e.g. a `tail` on a gunicorn log) are matched as well.

# pkill exits with status 1 when nothing matched; `|| true` keeps that from
# aborting a caller that runs this file under `set -e`.
# Send SIGTERM first so gunicorn can shut down gracefully.
pkill -TERM -ef "gunicorn" || true

# Wait up to 10 seconds for the processes to exit on their own.
for _ in 1 2 3 4 5 6 7 8 9 10; do
    pgrep -f "gunicorn" > /dev/null || break
    sleep 1
done

# Force-kill anything that is still alive.
pkill -KILL -ef "gunicorn" || true
echo "====================== process kill clear, check out the result ======================"
sleep 1
26 changes: 26 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Production-only script.
# chmod +x run.sh
set -e  # stop the script as soon as a command fails

# All paths below are relative; resolve them against this script's directory
# so the script also works when invoked from another working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Stop any running gunicorn. Run the kill script as a child process instead of
# sourcing it: pkill exits non-zero when no process matched, and inside a
# sourced file that would trip `set -e` and abort a first deploy.
bash ./run-kill.sh

# Activate the virtualenv and install dependencies.
source ./.venv/bin/activate
pip install -r requirements.txt

# export DJANGO_SETTINGS_MODULE=config.settings.prod
python manage.py collectstatic --no-input
python manage.py migrate

sleep 1
# --log-file is an alias of --error-logfile in gunicorn, so passing both only
# kept the last one; the error log is named once here.
gunicorn --workers 2 --bind 0.0.0.0:8000 --log-level=info \
    --access-logfile=./gunicorn-access.log \
    --error-logfile=./gunicorn-error.log --max-requests=1200 \
    --max-requests-jitter=100 config.wsgi:application --daemon

sleep 1

# Show the started processes and the latest access-log lines for a quick check.
ps -ef | grep gunicorn | grep -v grep
sleep 1
tail ./gunicorn-access.log
1 change: 1 addition & 0 deletions scraping/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
posts(input: $input) {
id
title
url_slug
likes
released_at
}
Expand Down
26 changes: 15 additions & 11 deletions scraping/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,17 @@ async def update_old_tokens(
user: User,
aes_encryption: AESEncryption,
user_cookies: dict[str, str],
old_access_token: str,
old_refresh_token: str,
origin_access_token: str,
origin_refresh_token: str,
) -> None:
"""토큰 만료로 인한 토큰 업데이트"""
response_access_token, response_refresh_token = (
user_cookies["access_token"],
user_cookies["refresh_token"],
)
if response_access_token != old_access_token:
if response_access_token != origin_access_token:
user.access_token = aes_encryption.encrypt(response_access_token)
if response_refresh_token != old_refresh_token:
if response_refresh_token != origin_refresh_token:
user.refresh_token = aes_encryption.encrypt(response_refresh_token)

try:
Expand All @@ -73,6 +73,7 @@ async def bulk_create_posts(
post_uuid=post["id"],
title=post["title"],
user=user,
slug=post["url_slug"],
released_at=post["released_at"],
)
for post in fetched_posts
Expand Down Expand Up @@ -107,6 +108,7 @@ async def update_daily_statistics(
if not created:
daily_stats.daily_view_count = stats["data"]["getStats"]["total"] # type: ignore
daily_stats.daily_like_count = post.get("likes", 0)
daily_stats.updated_at = self.get_local_now()
await daily_stats.asave(
update_fields=["daily_view_count", "daily_like_count"]
)
Expand All @@ -118,12 +120,12 @@ async def process_user(
aes_key_index = (user.group_id % 100) % 10
aes_key = self.env(f"AES_KEY_{aes_key_index}").encode()
aes_encryption = AESEncryption(aes_key)
old_access_token = aes_encryption.decrypt(user.access_token)
old_refresh_token = aes_encryption.decrypt(user.refresh_token)
origin_access_token = aes_encryption.decrypt(user.access_token)
origin_refresh_token = aes_encryption.decrypt(user.refresh_token)

# 토큰 유효성 검증
user_cookies, user_data = await fetch_velog_user_chk(
session, old_access_token, old_refresh_token
session, origin_access_token, origin_refresh_token
)
if not (user_data or user_cookies):
return
Expand All @@ -139,19 +141,21 @@ async def process_user(
user,
aes_encryption,
user_cookies,
old_access_token,
old_refresh_token,
origin_access_token,
origin_refresh_token,
)

username = user_data["data"]["currentUser"]["username"] # type: ignore
fetched_posts = await fetch_all_velog_posts(
session, username, old_access_token, old_refresh_token
session, username, origin_access_token, origin_refresh_token
)

await self.bulk_create_posts(user, fetched_posts)

tasks = [
fetch_post_stats(post["id"], old_access_token, old_refresh_token)
fetch_post_stats(
post["id"], origin_access_token, origin_refresh_token
)
for post in fetched_posts
]
statistics_results = await asyncio.gather(*tasks)
Expand Down