# 최신 데이터 500장 수집

In [None]:
import os
import praw
import requests
import re
import time

# Reddit API settings (placeholder values - fill in real credentials before running)
CLIENT_ID = "---------"
CLIENT_SECRET = "-------"
USER_AGENT = "BMI_Image_Scraper_v1.0"

# Folder the images are saved to (absolute path)
SAVE_DIR = os.path.abspath("progress_pics")

# File that remembers the IDs of posts downloaded by earlier runs
COLLECTED_POSTS_FILE = "collected_posts.txt"

# Seconds to wait on the image host before a download is abandoned
REQUEST_TIMEOUT = 30


def is_image_url(url):
    """Return True when *url* ends in a jpg/jpeg/png suffix, ignoring case."""
    return url.lower().endswith(("jpg", "jpeg", "png"))


def safe_filename(title, fallback="untitled", max_len=100):
    """Build the ``.png`` file name used on disk from a post title.

    Characters that are not allowed in file names are dropped, line breaks
    are removed, spaces become underscores, the typographic apostrophe is
    normalised to ``'`` and the result is cut to *max_len* characters.
    *fallback* is used when nothing is left of the title.
    """
    title = re.sub(r'[\\/*?:"<>|]', "", title)  # characters the OS rejects
    title = re.sub(r"[\r\n]", "", title)  # a line break is not valid in a file name
    title = title.replace(" ", "_").replace("’", "'")
    title = title[:max_len] or fallback
    # Every image is stored with a .png extension regardless of its source format
    return f"{title}.png"


def download_image(url, filepath, timeout=REQUEST_TIMEOUT):
    """Stream *url* into *filepath*; return True only if the full body was saved.

    The body is written to ``<filepath>.part`` first and renamed afterwards so
    that an interrupted transfer never leaves a truncated image behind.
    Network and file-system errors are reported and turned into ``False``.
    """
    partial = filepath + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                return False
            with open(partial, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
        os.replace(partial, filepath)
        return True
    except (requests.RequestException, OSError) as exc:
        print(f"⚠️ Download error: {exc}")
        if os.path.exists(partial):
            os.remove(partial)
        return False


# The guard is true when the cell/script is executed, so all names below are
# still created as globals; it only keeps an import of this code side-effect free.
if __name__ == "__main__":
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Connect to the Reddit API
    reddit = praw.Reddit(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)

    # Load the IDs of posts that were already collected
    if os.path.exists(COLLECTED_POSTS_FILE):
        with open(COLLECTED_POSTS_FILE, "r") as f:
            collected_posts = set(f.read().splitlines())
    else:
        collected_posts = set()

    # IDs of posts saved during this run
    new_collected_posts = set()

    # Subreddit to crawl
    subreddit = reddit.subreddit("progresspics")

    # Newest posts plus this week's top posts (for variety)
    posts_to_collect = []
    posts_to_collect.extend(subreddit.new(limit=500))
    posts_to_collect.extend(subreddit.top(time_filter="week", limit=500))

    # The two listings overlap, so track every ID handled in this run as well
    seen_ids = set(collected_posts)

    try:
        for submission in posts_to_collect:
            if submission.id in seen_ids:
                continue  # collected earlier or already handled in this run
            seen_ids.add(submission.id)

            if not is_image_url(submission.url):
                continue

            filename = safe_filename(submission.title, fallback=submission.id)
            filepath = os.path.join(SAVE_DIR, filename)

            if download_image(submission.url, filepath):
                print(f"✅ Saved: {filename}")
                new_collected_posts.add(submission.id)
            else:
                print(f"❌ Failed to download: {submission.url}")

            # Throttle requests
            time.sleep(1)
    finally:
        # Record the new IDs even when the run is interrupted, so the next
        # run does not download the same images again
        with open(COLLECTED_POSTS_FILE, "a") as f:
            for post_id in new_collected_posts:
                f.write(post_id + "\n")

    print("🎉 Image download completed.")


✅ Saved: M215'10_[253lbs__178lbs_=_75lbs]_(12_months)_How_much_further_to_go.png
✅ Saved: M206'1_[168_lbs__162_lbs_=_6_lbs]_(12_months)_Converted_most_of_my_fat_into_lean_mass.png
✅ Saved: F265'5_[260IDK]_(1_12_months).png
✅ Saved: M305'11_[165lbs__185lbs_=_20lbs]_(36_Months)_Made_Some_Changes.png
✅ Saved: F255'4_[250__150_=_100lbs]_(15_months)_I_HIT_100!!.png
✅ Saved: M516'1_[434lbs337lbs=97lbs]_(8_months).png
✅ Saved: F324'11_[267__165_=_102_pounds]__Over_the_course_of_10_years__Currently_focusing_on_toning_up.png
✅ Saved: F235'9”_[165lbs__172_lbs_=_+5lbs]_(4_months)_Ab_Definition_Progress.png
✅ Saved: M216'0_[133_lbs__146_lbs_=_13_lbs]__3_months__I've_been_bulking_and_training_like_crazy._Getting_a_l.png
✅ Saved: F285'4”_[260188=72_pounds]_(13_months)_Hey_look_ma_I_made_it.png
✅ Saved: M345'8”_[185lbs__160lbs_=_25_lbs]_(2_years).png
✅ Saved: F325'3_[220lbs__172lbs_=_48_lbs]_(4_months)_Body_recomp_in_progress..png
✅ Saved: M315'11_[143lbs__210lbs_=_67lbs]_(6_years)_From_when_I_starte

✅ Saved: M266'0”_[140__190_=_50lbs](72_months_6_Years).png
✅ Saved: M185'6_[160__153_=_7_pounds_lost]_(2_months).png
✅ Saved: F215'9_[252__175_=_77]_(40_months)_Almost_Done.png
✅ Saved: M346'1_[265__199_=_66lbs_lost]_(4_years,_28lbs_lost_so_far_in_2025)_First_time_in_10_years_that_the_.png
✅ Saved: F315'4_[170_lbs__150.5_lbs_=19.5_lbs]_(6_months)_Same_outfit,_different_body!.png
✅ Saved: M356'2_[240__245=_5_gained]_it's_not_always_about_weight_loss_but_sometimes_about_overall_look.png
✅ Saved: F21173_cm_[75_kg__75_kg_= 0_kg]_(2_years)_same_weight,_different_body_!.png
✅ Saved: F425'0”_[175144=31]_(9_mo)_Just_crossed_the_halfway_point!.png
✅ Saved: M345'9”_[203__159lbs_=_44lbs]_(6_months)_losing_fat_and_body_recomposition.png
✅ Saved: F255'7”_[143143_=_0]__11_months__Quad_growth_after_cut_and_bulk.png
✅ Saved: F225'3_[242lbs__165lbs_=_77_lbs]_(36_months).png
✅ Saved: F185'8”_[265lbs__180lbs_=_85lbs]_(14_months)_15lbs_away_from_goal!!.png
✅ Saved: F285'2_[96_KG__72_KG_=_24_KG]_(18_months

✅ Saved: M195'10”_[54.7kg120.6lbs__59.7kg131.6lbs_=_5kg11lbs]_(5_months).png
✅ Saved: F235'2_[200_lbs__140135_lbs_=_60-65_lbs]_(1_year).png
✅ Saved: F295”3'_[295235=_60lbs]_(8_months)_-_Same_outfit_10_months_apart,_and_8_months_since_I_started_losin.png
✅ Saved: F275'3”_[102kg225lbs__62kg137lbs_=_40kg88lbs]_(10_months).png
✅ Saved: M295'9_[109kg__77kg_=32kg]_(8_month)_What_a_wonderful_weightloss_journey.png
✅ Saved: F405'5”[258157.9=100.1lbs]_(15_months)_Feels_good_to_hit_that_100_mark.png
✅ Saved: M215'11_[241.5__147.9_=_93.6_pounds_lost]_(about_7_months)_help!.png
✅ Saved: F275'5”[178lbs__128lbs]_50_lbs_gone_in_about_6_months_only_3_more_lbs_to_go_for_GW.png
✅ Saved: F305'6_[222lbs_180lbs_=42]_(15_months)_Same_dress,_a_year_and_a_bit_apart.png
✅ Saved: F305'9_[470310=160](1year,__6months)_still_have_a_bit_to_go..png
✅ Saved: F205'4_[186lbs__147lbs_=_39lbs]_(7_months)_haven't_made_much_progress_in_the_last_couple_months,_but.png
✅ Saved: F20165cm5'5_[75.3kg162lbs__60.6kg133.6lbs_=_12.

✅ Saved: F345'10_[230lb175lb]=55lb_(8months)_Lifting_and_cardio.png
✅ Saved: M235”11_[63KG81KG]_9_Months_gym.png
✅ Saved: F295'4_[289_lbs__208_lbs_=_81_lbs]_(9_months)_Finally_starting_to_feel_comfortable_in_my_own_skin!.png
✅ Saved: F344'11”_[122_lbs__100_lbs=_22_lbs]_(9_months).png
✅ Saved: M315'10”_[170lbs__215lbs_=_45lbs]_(6_years).png
✅ Saved: X265'7”_[185_lbs__160_lbs_=_25_lbs]_(12_months)_longtime_lurker_finally_feeling_confident_enough_to_.png
✅ Saved: M356'1_[270lbs__205_lbs_=_65_lbs]_(5_months)_Feeling_great!.png
✅ Saved: M196'0_[75kg__71kg_=_4kg]_(12_months)_taking_it_slow_but_I_feel_healthier.png
✅ Saved: F275'2_[262lbs__135lbs=_127lbs]_(almost_2_years)_Think_I'm_done_actively_trying_to_lose_weight_and_r.png
✅ Saved: M346'3”_[205_185_=_20]_(6_months).png
✅ Saved: F255'4_[93lbs__125lbs_=_+_32]_(24_months)_Quit_drinking,_vaping,_smoking.png
✅ Saved: F245'3_[197lbs__188lbs_=_9lbs]_(1_month)_Has_anyone_ever_been_able_to_lose_their_apron_belly_Most_pe.png
✅ Saved: F366'3_[240lbs

✅ Saved: M435'8_[154lbs__178lbs_=_20lbs]_(24_months)2_years_of_bulking__recomp.png
✅ Saved: F205'3_[130lbs__110lbs_=_20lbs]_(36_months)_even_though_it's_not_a_ton_of_weight,_my_ba_definitely_s.png
✅ Saved: M295'7”_[158__168lbs_=_10lbs]_(2_Months)_Bulking_Process.png
✅ Saved: M286'3”_[300210=90lbs]_(5_years)_I_like_to_post_an_update_one_a_year..png
✅ Saved: F305'8_[263lbs__168lbs_=_95lbs]_(13_months)_tried_so_hard_to_break_100lbs_lost_in_a_year.png
✅ Saved: F265'9”_[172__145_=_27lbs]_(1_year)_celebrating_12_months_of_consistency_against_all_the_odds_💪🏼.png
✅ Saved: M215'10_[73kg__84kg_=_11kg]_(4_months)_Lean_bulk.png
✅ Saved: M265'10_[150__175]_18_Months.png
✅ Saved: F505'3”_[350147=203](3_years)I_am_an_official_gym_rat_now!.png
✅ Saved: F27152cm_[71kg__67kg_=_4kg]__5_months__The_time_will_pass_anyway.png
✅ Saved: F265'2”_[250160_=_90lbs](1-2_years)__Holy_cow!.png
✅ Saved: F185'6_[240lbs__197lbs_=_43lbs_lost]_(1_year).png
✅ Saved: F33165cm_[96kg__91kg_=_5kg]_(56_months)_It_ain't_much,_b

✅ Saved: F295”3'_[295241_=_54lbs]_(7_months)_-_started_getting_in_shape_in_June,_finishing_university_allowed.png
✅ Saved: F355'5_[320__275_=_45_lbs.]__5.5_months__Different_lighting_but_I_see_it_in_my_face_proportions_more.png
✅ Saved: F305'6”_[478lbs__330lbs_=_148lbs_lost]_(36_months)_Plateaued,_but_trying_to_kickstart_the_loss_again.png
✅ Saved: M336'4”_[392__310_=_82lbs_Lost]_(24_months)_Big_Chungus_Hopper_to_Regular_Hop!.png
✅ Saved: F304'11_[210__165_=_45lbs]_(24_months)_Longest_time_I've_ever_gone_not_giving_up_on_the_gym.png
✅ Saved: M355'11_[225lbs__175lbs_=_50lbs]_(8_months)_Finally_decided_to_get_in_shape_for_the_first_time_in_my.png
✅ Saved: F345'3”_[129lbs_123lbs_=_6lbs_lost]_(12_months)_glute_gains_while_losing_weight..png
✅ Saved: F575'2_[200122=78](12_months)_feeling_better!.png
✅ Saved: F255'9_[190lbs__160lb_=_30lbs]_(3_Months)_Same_dress_I_wore_last_summer!.png
✅ Saved: M286'3_[178190=12lbs_gain]_(14_weeks)_Duplicate_with_front_body_from_this_post.png
✅ Saved: F305'2_

✅ Saved: M516'1_[434lbs337lbs=97lbs]_(8_months).png
✅ Saved: M355'10”_[311lbs__225lbs_=_86lbs]_(23_months)_I_haven't_posted_an_update_in_a_while!.png
✅ Saved: M325'11_[236__223_=_13lbs_lost]_(3_Months)_I_can't_sleep_until_I_at_least_do_cardio_tonight.png
✅ Saved: F225'11_[127kg__111kg_=_16kg]_(2_months).png
✅ Saved: M346'1_[265__199_=_66lbs_lost]_(4_years,_28lbs_lost_so_far_in_2025)_First_time_in_10_years_that_the_.png
✅ Saved: M285'11_[320lbs__195lbs_=125lbs]_(5_years)_2020_23_yr's_old_vs_2025_28_yr's_old_turning_29_in_Septem.png
✅ Saved: F27167cm_[95kg209lb__88kg193lb_=_7kg16lb]_(8_months)_Not_a_huge_change_in_numbers_but_a_more_importa.png
✅ Saved: F325'2_[165__145_=_20_pounds]_(10_months)_all_ive_changed_is_my_diet_and_my_eating_habits..png
✅ Saved: M376'[240__170_=_70]_3_years._Happy_to_answer_any_questions_that_can_helpinspire.png
✅ Saved: F216'1”_[275lbs__230lbs_=_45lbs]_(12_months)_really_feeling_my_new_look.png
✅ Saved: F27152cm_[70kg__66kg_=_4_kg]_6.5_months.png
✅ Saved: M266

# 랜덤 데이터 수집

In [None]:
import os
import praw
import requests
import re
import time

# Reddit API settings (placeholder values - fill in real credentials before running)
CLIENT_ID = "-------"
CLIENT_SECRET = "-------"
USER_AGENT = "BMI_Image_Scraper_v1.0"

# Folder the images are saved to (absolute path)
SAVE_DIR = os.path.abspath("progress_pics")

# File that remembers the IDs of posts downloaded by earlier runs
COLLECTED_POSTS_FILE = "collected_posts.txt"

# Seconds to wait on the image host before a download is abandoned
REQUEST_TIMEOUT = 30


def is_image_url(url):
    """Return True when *url* ends in a jpg/jpeg/png suffix, ignoring case."""
    return url.lower().endswith(("jpg", "jpeg", "png"))


def safe_filename(title, fallback="untitled", max_len=100):
    """Build the ``.png`` file name used on disk from a post title.

    Characters that are not allowed in file names are dropped, line breaks
    are removed, spaces become underscores, the typographic apostrophe is
    normalised to ``'`` and the result is cut to *max_len* characters.
    *fallback* is used when nothing is left of the title.
    """
    title = re.sub(r'[\\/*?:"<>|]', "", title)  # characters the OS rejects
    title = re.sub(r"[\r\n]", "", title)  # a line break is not valid in a file name
    title = title.replace(" ", "_").replace("’", "'")
    title = title[:max_len] or fallback
    # Every image is stored with a .png extension regardless of its source format
    return f"{title}.png"


def download_image(url, filepath, timeout=REQUEST_TIMEOUT):
    """Stream *url* into *filepath*; return True only if the full body was saved.

    The body is written to ``<filepath>.part`` first and renamed afterwards so
    that an interrupted transfer never leaves a truncated image behind.
    Network and file-system errors are reported and turned into ``False``.
    """
    partial = filepath + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                return False
            with open(partial, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
        os.replace(partial, filepath)
        return True
    except (requests.RequestException, OSError) as exc:
        print(f"⚠️ Download error: {exc}")
        if os.path.exists(partial):
            os.remove(partial)
        return False


# The guard is true when the cell/script is executed, so all names below are
# still created as globals; it only keeps an import of this code side-effect free.
if __name__ == "__main__":
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Connect to the Reddit API
    reddit = praw.Reddit(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)

    # Load the IDs of posts that were already collected (used to skip them)
    if os.path.exists(COLLECTED_POSTS_FILE):
        with open(COLLECTED_POSTS_FILE, "r") as f:
            collected_posts = set(f.read().splitlines())
    else:
        collected_posts = set()

    # IDs of posts saved during this run
    new_collected_posts = set()

    # Subreddit to crawl
    subreddit = reddit.subreddit("progresspics")

    # Ask the random_rising listing for up to 2000 posts.
    # NOTE(review): the recorded run of this cell saved only 3 new images, so
    # the listing appears to return far fewer posts than the requested limit.
    try:
        posts_to_collect = list(subreddit.random_rising(limit=2000))
    except Exception as e:
        print(f"❌ API 요청 실패: {e}")
        posts_to_collect = []

    # Backup plan: fall back to the newest posts when nothing came back
    if not posts_to_collect:
        print("⚠️ random_rising()에서 데이터를 가져오지 못함. 최신 게시물 2000개를 대신 가져옴.")
        posts_to_collect = list(subreddit.new(limit=2000))

    # Track every ID handled in this run so a post listed twice is fetched once
    seen_ids = set(collected_posts)
    downloaded = 0  # number of images saved in this run

    try:
        for submission in posts_to_collect:
            if submission.id in seen_ids:
                continue  # collected earlier or already handled in this run
            seen_ids.add(submission.id)

            if not is_image_url(submission.url):
                continue

            filename = safe_filename(submission.title, fallback=submission.id)
            filepath = os.path.join(SAVE_DIR, filename)

            if download_image(submission.url, filepath):
                print(f"✅ Saved: {filename}")
                new_collected_posts.add(submission.id)
                downloaded += 1
            else:
                print(f"❌ Failed to download: {submission.url}")

            # Throttle requests
            time.sleep(1)
    finally:
        # Record the new IDs even when the run is interrupted, so the next
        # run does not download the same images again
        if new_collected_posts:
            with open(COLLECTED_POSTS_FILE, "a") as f:
                for post_id in new_collected_posts:
                    f.write(post_id + "\n")

    print(f"🎉 Image download completed. Total images saved: {downloaded}")


✅ Saved: F265'_(154cm)_[72lbs__101lbs_=_+29lbs]_(3_years)_It_feels_amazing_to_finally_be_a_healthy_weight!.png
✅ Saved: M186'2_[195lbs__175lbs_=_20lbs]_(12_months)_Really_proud_of_my_progress_and_I_thought_I'd_share_with.png
✅ Saved: M276'3_[312__210=_-102lbs]_(6_years)_Trust_the_process,_it_works!!.png
🎉 Image download completed. Total images saved: 3


In [None]:
import os
import praw
import requests
import re
import time

# Reddit API settings (placeholder values - fill in real credentials before running)
CLIENT_ID = "-------"
CLIENT_SECRET = "-------"
USER_AGENT = "BMI_Image_Scraper_v1.0"

# Folder the images are saved to (absolute path)
SAVE_DIR = os.path.abspath("progress_pics")

# File that remembers the IDs of posts downloaded by earlier runs
COLLECTED_POSTS_FILE = "collected_posts.txt"

# Seconds to wait on the image host before a download is abandoned
REQUEST_TIMEOUT = 30


def is_image_url(url):
    """Return True when *url* ends in a jpg/jpeg/png suffix, ignoring case."""
    return url.lower().endswith(("jpg", "jpeg", "png"))


def safe_filename(title, fallback="untitled", max_len=100):
    """Build the ``.png`` file name used on disk from a post title.

    Characters that are not allowed in file names are dropped, line breaks
    are removed, spaces become underscores, the typographic apostrophe is
    normalised to ``'`` and the result is cut to *max_len* characters.
    *fallback* is used when nothing is left of the title.
    """
    title = re.sub(r'[\\/*?:"<>|]', "", title)  # characters the OS rejects
    title = re.sub(r"[\r\n]", "", title)  # a line break is not valid in a file name
    title = title.replace(" ", "_").replace("’", "'")
    title = title[:max_len] or fallback
    # Every image is stored with a .png extension regardless of its source format
    return f"{title}.png"


def download_image(url, filepath, timeout=REQUEST_TIMEOUT):
    """Stream *url* into *filepath*; return True only if the full body was saved.

    The body is written to ``<filepath>.part`` first and renamed afterwards so
    that an interrupted transfer never leaves a truncated image behind.
    Network and file-system errors are reported and turned into ``False``.
    """
    partial = filepath + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                return False
            with open(partial, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
        os.replace(partial, filepath)
        return True
    except (requests.RequestException, OSError) as exc:
        print(f"⚠️ Download error: {exc}")
        if os.path.exists(partial):
            os.remove(partial)
        return False


# The guard is true when the cell/script is executed, so all names below are
# still created as globals; it only keeps an import of this code side-effect free.
if __name__ == "__main__":
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Connect to the Reddit API
    reddit = praw.Reddit(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)

    # Load the IDs of posts that were already collected (used to skip them)
    if os.path.exists(COLLECTED_POSTS_FILE):
        with open(COLLECTED_POSTS_FILE, "r") as f:
            collected_posts = set(f.read().splitlines())
    else:
        collected_posts = set()

    # IDs of posts saved during this run
    new_collected_posts = set()

    # Subreddit to crawl
    subreddit = reddit.subreddit("progresspics")

    # Query random_rising several times, aiming for 2000 posts in total.
    # NOTE(review): in the recorded run each call added only 25 posts, and the
    # repeated calls can return the same posts again.
    posts_to_collect = []
    try:
        for _ in range(5):  # five requests of up to 400 posts each
            posts_to_collect.extend(subreddit.random_rising(limit=400))
            print(f"🔄 현재 수집된 게시물 개수: {len(posts_to_collect)}")
            time.sleep(1)  # pause one second between listing requests
    except Exception as e:
        print(f"❌ API 요청 실패: {e}")

    # Top up with the newest posts when the target was not reached
    if len(posts_to_collect) < 2000:
        print("⚠️ random_rising()에서 충분한 데이터를 확보하지 못함. 최신 게시물 추가 수집 중...")
        posts_to_collect.extend(subreddit.new(limit=2000 - len(posts_to_collect)))

    # Track every ID handled in this run so a post listed twice is fetched once
    seen_ids = set(collected_posts)
    downloaded = 0  # number of images saved in this run

    try:
        for submission in posts_to_collect:
            if submission.id in seen_ids:
                continue  # collected earlier or already handled in this run
            seen_ids.add(submission.id)

            if not is_image_url(submission.url):
                continue

            filename = safe_filename(submission.title, fallback=submission.id)
            filepath = os.path.join(SAVE_DIR, filename)

            if download_image(submission.url, filepath):
                print(f"✅ Saved: {filename}")
                new_collected_posts.add(submission.id)
                downloaded += 1
            else:
                print(f"❌ Failed to download: {submission.url}")

            # Throttle requests
            time.sleep(1)
    finally:
        # Record the new IDs even when the run is interrupted, so the next
        # run does not download the same images again
        if new_collected_posts:
            with open(COLLECTED_POSTS_FILE, "a") as f:
                for post_id in new_collected_posts:
                    f.write(post_id + "\n")

    print(f"🎉 Image download completed. Total images saved: {downloaded}")


🔄 현재 수집된 게시물 개수: 25
🔄 현재 수집된 게시물 개수: 50
🔄 현재 수집된 게시물 개수: 75
🔄 현재 수집된 게시물 개수: 100
🔄 현재 수집된 게시물 개수: 125
⚠️ random_rising()에서 충분한 데이터를 확보하지 못함. 최신 게시물 추가 수집 중...
✅ Saved: M286'3_[178190=12lbs_gain]_(14_weeks)._Upper_body_progress..png
✅ Saved: F305'6''_[217__187_=_30]_4_months.png
✅ Saved: M386'1_[486.6lbs__361.5lbs_=_150lbs]__120_months_(ten_years)__Many_downs_and_ups_in_ten_years,_curre.png
✅ Saved: M255'8”_[155lbs__175lbs_=_20lbs]__3_Months__Bulking_until_spring_and_then_cutting_for_summer._Trying.png
✅ Saved: F255'3”_[225lbs__171lbs_=54lbs]_(6_months)__Did_it_with_diet_for_a_while,_now_starting_to_introduce_.png
✅ Saved: M315'8[195lb270lb=-75lb]_(2.5_years)_even_after_all_these_years_I'm_afraid_to_eat_food.png
✅ Saved: M255'7”_[185lbs_135lbs_=_50lbs_lost]_(18_months)_Been_a_long_road_mentally_and_physically,_but_I'm_p.png
✅ Saved: M365'10”_[265.7lbs_252.2lbs_=_13.5lbs]_(26_days)_2025_progress.png
✅ Saved: M316'0[223210=13_lbs_lost]_(1_month)_Goal_is_around_180.png
✅ Saved: M236'1”_[29

✅ Saved: M205'5_[170__147=23lbs]_[10_months].png
✅ Saved: F235'5_[151lbs__144lbs_=_7lbs]__2_months__Small_wins.png
✅ Saved: M215'9”_[139lbs__167lbs_=_28lbs]_(3_years)_Got_sober_for_a_bit,_started_working_construction_and_eat.png
✅ Saved: F255'5”_[235lb+__140lb_=_95lb]_(26_months)_2_years_of_relearning_life_habits.png
✅ Saved: F345'6_[181lbs__127lbs_=_54lbs]_(19_Months)_Maintenance.png
✅ Saved: M385'6_[220lbs160lbs_=60_lbs]_(4_months).png
✅ Saved: F264'11_[250lbs__229lbs=_21lbs_lost]_(5_months)_Started_weight_lifting.png
✅ Saved: F325'3_[189lbs__147lbs_=_42lbs]_(5_Months)_Weight_loss_progress.png
✅ Saved: F325'3”_[129119=_-10lbs]_(8_months)_working_on_having_more_muscle..png
✅ Saved: F365'3_[185lbs__130lbs_=_55lbs]__12_months__Same_shirt_one_year_apart.png
✅ Saved: F245'4_[112kg__87kg_=_25kg_loss]_(12_months)_Not_reached_my_overall_target_yet_but_slow_sustainable_.png
✅ Saved: F265'2_[170lbs__155lbs_=_15lbs]_(5_Months)_The_weight_on_my_driver's_license_is_finally_accurate_aga.png
✅ Save

✅ Saved: M285'3_[145_lbs147lbs=_+2lbs]_(~2_months)_Started_strength_training._It's_small,_but_I'm_happy_with_.png
✅ Saved: M336'0”_[174lbs__174lbs_=_0lbs]_(1_year)_Training_from_home_with_dumbbells_only_so_leg_progress_is_s.png
✅ Saved: M225'7”_[140Ibs__125Ibs_=_15Ibs]_(3.5_Months)_First_time_taking_it_all_seriously_and_very_proud_so_f.png
✅ Saved: M265'10_[155lbs__155lbs_=_0_lbs]_(1_year)_Yes,_it's_full_circle_in_rotation_ironically_.png
✅ Saved: F295'9”_[373210=163lbs_loss]_(over_2_years_progress)_proud_of_all_the_progress_I_have_made.png
✅ Saved: M286'0_[240LBS__171_LBS_=_69_lbs]_(8_years)_Thought_this_was_a_good_amount_of_pounds_lost_for_an_upd.png
✅ Saved: F305'5_[255lbs230lbs_=_25lbs_lost]_(6_months)_Focusing_on_sustainable_weight_loss_and_a_healthy_rela.png
✅ Saved: F225'3”_[181lbs__158lbs_=_23lbs]_(4_months)_still_a_long_way_to_go_but_happy_with_the_progress_so_fa.png
✅ Saved: M346'2”_[165lbs__200lbs_=35lbs]_(10_years)_From_Timid_Twink_to_Tawdry_Thirstrapper_A_Decade_of_Gains.p

✅ Saved: F325'5_[215_180_=_35lbs]_(7months)_mainly_weights_and_60g_protein,_going_to_add_cardio_and_up_my_pro.png
✅ Saved: M255'11”[171lbs184lbs=_13lbs]__~2.5_months__Bulk.png
✅ Saved: M365'9_[202192_=_10lbs]__1.5_months__TRT_+_3-5_daysweek_in_the_gym._.png
✅ Saved: F425'7”_[172.4__139.8_=_32.6]_(12_months)_41st_birthday_-_42nd_birthday.png
✅ Saved: F305'6”_[141lbs__142lbs=+1lb]_(3months)_the_scale_doesn't_show_the_full_picture.png
✅ Saved: M285”11_[135145]_(5_months)_Been_grinding_the_gym_three_days_a_week_and_liking_the_results_lately._.png
✅ Saved: M365'10”_[283_lbs__242_lbs_=_41_lbs]_(5_months).__.png
✅ Saved: M306'0[280__230_=_50_lost]_(7_Months)_Not_sure_if_I_formatted_this_correctly,_I'm_pretty_new_to_Redd.png
✅ Saved: M206'0_[180lb__175lb=5lb]_~9_Months.png
✅ Saved: M315'6_[357__295_=_62_down]_(4_Months)_Face_Gains.png
✅ Saved: F335'2_[245205_=_40_lbs]_(18_months)_Not_pictured_the_inner_glow-up_🙂.png
✅ Saved: M316'6_[260205=55lbs](12_months)_1_year_down!_.png
✅ Saved: M376'0.5”

✅ Saved: M195'10''_[94lbs__175lbs_=_81lbs]_(2_years)_.png
✅ Saved: M256'8_[590lbs__395lbs_=_195lbs]_(16_months).png
✅ Saved: M255'10”_[195lbs__178lbs_=_17lbs]_(3_months)_weight_loss_progress.png
✅ Saved: M245”11_[205-170lbs=35]18months_.png
✅ Saved: M296'1”_[310__175_=_135lbs]_(4_years)_Been_playing_the_long_game..png
✅ Saved: M296'0_[235__208_=_27lbs]_(2_years)__Dad_of_4._Made_lifestylediet_changes_&_did_off_&_on_calisthenic.png
✅ Saved: F275'8_[208lbs__184lbs_=_24lbs]_(2_months)_Super_happy_for_these_subtle_progress_changes_&_the_stren.png
✅ Saved: F205'4_[186150=36lbs]_(6_months)_almost_there!_.png
✅ Saved: F275'2_[270Ibs__180Ibs_=_90Ibs]_(2_12_years).png
✅ Saved: F275'6_[232__186_=_46lbs]_(18_months)_I_am_losing_motivation_and_have_been_maintaining_for_3_months_.png
✅ Saved: F355'5_[227177=50lbs]_(2_years)_I'm_half_way_to_my_goal_weight!!!!_Celebrate_with_me!!!!Whoooo!_.png
✅ Saved: M346'3'_[205lbs__188lbs_=_17lbs]_(1_Month)_Losing_weight._Ready_to_gain_muscle_now!.png
✅ Saved: M29

✅ Saved: F375'4_[248__124_lbs_=_124_lbs]_pushing_3_years._Still_working_on_losing_fat_and_gaining_muscle._.png
✅ Saved: F205'7_[286lbs__200lbs_=_86lbs]_(6_months)_I_feel_like_this_is_not_40kg_worth_of_face_weight_loss_.png
✅ Saved: M346'1_[198lbs__172lbs_=_26lbs]_(6_Months)_How_2024_started_vs_how_it_ended.png
✅ Saved: M315'8_[192lbs__157lbs_=__35lbs]_10_month_progress__.png
✅ Saved: M475'6_[175lbs__150lbs_=_25_lbs]_(7_months)_I'm_9_months_sober_now_&_feeling_like_a_new_human..png
✅ Saved: F185'6”_[240lbs__200lbs_=_40lbs]_(1_year)_More_photos_comparing_before_and_after_losing_40lbs.png
✅ Saved: F285'3_[220lbs__184_lbs_=_36lbs]_(5_months)_Lightest_I've_been_in_6_years..png
✅ Saved: M365'10”_[97kg__88kg_=-9kg]_(recomp_between_July_-_Jan,_lost_6_inches_on_my_waist,_change_of_lifesty.png
✅ Saved: F285'0”_[72kg__49kg_=_23kg]_(7_months)_Weight_loss_progress_.png
✅ Saved: F33175cm_[308lbs__173lbs_=_135lbs_lost]_This_is_hard,_the_more_I_lose,_the_more_critical_I_am_of_my_.png
✅ Saved: F325'4_[

In [None]:
import os
import praw
import requests
import re
import time

# Reddit API settings (placeholder values - fill in real credentials before running)
CLIENT_ID = "-------"
CLIENT_SECRET = "-------"
USER_AGENT = "BMI_Image_Scraper_v1.0"

# Folder the images are saved to (absolute path)
SAVE_DIR = os.path.abspath("progress_pics")

# File that remembers the IDs of posts downloaded by earlier runs
COLLECTED_POSTS_FILE = "collected_posts.txt"

# Seconds to wait on the image host before a download is abandoned
REQUEST_TIMEOUT = 30


def is_image_url(url):
    """Return True when *url* ends in a jpg/jpeg/png suffix, ignoring case."""
    return url.lower().endswith(("jpg", "jpeg", "png"))


def safe_filename(title, fallback="untitled", max_len=100):
    """Build the ``.png`` file name used on disk from a post title.

    Characters that are not allowed in file names are dropped, line breaks
    and surrounding whitespace are removed, spaces become underscores, the
    typographic apostrophe is normalised to ``'`` and the result is cut to
    *max_len* characters. *fallback* is used when nothing is left of the title.
    """
    title = re.sub(r'[\\/*?:"<>|]', "", title)  # characters Windows rejects
    title = re.sub(r"[\r\n]", "", title).strip()  # line breaks, then outer whitespace
    title = title.replace(" ", "_").replace("’", "'")
    title = title[:max_len] or fallback
    # Every image is stored with a .png extension regardless of its source format
    return f"{title}.png"


def unique_posts_by_id(posts, already_collected=()):
    """Return *posts* in order, keeping only the first post for each new ``id``.

    Posts whose ``id`` is in *already_collected* are dropped, and so is every
    repeat of an ``id`` that appeared earlier in *posts*.
    """
    seen_ids = set(already_collected)
    kept = []
    for post in posts:
        if post.id not in seen_ids:
            seen_ids.add(post.id)
            kept.append(post)
    return kept


def download_image(url, filepath, timeout=REQUEST_TIMEOUT):
    """Stream *url* into *filepath*; return True only if the full body was saved.

    The body is written to ``<filepath>.part`` first and renamed afterwards so
    that an interrupted transfer never leaves a truncated image behind.
    Network and file-system errors are reported and turned into ``False``.
    """
    partial = filepath + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                return False
            with open(partial, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
        os.replace(partial, filepath)
        return True
    except (requests.RequestException, OSError) as exc:
        print(f"⚠️ Download error: {exc}")
        if os.path.exists(partial):
            os.remove(partial)
        return False


# The guard is true when the cell/script is executed, so all names below are
# still created as globals; it only keeps an import of this code side-effect free.
if __name__ == "__main__":
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Connect to the Reddit API
    reddit = praw.Reddit(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)

    # Load the IDs of posts that were already collected (used to skip them)
    if os.path.exists(COLLECTED_POSTS_FILE):
        with open(COLLECTED_POSTS_FILE, "r") as f:
            collected_posts = set(f.read().splitlines())
    else:
        collected_posts = set()

    # IDs of posts saved during this run
    new_collected_posts = set()

    # Subreddit to crawl
    subreddit = reddit.subreddit("progresspics")

    # Combine several listings to gather as many posts as possible
    posts_to_collect = []

    try:
        # Random rising posts: ten requests of up to 100 posts each.
        # NOTE(review): in the recorded run each request added only 24 posts.
        for _ in range(10):
            fetched_posts = list(subreddit.random_rising(limit=100))
            posts_to_collect.extend(fetched_posts)
            print(f"🔄 랜덤 게시물 수집 진행: {len(posts_to_collect)}개 확보됨")
            time.sleep(2)

        # Top posts of the past year (up to 1000)
        fetched_top_year = list(subreddit.top(time_filter="year", limit=1000))
        posts_to_collect.extend(fetched_top_year)
        print(f"🏆 연간 인기 게시물 추가: {len(fetched_top_year)}개 확보됨")

        # Top posts of all time (up to 1000)
        fetched_top_all = list(subreddit.top(time_filter="all", limit=1000))
        posts_to_collect.extend(fetched_top_all)
        print(f"🌍 전체 인기 게시물 추가: {len(fetched_top_all)}개 확보됨")

    except Exception as e:
        print(f"❌ API 요청 실패: {e}")

    # Drop posts from earlier runs AND repeats between the listings above, so
    # the count is accurate and no image is downloaded more than once
    unique_posts = unique_posts_by_id(posts_to_collect, collected_posts)
    print(f"✅ 중복 제거 후 남은 게시물 개수: {len(unique_posts)}개")

    downloaded = 0  # number of images saved in this run

    try:
        for submission in unique_posts:
            if not is_image_url(submission.url):
                continue

            filename = safe_filename(submission.title, fallback=submission.id)
            filepath = os.path.join(SAVE_DIR, filename)

            if download_image(submission.url, filepath):
                print(f"✅ Saved: {filename}")
                new_collected_posts.add(submission.id)
                downloaded += 1
            else:
                print(f"❌ Failed to download: {submission.url}")

            # Throttle requests
            time.sleep(1)
    finally:
        # Record the new IDs even when the run is interrupted, so the next
        # run does not download the same images again
        if new_collected_posts:
            with open(COLLECTED_POSTS_FILE, "a") as f:
                for post_id in new_collected_posts:
                    f.write(post_id + "\n")

    print(f"🎉 Image download completed. Total images saved: {downloaded}")


🔄 랜덤 게시물 수집 진행: 24개 확보됨
🔄 랜덤 게시물 수집 진행: 48개 확보됨
🔄 랜덤 게시물 수집 진행: 72개 확보됨
🔄 랜덤 게시물 수집 진행: 96개 확보됨
🔄 랜덤 게시물 수집 진행: 120개 확보됨
🔄 랜덤 게시물 수집 진행: 144개 확보됨
🔄 랜덤 게시물 수집 진행: 168개 확보됨
🔄 랜덤 게시물 수집 진행: 192개 확보됨
🔄 랜덤 게시물 수집 진행: 216개 확보됨
🔄 랜덤 게시물 수집 진행: 240개 확보됨
🏆 연간 인기 게시물 추가: 978개 확보됨
🌍 전체 인기 게시물 추가: 995개 확보됨
✅ 중복 제거 후 남은 게시물 개수: 1172개
✅ Saved: F355'11_[235lbs__146.8lbs_=_88.2lbs]_took_me_4_years_in_total_with_2_pregnancies_inbetween..png
✅ Saved: F275'6”_[154lbs__117_=_37lbs]_(2_years)_dang,_still_can't_believe_I_lost_the_weight,_now_trying_to_g.png
✅ Saved: F335'2”[145lbs__138lbs_=_7lbs](10_months)_2024_went_alright._Hoping_for_more_in_2025!.png
✅ Saved: M366'1”_[173lbs__210lbs_=_37lbs]_(3_years).png
✅ Saved: F295'0”_[88kg__68kg_=_20kg]_(4_years).png
✅ Saved: F355'1”[178lbs134lbs=44lbs](4_years)_Lifting_weights_has_completely_changed_my_life💪🏻.png
✅ Saved: F275'2_[247lbs__203lbs_=_44lbs]_(3_months)_Weight_Loss_Progress,_still_a_ways_to_go!.png
✅ Saved: M505'10_[202162=40lbs]_(60_months)_Nutrition_c

✅ Saved: M326'1_[230lbs__210lbs_=20lbs]_(_1_year_)_weight_loss_quitting_the_bars.png
✅ Saved: F355'11_[235lbs__146.8lbs_=_88.2lbs]_took_me_4_years_in_total_with_2_pregnancies_inbetween..png
✅ Saved: F295'2”_[195lbs__135lbs_=_60lbs]_(3_years)_Would_like_to_lose_another_15lbs.png
✅ Saved: F325'5”_[215176=39lbs](8m)_CICO,_protein_and_weights.png
✅ Saved: M195'11_[247.7lbs__178.8lbs_=_68.9lbs]__19_months__Proud_of_how_far_I've_come__emotionally_and_physi.png
✅ Saved: M256'_[350lbs__200lbs_=_150_pounds]_(18_months)_Weight_loss_completely_transformed_my_face,_people_I.png
✅ Saved: F305'7_[190lbs__160lbs_=_30lbs]__8_months__Got_tired_of_slowly_outgrowing_my_clothes_year_after_year.png
✅ Saved: F295'8_[302lbs-241lbs=61lbs]_(14_months).png
✅ Saved: M505'10_[202162=40lbs]_(60_months)_Nutrition_changes_and_exercise..png
✅ Saved: F295'0”_[88kg__68kg_=_20kg]_(4_years).png
✅ Saved: F325'7”_[190lbs__131lbs_=59_lbs]_(9_years)_Lots_of_ups_and_downs_over_the_years_but_feeling_better_t.png
✅ Saved: M326'

✅ Saved: F305'7_[190lbs__160lbs_=_30lbs]__8_months__Got_tired_of_slowly_outgrowing_my_clothes_year_after_year.png
✅ Saved: F185'3__[175__157_=_18lb]_(2-3_months)_Still_going_but_finally_see_some_progress!!.png
✅ Saved: F325'7”_[190lbs__131lbs_=59_lbs]_(9_years)_Lots_of_ups_and_downs_over_the_years_but_feeling_better_t.png
✅ Saved: M256'_[350lbs__200lbs_=_150_pounds]_(18_months)_Weight_loss_completely_transformed_my_face,_people_I.png
✅ Saved: F325'5”_[215176=39lbs](8m)_CICO,_protein_and_weights.png
✅ Saved: F275'2_[247lbs__203lbs_=_44lbs]_(3_months)_Weight_Loss_Progress,_still_a_ways_to_go!.png
✅ Saved: F355'1”[178lbs134lbs=44lbs](4_years)_Lifting_weights_has_completely_changed_my_life💪🏻.png
✅ Saved: M505'10_[202162=40lbs]_(60_months)_Nutrition_changes_and_exercise..png
✅ Saved: F295'8_[302lbs-241lbs=61lbs]_(14_months).png
✅ Saved: M326'1_[230lbs__210lbs_=20lbs]_(_1_year_)_weight_loss_quitting_the_bars.png
✅ Saved: F335'2”[145lbs__138lbs_=_7lbs](10_months)_2024_went_alright._Hoping_for

✅ Saved: F315'6”_[240__185_=_55_lbs]__Slowly_but_surely.png
✅ Saved: F245'8_[218lbs__148lbs_=_-70lbs]_always_use_the_first_pic_as_my_“before”_pic_and_unintentionally_wor.png
✅ Saved: F255'7_[236191=45]__7.5_months_Finally_feel_like_I_can_post!.png
✅ Saved: F215'4”_[180-170_10lbs_lost]_~2_months_in.png
✅ Saved: F235'2”_[140lbs__127lbs_=_13lbs]__1.5_years__gained_30lbs_the_first_month_I_was_sober,_been_trying_t.png
✅ Saved: F285'7”_[241_lbs_-_213_lbs_=_28_lbs]_(1_yr)_About_halfway_there!.png
✅ Saved: F285'7”_[255lbs__222.1_lbs_=_32.9_lbs]_(5_months)_Felt_like_I_couldn't_see_a_huge_difference_until_I.png
✅ Saved: F245'6[280225=55lbs]_it's_so_insane_how_different_this_dress_looks_on_me!!_I've_come_so_far_and_so_c.png
✅ Saved: F265'5_[189_lbs__165_lbs_=_24_lbs]_over_the_course_of_8_months!.png
✅ Saved: F315'7_[216lbs__175lbs_=_41lbs]_(9_months)_Halfway_to_my_goal._Want_to_start_the_focus_on_strength_t.png
✅ Saved: M345'6”_[270lbs170lbs=100lbs]_(11_months).png
✅ Saved: F315'6_[241__148_=_92l

✅ Saved: F375'4”_[153140=13lbs]_5_Months.png
✅ Saved: F325'8_[278lbs__238lbs_=40lbs]_(9_months)_hormone_imbalance_treatment_was_the_best_thing_I_did_for_m.png
✅ Saved: F425'11”_[100kg__63kg_=_37]_(1_year).png
✅ Saved: M495'9_[230lbs__148lbs_160lbs=70lbs]_(72_months)_Phase_One_Weight_Loss,_Phase_Two_Gym_NSFW.png
✅ Saved: F275'4”_[247+lbs189lbs=58lbs]_(20_months)_I_still_can't_believe_how_much_my_weight_loss_has_changed_.png
✅ Saved: F435'4.5”_[275148=127lbs_down,_2_years]_mental_health_matters!.png
✅ Saved: F315'3_[220-160=60lbs_lost]_(24_months).png
✅ Saved: F265'3_[208173=35lbs]_1_year_difference!.png
✅ Saved: F245'8''_[170172]_slight_recomp_This_is_about_4_months_apart.png
✅ Saved: F315'5_[150_lbs120_lbs_=_30_lbs]_(2_years)_quit_binge_drinking_and_fell_in_love_with_the_gym.png
✅ Saved: F235'2_[235186=49lb_lost]_(11_months)_same_pair_of_jeans,_almost_a_year_apart_).png
✅ Saved: F235'3”_[110lbs145lbs125lbs=_20lbs_gain]_(11_months)_Progress_is_not_linear._I_ruined_it_mid_year,_n.png
✅ S

✅ Saved: F305'5_[190lbs__125lbs_=_65lbs]__42months__Proud_every_day_of_how_far_I've_come.png
✅ Saved: F275‘8_[211lbs__182lbs_=_-29lbs]_1_year-ish__i_really_love_comparing_the_same_outfit_on_me.png
✅ Saved: M325'8”_[250__209_=_41_lbs]_2.5_Months_I_feel_amazing!.png
✅ Saved: F315'3.5”_[232lb__163lb_=_69lb]_(14months)_First_25lb_lost_by_just_eating_at_a_calorie_deficit,_the_.png
✅ Saved: M206'1”_[270lbs__195lbs_=_75lbs]_(1.5_years)_Fat_loss_progress.png
✅ Saved: M305'9_[255___170_=_85_lbs]_(8_months).png
✅ Saved: F245'5''_[184lbs__142lbs_=_42lbs]_12_months_progress.png
✅ Saved: F265'7_[280lbs__160lbs_=_120lbs]_(3_years)_Face_gains.png
✅ Saved: F305'9”_[245195=-50]_One_year's_progress.png
✅ Saved: F305'6”_[206lbs__181lbs_=_25lbs]_Five_months_progress!.png
✅ Saved: F295'4_[200lbs__149lbs_=_51]_(6_months)_On_my_way.png
✅ Saved: M376'1”_[325lbs199lbs=126_down]_(8_months)_got_healthy_for_the_kids.png
✅ Saved: F235'6_[100_lbs__150_lbs_=_50_lbs_gained]_(3,5_ish_years)_I_still_can't_believe_my_ol

✅ Saved: F275'4_[247lbs199lbs=48lbs]_(13_months)_You_were_so_kind_about_my_face_gains_yesterday_-_here's_my_b.png
✅ Saved: F355'2”_[285lbs__175lbs_=_110lbs]_trying_to_motivate_myself_to_keep_going.png
✅ Saved: M336'0_[190lbs__132lbs_=_58lbs]_(14_months)_Time_for_a_gaining_phase.png
✅ Saved: F325'4_[231_179_=_52]_(6_months)_I've_never_been_able_to_see_my_abs_before!.png
✅ Saved: F325'4”_[225__225]_4_months_and_not_seeing_movement_on_the_scale_but_is_there_recomp.png
✅ Saved: F245'11”_[158lbs__170lbs_=_12lbs]_(2_years)_natural_transformation.png
✅ Saved: F305'6”_[200140=60_lbs]_-_8_years_-_21_year_old_me_vs_30_year_old_me,_and_over_a_year_postpartum_in_.png
✅ Saved: F265'8”_[282204=_78lb_loss]_(1_year_CICO).png
✅ Saved: F265'7”_[185~lbs__145lbs_=_40lbs]_WIP_&_M265'10”_[135lbs__180lbs_=_45lbs].png
✅ Saved: F415'3”_[197.8__182.5_=_15.3lbs]__2_months__I_lost_a_chin!.png
✅ Saved: M386'1_[500lbs__324lbs_=_170lbs]_-_2_Years_Time.png
✅ Saved: F325'2”_[130__118_=_12_lbs]_1_year_of_working_out_an

❌ Failed to download: https://i.imgur.com/iRyY8gT.jpg
❌ Failed to download: https://i.imgur.com/IAna5ny.jpg
❌ Failed to download: https://i.imgur.com/m9O8BkE.jpg
❌ Failed to download: https://i.imgur.com/TzuubiS.jpg
❌ Failed to download: https://i.imgur.com/rDjR6Bj.jpg
❌ Failed to download: https://i.imgur.com/XktKelY.jpg
❌ Failed to download: https://i.imgur.com/zIiqlS2.jpg
❌ Failed to download: http://i.imgur.com/2am3Tm1.jpg
❌ Failed to download: https://i.imgur.com/UQCjn79.jpg
❌ Failed to download: http://i.imgur.com/2YBXQVe.jpg
❌ Failed to download: http://i.imgur.com/03YBge3.jpg
❌ Failed to download: https://i.imgur.com/sjzlWSa.jpg
❌ Failed to download: http://i.imgur.com/dx93YSv.jpg
❌ Failed to download: http://i.imgur.com/obHbvRP.jpg
❌ Failed to download: https://i.imgur.com/D9Ksrua.jpg
❌ Failed to download: https://i.imgur.com/BnKDb8i.jpg
❌ Failed to download: https://i.imgur.com/m9kBqff.jpg
❌ Failed to download: https://i.imgur.com/IEAgFYp.jpg
❌ Failed to download: https://i.i

✅ Saved: F225'5_[190127_=_-63]_can't_believe_the_leg_difference!.png
✅ Saved: F225'6_[211lbs__139lbs_=_72lbs]_(18_months)_I_decided_to_see_if_I_could_lose_any_weight_with_diet_af.png
✅ Saved: M315'8_[350__198_=_152lbs]_(15_months)_I_lost_150_pounds_and_gained_a_neck!.png
✅ Saved: F275'6”_[320lbs__192bs_=_128lbs]_(1_year_and_5_months)_Only_12lbs_to_go_until_I've_reached_my_first_.png
✅ Saved: M365'7”_[602lbs__210lbs_=_392lbs_lost]_(2_years).png
✅ Saved: F246'6_[286lbs__180lbs_=_106lbs]_=_4_yr.png
✅ Saved: M265'9”_[330lbs191lbs=139lbs]_goodbye_obesity.png
✅ Saved: M226'1_[340lbs__174lbs_=_176lbs]_Officially_lost_more_than_I_now_weigh,_face_gains_progress_after_ju.png
✅ Saved: F355'9”_[401_lbs__173_lbs_=_228_lbs]_(4_years)_Maintaining_and_happy.png
✅ Saved: F275'0_[245133=112lbs]_Transformation_Monday!.png
✅ Saved: F295'8_[265lbs__215lbs_=_50lbs]_A_different_kind_of_progress_pic_-_I_was_having_a_low_day_thinking_m.png
✅ Saved: F285'7_[257lbs__125lbs_=_133lbs]_(12_months)_I'm_delighted_to_

✅ Saved: F215'10”_[240lbs__165lbs_=_75lbs]_Today_is_the_first_anniversary_of_the_official_start_of_my_weight_.png
✅ Saved: F325'7_[255_lbs_150_=_105_lbs]_bridesmaids_to_bride,_4_years_maintaining.png
✅ Saved: F295'2_[315_lbs__135_lbs_=_180_lbs]_I_want_to_post_to_encourage_everyone_that_change_is_possible._It.png
✅ Saved: F325'3_[163lb__118lb_=_45lb]_(4_years)_Slow_progress,_but_I_got_there_eventually!.png
✅ Saved: F405'9_[270lbs__148lbs_=_122lbs]_(10_years)_MtF_transgender,_2_years_HRT;_Am_I_making_progress_💜.png
✅ Saved: F495'4_[214lbs__137lbs_=_77lbs]_One_Year_Today.png
✅ Saved: F205'7_[203_lbs141_lbs=62_lbs_lost]_over_5_months_)_after_leaving_a_toxic_relationship.png
✅ Saved: F326'1_[258_lbs__158_lbs_=_100_lbs]_(2_years)_Guys_-_I_finally_did_it._I'm_100_lbs(!!)_lighter_than_.png
✅ Saved: M205'9_[387-190=197lbs](1_year)_blah_could've_turned_out_better_looking_but_a_win_is_a_win.png
✅ Saved: F345'3_[90_lbs_-_120_lbs_=_30_lb_gain]_6_months_clean_of_opiates_and_benzo's._Gained_some_clea

✅ Saved: M335'10”_[275__163_=_112_lbs]_Told_me_I_have_to_start_putting_myself_out_there._Here_I_am._😬.png
✅ Saved: F285'0_[226lbs__130lbs_=_96lbs]_4lbs_away_from_100lbs_loss!_I_got_this!.png
✅ Saved: F375'9_[266_lbs__140_lbs_=_126_lbs]_(15_mo._losing,_6_mo._maintaining)_Keep_some_of_your_old_clothes.png
✅ Saved: F515'6_[158kgs__75kgs_=_83kgs_]_(2_years)_It's_never_too_late._Never_give_up._Living_a_life_I_never_.png
✅ Saved: F195'8_[235lbs__178lbs_=_57lbs]_Just_got_some_new_running_gear_and_was_feeling_confident!_I_still_ha.png
✅ Saved: F245'7_[310_-_210_=_100]_two_years_ago_I_left_a_shitty_relationship_and_my_ex_told_me_I'd_go_nowhere.png
✅ Saved: F595'3''_[177__117]_=_60lbs_lost_(6_months)._This_is_my_mum_who_was_told_by_her_doctor_that_she_had_.png
✅ Saved: F295'9”_[214175=39lbs]_Jan_6_-_June_20._Just_hit_6_months_sober_and_been_working_out_and_eating_clea.png
✅ Saved: F315'6”_[235lbs__160lbs_=_75lbs]_(15_months)._Hit_my_first_GW_).png
✅ Saved: F305'2”_[000000=000]_(3_years)_I_believ

✅ Saved: F335'6”_[367lbs__265.8lbs_=_101.2lbs]_(1_year)_Posting_my_in-progress_pic_to_celebrate_my_365-day_st.png
❌ Failed to download: https://imgur.com/5SgyOvr.jpg
✅ Saved: F236'1_[326156=170]_(two_and_a_half_years_so_far!)_This_has_been_a_wild_journey_but_I'm_finally_star.png
❌ Failed to download: https://i.imgur.com/ozKhrNB.jpg
✅ Saved: F305'9_[282_lbs__185_lbs_=_97lbs_lost]_Sun_in_the_sky,_you_know_how_I_feel...birds_flying_high_you_k.png
✅ Saved: M296'2[274lbs__219lbs_=_55lbs]_(15_months)_check_it_out_y'all._I_have_a_neck_now.png
✅ Saved: F205'4”_[190__130_=_-60lbs]_from_size_16_to_size_4._I_feel_healthy_and_confident,_still_working_to_t.png
✅ Saved: F275'6”[170lbs170lbs=0lbs]Weight_loss_progress.png
✅ Saved: F265'7[173132=41lbs]_August_til_now._The_Pandemic_forced_me_to_reexamine_a_lot_of_my_habits,_which_i.png
✅ Saved: M226'7_[400lbs__229lbs_=_171lbs]_(9_months)_9_pounds_away_from_hitting_my_initial_goal!!_Updating_he.png
✅ Saved: F285'5”[279lbs178lbs=101lbs]_I_did_it!_It_FINAL

✅ Saved: F255'7''_[185_lbs__142_lbs_=_43_lbs]_Now_I_have_a_jawline.png
✅ Saved: F245'9”_[274lbs__199.8lbs_=_74.2lbs]_hit_onederland_and_Found_my_waist_this_morning!!.png
✅ Saved: F325'3[393.8lbs__263.8lbs_=_130lbs]_(1_year)_Yesterday_521_marked_my_1_year_anniversary_of_being_sle.png
✅ Saved: F295'1”_[252lbs__140lbs_=_121lbs]_my_journey_thus_far._Balanced_diet_and_exercise..png
✅ Saved: F285'7”_[233lbs__129lbs_=_104lbs]_My_last_post_seemed_to_inspire_some_of_you_guys,_I'm_hoping_this_o.png
✅ Saved: F265'4_[170145=25lbs]_5_months_of_putting_in_work!.png
✅ Saved: F305'5.5_[175lb__140_lb_=_35lb]_(6_months)_I_honestly_have_never_looked_better_in_my_life._Only_20_l.png
✅ Saved: F255'7''_[195lbs__165lbs_=_30lbs]_It's_so_hard_to_look_at_those_old_photo's_and_I'm_nervous_to_share.png
✅ Saved: F335'7”_[298lbs__198lbs_=_100lbs]_When_I_set_a_goal_a_little_over_a_year_ago_to_lose_100lbs_I_never_.png
✅ Saved: F276'0”_[263__196_=_67_lbs]_Two_years_without_drugs_or_alcohol!_Was_a_run_of_the_mill_bag_l

In [None]:
import os
import cv2
import torch
import shutil
import numpy as np
from ultralytics import YOLO

# Load the YOLO model (YOLOv8-pose).
model = YOLO("yolov8x-pose.pt")  # path to the YOLOv8 pose weights (must be downloaded)

# Folder containing the images to filter.
IMAGE_DIR = r"C:\Users\USER\Desktop\test 데이터 크롤링\전처리\progress_pics"

# An image is kept only if at least this many of the 17 pose keypoints
# were detected for the first person (i.e. at most 5 may be missing).
MIN_VALID_KEYPOINTS = 12

# Names of the images that were deleted by this pass.
deleted_images = []

# Process every entry in the folder. os.listdir() returns a list snapshot,
# so removing files while looping over it is safe.
for image_name in os.listdir(IMAGE_DIR):
    image_path = os.path.join(IMAGE_DIR, image_name)

    # Load the image only to verify it is readable; unreadable entries
    # are reported and skipped, never deleted.
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 이미지 로드 실패: {image_name}")
        continue

    # Extract pose keypoints with YOLO.
    results = model(image_path)

    for result in results:
        # Expected shape is (num_people, 17, 2).
        keypoints = result.keypoints.xy.cpu().numpy()

        # No person detected. Checking .size instead of len() also covers
        # the empty (1, 0, 2) array that some Ultralytics versions return
        # when there are no detections -- NOTE(review): version dependent.
        if keypoints.size == 0:
            print(f"🚫 전신 없음: {image_name} (사람 없음)")
            deleted_images.append(image_name)
            os.remove(image_path)
            break  # image deleted; move on to the next one

        # Use only the first person's keypoints (assumes a single subject).
        person_keypoints = keypoints[0]  # (17, 2)

        # Count the keypoints that were actually detected. Ultralytics
        # reports a keypoint it could not locate as (0, 0) rather than NaN,
        # so a NaN test alone would always count all 17. A keypoint counts
        # as detected when both coordinates are finite and it is not (0, 0).
        detected_mask = (
            np.isfinite(person_keypoints).all(axis=1)
            & (person_keypoints != 0).any(axis=1)
        )
        valid_keypoints = int(np.count_nonzero(detected_mask))

        if valid_keypoints < MIN_VALID_KEYPOINTS:  # 5 or more of 17 missing
            print(f"🚫 전신 미검출 (키포인트 부족): {image_name} ({valid_keypoints}/17)")
            deleted_images.append(image_name)
            os.remove(image_path)
            break

print(f"\n✅ 이미지 필터링 완료! 총 {len(deleted_images)}개 이미지 삭제됨.")
