In [None]:
import os
import time

import requests
from dotenv import load_dotenv
from tqdm import tqdm

load_dotenv()

In [None]:
# TMDB API key taken from the process environment; this is None when
# TMDB_API_KEY is not set.
API_KEY = os.environ.get("TMDB_API_KEY")

In [None]:
# Genre name -> numeric id sent as the `with_genres` query parameter.
# The names are also used as the per-genre output folder names.
GENRE_IDS = dict(
    action=28,
    comedy=35,
    horror=27,
    romance=10749,
)

# Prefix for API endpoints such as `/discover/movie`.
BASE_URL = "https://api.themoviedb.org/3"
# Prefix joined with a movie's `poster_path` to form the image URL
# (presumably "w500" selects a 500px-wide rendition -- confirm against TMDB docs).
POSTER_BASE_URL = "https://image.tmdb.org/t/p/w500"

In [None]:


def _fetch_genre_page(session, genre_id, page, timeout=15):
    """Fetch one page of `/discover/movie` results for a genre.

    Args:
        session: `requests.Session` used to issue the request.
        genre_id: Numeric genre id sent as `with_genres`.
        page: 1-based page number to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list under the response's `results` key (empty when the key is
        absent), or None when the server answers with a non-200 status.

    Raises:
        requests.RequestException: On connection problems or timeout.
        ValueError: When the response body is not valid JSON.
    """
    listing = session.get(
        f"{BASE_URL}/discover/movie",
        params={
            'api_key': API_KEY,
            'with_genres': genre_id,
            'sort_by': 'popularity.desc',
            'page': page,
            'language': 'en-US',
            'include_adult': 'false'
        },
        # Without a timeout a stalled connection would block the cell forever.
        timeout=timeout,
    )
    if listing.status_code != 200:
        tqdm.write(f"Error on page {page}: {listing.status_code}")
        return None
    return listing.json().get('results', [])


def _save_poster(session, poster_url, filename, timeout=15):
    """Download a single poster image and store it at `filename`.

    The image is written to a temporary `.part` file first and then moved
    into place, so an interrupted write never leaves a truncated `.jpg`
    that a later run would mistake for a finished download.

    Returns:
        True when the file was saved, False when the download or the write
        failed (the failure is reported, not raised).
    """
    try:
        image = session.get(poster_url, timeout=timeout)
    except requests.RequestException as exc:
        tqdm.write(f"Skipping {poster_url}: {exc}")
        return False

    if image.status_code != 200:
        return False

    partial = f"{filename}.part"
    try:
        with open(partial, 'wb') as f:
            f.write(image.content)
        os.replace(partial, filename)
    except OSError as exc:
        tqdm.write(f"Could not save {filename}: {exc}")
        # Best-effort cleanup of the temporary file.
        try:
            os.remove(partial)
        except OSError:
            pass
        return False
    return True


def download_posters(num_per_genre=2000):
    """Download movie posters for every genre in `GENRE_IDS`.

    For each genre the `/discover/movie` listing is walked page by page
    (sorted by popularity, descending) and each movie's poster is saved as
    `../data/new_data/<genre>/<movie id>.jpg`.

    Movies without a `poster_path` and posters whose file already exists are
    skipped and do NOT count towards the quota, so re-running the function
    fetches up to `num_per_genre` additional posters per genre.

    A genre stops early when a listing page returns a non-200 status, an
    empty result list, or a network/JSON error; the function then moves on
    to the next genre.

    Args:
        num_per_genre: Maximum number of posters to newly save per genre.

    Returns:
        None. Progress and per-genre totals are printed.
    """
    # The context manager releases the pooled connections when finished.
    with requests.Session() as session:
        for genre_name, genre_id in GENRE_IDS.items():
            genre_dir = f"../data/new_data/{genre_name}"
            os.makedirs(genre_dir, exist_ok=True)
            print(f"\nDownloading {genre_name} posters...")

            downloaded = 0
            page = 1

            # One progress bar per genre, advanced once per saved poster.
            with tqdm(total=max(num_per_genre, 0), desc=genre_name,
                      bar_format="{l_bar}{bar:20}{r_bar}", leave=False) as pbar:
                while downloaded < num_per_genre:
                    try:
                        movies = _fetch_genre_page(session, genre_id, page)
                    except (requests.RequestException, ValueError) as exc:
                        # Network failure or malformed JSON: give up on this genre.
                        tqdm.write(f"Critical error: {exc}")
                        break

                    # None -> HTTP error (already reported); [] -> no more results.
                    if not movies:
                        break

                    for movie in movies:
                        if downloaded >= num_per_genre:
                            break

                        poster_path = movie.get('poster_path')
                        if not poster_path:
                            continue

                        filename = f"{genre_dir}/{movie['id']}.jpg"
                        if os.path.exists(filename):
                            continue

                        poster_url = f"{POSTER_BASE_URL}{poster_path}"
                        if _save_poster(session, poster_url, filename):
                            downloaded += 1
                            pbar.update(1)

                    page += 1
                    # Short pause between listing pages (presumably to stay
                    # under the API's rate limit -- confirm the required value).
                    time.sleep(0.5)

            print(f"Downloaded {downloaded} {genre_name} posters")

# Run the download, capping each genre at 500 newly saved posters
# (overrides the function's default of 2000).
download_posters(num_per_genre=500)