In [None]:
import requests
import csv
import threading
from queue import Queue
import calendar

# --- TMDb endpoint configuration -------------------------------------------
# Placeholder: replace with your own TMDb API key before running.
API_KEY = "api_key"
BASE_URL = "https://api.themoviedb.org/3/discover/movie"

# How many worker threads download pages in parallel.
NUM_THREADS = 25

# --- Output and shared state -----------------------------------------------
# Destination CSV file for the exported rows.
csv_filename = "movies_tmdb_filt.csv"

# In-memory buffer that accumulates one tuple per movie until the CSV is
# written; `lock` guards it because several worker threads extend it.
movies_data = list()
lock = threading.Lock()

# Seconds to wait for TMDb before giving up on a single page request.
_FETCH_TIMEOUT_S = 30


def _build_page_query(year, month, day, page):
    """Return the discover-endpoint query parameters for one result page.

    The release-date window is narrowed to a single day when ``day`` is
    truthy, to the whole month when only ``month`` is truthy, and left at
    the full year otherwise.
    """
    query = {
        "api_key": API_KEY,
        "language": "es-ES",
        "sort_by": "release_date.asc",
        "primary_release_year": year,
        "watch_region": "AR",
        "with_watch_providers": "337|8|119|2|3|350|531|339|167|1899|2302",
        "page": page,
    }
    if day:
        iso_day = f"{year}-{month:02d}-{day:02d}"
        query["primary_release_date.gte"] = iso_day
        query["primary_release_date.lte"] = iso_day
    elif month:
        last_day = calendar.monthrange(year, month)[1]
        query["primary_release_date.gte"] = f"{year}-{month:02d}-01"
        query["primary_release_date.lte"] = f"{year}-{month:02d}-{last_day}"
    return query


def fetch_data(year, month, day, page_queue):
    """Worker: download every page number found in ``page_queue``.

    Each page is requested from ``BASE_URL`` and, on HTTP 200, one tuple per
    movie is appended to the module-level ``movies_data`` list while holding
    ``lock``. Failures (non-200 status, network error, unparsable body) are
    printed and the page is skipped; the worker keeps going.

    Args:
        year: Release year to filter on.
        month: Month number, or a falsy value for "whole year".
        day: Day of month, or a falsy value for "whole month/year".
        page_queue: Queue of integer page numbers shared by all workers.

    Returns:
        None. The function returns once the queue has no more items.
    """
    # Local import: the file only brings `Queue` into scope at module level.
    from queue import Empty

    while True:
        # get_nowait() instead of `empty()` followed by `get()`: with several
        # workers the queue can be drained between the check and the blocking
        # get, which would leave this thread (and the caller's join) stuck.
        try:
            page = page_queue.get_nowait()
        except Empty:
            return

        try:
            response = requests.get(
                BASE_URL,
                params=_build_page_query(year, month, day, page),
                timeout=_FETCH_TIMEOUT_S,
            )

            if response.status_code == 200:
                results = response.json().get("results", [])
                page_data = [(
                    movie.get('id'),
                    movie.get('title', 'Desconocido'),
                    movie.get('release_date', 'Desconocido'),
                    movie.get('backdrop_path', 'N/A'),
                    movie.get('genre_ids', []),
                    movie.get('original_language', 'N/A'),
                    movie.get('original_title', 'Desconocido'),
                    movie.get('overview', 'N/A'),
                    movie.get('popularity', 0),
                    movie.get('poster_path', 'N/A'),
                    movie.get('vote_average', 0),
                    movie.get('vote_count', 0)
                ) for movie in results]

                with lock:
                    movies_data.extend(page_data)
            else:
                print(f"‚ö†Ô∏è Error en la p√°gina {page} del a√±o {year}, mes {month}, d√≠a {day}: {response.status_code} - {response.text}")
        except (requests.RequestException, ValueError) as exc:
            # Network failure or a body that is not valid JSON: report it and
            # move on instead of letting the exception kill this worker.
            print(f"Error de red en la página {page} del año {year}, mes {month}, día {day}: {exc}")
        finally:
            # Always balance the get so Queue.join() users are never stuck.
            page_queue.task_done()

# Highest page number this script ever requests for a single query. Windows
# that report more pages are split into smaller date ranges instead
# (year -> month -> day).
_MAX_PAGE = 500

# Seconds to wait for TMDb when asking how many pages a window has.
_PROBE_TIMEOUT_S = 30


def _probe_total_pages(query, label):
    """Return the ``total_pages`` TMDb reports for ``query``.

    Returns None (after printing a diagnostic that includes ``label``) when
    the request fails, answers with a non-200 status, or the body is not
    valid JSON, so that one bad request does not abort the whole crawl.
    """
    try:
        response = requests.get(BASE_URL, params=query, timeout=_PROBE_TIMEOUT_S)
    except requests.RequestException as exc:
        print(f"Error de red al obtener datos iniciales para {label}: {exc}")
        return None

    if response.status_code != 200:
        print(f"‚ö†Ô∏è Error al obtener datos iniciales para {label}: {response.status_code} - {response.text}")
        return None

    try:
        return response.json().get("total_pages", 1)
    except ValueError as exc:
        print(f"Respuesta no válida al obtener datos iniciales para {label}: {exc}")
        return None


def _download_window(year, month, day, page_count):
    """Fetch pages 1..min(page_count, _MAX_PAGE) of one date window.

    Page numbers are queued and drained by up to ``NUM_THREADS`` threads
    running ``fetch_data``; the call returns once all of them have finished.
    """
    page_queue = Queue()
    last_page = min(page_count, _MAX_PAGE)
    for page in range(1, last_page + 1):
        page_queue.put(page)

    # No point in starting more workers than there are pages to fetch.
    workers = [
        threading.Thread(target=fetch_data, args=(year, month, day, page_queue))
        for _ in range(min(NUM_THREADS, last_page))
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()


# Walk every release year from 1890 through 2024 (the range end is exclusive).
for year in range(1890, 2025):
    print(f"üìÖ Procesando el a√±o: {year}")

    params = {
        "api_key": API_KEY,
        "language": "es-ES",
        "sort_by": "release_date.asc",
        "primary_release_year": year,
        "watch_region": "AR",
        "with_watch_providers": "337|8|119|2|3|350|531|339|167|1899|2302",
        "page": 1
    }

    total_pages = _probe_total_pages(params, year)
    if total_pages is None:
        continue

    if total_pages <= _MAX_PAGE:
        # The whole year fits under the page cap: download it in one go.
        print(f"üì¢ Total de p√°ginas para {year}: {total_pages}")
        _download_window(year, None, None, total_pages)
        continue

    print(f"‚ö†Ô∏è El a√±o {year} tiene m√°s de 500 p√°ginas, iterando por meses.")
    for month in range(1, 13):
        print(f"üìÜ Procesando el mes: {month} de {year}")
        days_in_month = calendar.monthrange(year, month)[1]
        params["primary_release_date.gte"] = f"{year}-{month:02d}-01"
        params["primary_release_date.lte"] = f"{year}-{month:02d}-{days_in_month}"

        month_pages = _probe_total_pages(params, f"{year}-{month}")
        if month_pages is None:
            continue
        print(f"üìÑ Total de p√°ginas para {year}-{month}: {month_pages}")

        if month_pages <= _MAX_PAGE:
            _download_window(year, month, None, month_pages)
            continue

        print(f"‚ö†Ô∏è El mes {month} de {year} tiene m√°s de 500 p√°ginas, iterando por d√≠as.")
        for day in range(1, days_in_month + 1):
            iso_day = f"{year}-{month:02d}-{day:02d}"
            params["primary_release_date.gte"] = iso_day
            params["primary_release_date.lte"] = iso_day

            day_pages = _probe_total_pages(params, f"{year}-{month}-{day}")
            if day_pages is None:
                continue
            print(f"üìÑ Total de p√°ginas para {year}-{month}-{day}: {day_pages}")

            if day_pages > _MAX_PAGE:
                # A single day is the smallest window available, so anything
                # past the cap cannot be retrieved; say so instead of
                # truncating silently.
                print(f"Aviso: {iso_day} tiene {day_pages} páginas; solo se descargan las primeras {_MAX_PAGE}.")
            _download_window(year, month, day, day_pages)

# Export everything collected in `movies_data` to the CSV file: one header
# row followed by one row per movie tuple.
column_titles = ["ID", "T√≠tulo", "Fecha de Estreno", "Backdrop Path", "G√©neros", "Idioma Original", "T√≠tulo Original", "Descripci√≥n", "Popularidad", "Poster Path", "Puntaje Promedio", "Cantidad de Votos"]

with open(csv_filename, mode='w', newline='', encoding='utf-8') as out_file:
    csv_out = csv.writer(out_file)
    csv_out.writerow(column_titles)
    for movie_row in movies_data:
        csv_out.writerow(movie_row)

print(f"‚úÖ Archivo CSV generado: {csv_filename}")


üìÖ Procesando el a√±o: 1890
üì¢ Total de p√°ginas para 1890: 1
üìÖ Procesando el a√±o: 1891
üì¢ Total de p√°ginas para 1891: 1
üìÖ Procesando el a√±o: 1892
üì¢ Total de p√°ginas para 1892: 1
üìÖ Procesando el a√±o: 1893
üì¢ Total de p√°ginas para 1893: 1
üìÖ Procesando el a√±o: 1894
üì¢ Total de p√°ginas para 1894: 1
üìÖ Procesando el a√±o: 1895
üì¢ Total de p√°ginas para 1895: 1
üìÖ Procesando el a√±o: 1896
üì¢ Total de p√°ginas para 1896: 1
üìÖ Procesando el a√±o: 1897
üì¢ Total de p√°ginas para 1897: 1
üìÖ Procesando el a√±o: 1898
üì¢ Total de p√°ginas para 1898: 1
üìÖ Procesando el a√±o: 1899
üì¢ Total de p√°ginas para 1899: 1
üìÖ Procesando el a√±o: 1900
üì¢ Total de p√°ginas para 1900: 1
üìÖ Procesando el a√±o: 1901
üì¢ Total de p√°ginas para 1901: 1
üìÖ Procesando el a√±o: 1902
üì¢ Total de p√°ginas para 1902: 1
üìÖ Procesando el a√±o: 1903
üì¢ Total de p√°ginas para 1903: 1
üìÖ Procesando el a√±o: 1904
üì¢ Total de p√°ginas para 1904: 1
üìÖ Proce