In [7]:
import asyncio
import json
import os

import aiohttp
import pandas as pd
from aiohttp import ClientSession
from tqdm.notebook import tqdm

# OMDb credentials and endpoint.
# The key is read from the OMDB_API_KEY environment variable when it is set.
# NOTE(review): the literal fallback only keeps existing runs working; a key
# committed to a notebook should be rotated and the fallback removed.
API_KEY = os.environ.get('OMDB_API_KEY', '593e3b55')
# NOTE(review): plain http sends the key unencrypted - confirm whether the
# https endpoint can be used instead.
BASE_URL = 'http://www.omdbapi.com/'


# Upper bound on the number of shows processed at the same time.
CONCURRENT_REQUESTS = 50  # Adjust as needed

async def search_show_by_name(session, name):
    """Look up a TV series on OMDb by title.

    Args:
        session: Client session whose ``get`` is used to issue the request.
        name: Title to look up; sent as the ``t`` query parameter.

    Returns:
        The decoded JSON payload when it contains ``Response == 'True'``,
        otherwise ``None``. ``None`` is also returned when the request or the
        JSON decoding raises.
    """
    # Hand the query to the client as `params` so it is percent-encoded.
    # Interpolating the raw title into the URL breaks on names that contain
    # '&', '#', '+' or '?' (e.g. "Law & Order").
    params = {'apikey': API_KEY, 't': name, 'type': 'series'}
    try:
        async with session.get(BASE_URL, params=params) as response:
            data = await response.json()
            if data.get('Response') == 'True':
                return data
            return None
    except Exception:
        # Deliberate best-effort: a failed lookup means "no result" so that
        # one bad request does not abort a crawl over many shows.
        return None

async def get_show_episodes(session, imdb_id, season_number):
    """Retrieve the episode list of one season of a show from OMDb.

    Args:
        session: Client session whose ``get`` is used to issue the request.
        imdb_id: Value sent as the ``i`` query parameter.
        season_number: Value sent as the ``Season`` query parameter.

    Returns:
        The payload's ``'Episodes'`` entry (an empty list when the key is
        absent) if the payload has ``Response == 'True'``; ``None`` for any
        other payload or when the request/decoding raises.
    """
    season_url = f'{BASE_URL}?apikey={API_KEY}&i={imdb_id}&Season={season_number}'
    try:
        async with session.get(season_url) as resp:
            payload = await resp.json()
            succeeded = payload.get('Response') == 'True'
            return payload.get('Episodes', []) if succeeded else None
    except Exception:
        # Best-effort: any failure is reported to the caller as "no data".
        return None

async def fetch_show_details(df, max_seasons=10):
    """Collect OMDb series and episode data for the shows in ``df['name']``.

    Each unique, non-null name is looked up with ``search_show_by_name``; a
    result is kept only when its title equals the requested name ignoring
    case. Episodes are then fetched season by season with
    ``get_show_episodes`` and everything is written to
    ``omdb_episodes.json``.

    Args:
        df: DataFrame with a ``name`` column of show titles.
        max_seasons: Number of seasons to request when the search result has
            no usable ``totalSeasons`` value. Defaults to 10.

    Returns:
        None. The collected data is written to ``omdb_episodes.json`` as a
        mapping of requested name -> {'title', 'overview', 'seasons'}.
    """
    semaphore = asyncio.Semaphore(CONCURRENT_REQUESTS)

    def season_numbers(search_result):
        """Return the season numbers to request for one search result."""
        try:
            total = int(search_result.get('totalSeasons'))
        except (TypeError, ValueError):
            # Field missing or not numeric: fall back to the fixed cap.
            total = max_seasons
        return range(1, max(total, 0) + 1)

    async def fetch_show(session, show_name):
        """Fetch one show; returns ``{show_name: details}`` or ``None``."""
        # The semaphore is held for the whole show (search plus every season
        # request), so it bounds the number of shows in flight.
        async with semaphore:
            search_result = await search_show_by_name(session, show_name)
            if not search_result:
                return None

            show_title = search_result.get('Title')
            imdb_id = search_result.get('imdbID')
            # A payload without a title or id cannot be matched or expanded;
            # skip it instead of raising and aborting the whole crawl.
            if not show_title or not imdb_id:
                return None
            if show_title.lower() != show_name.lower():
                return None

            seasons = {}
            for season_number in season_numbers(search_result):
                episodes = await get_show_episodes(session, imdb_id, season_number)
                if not episodes:
                    continue
                seasons[season_number] = [
                    {
                        'episode_number': episode.get('Episode'),
                        'episode_title': episode.get('Title'),
                        'episode_overview': episode.get('Plot', ''),
                        'air_date': episode.get('Released'),
                    }
                    for episode in episodes
                ]

            return {
                show_name: {
                    'title': show_title,
                    'overview': search_result.get('Plot'),
                    'seasons': seasons,
                }
            }

    # Empty cells (NaN) cannot be searched and duplicates would only repeat
    # identical requests, so both are dropped up front.
    show_names = df['name'].dropna().astype(str).drop_duplicates().tolist()

    shows_dict = {}
    async with ClientSession() as session:
        tasks = [fetch_show(session, show_name) for show_name in show_names]

        # Track progress with tqdm as the lookups complete.
        for future in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc="Fetching show details"):
            result = await future
            if result:
                shows_dict.update(result)

    with open('omdb_episodes.json', 'w', encoding='utf-8') as json_file:
        json.dump(shows_dict, json_file, ensure_ascii=False, indent=4)

    print("Details saved to 'omdb_episodes.json'")


# Load the show list; fetch_show_details reads its 'name' column.
df = pd.read_csv('final_data.csv') 


# Top-level await: valid in an IPython/Jupyter cell, not in a plain script.
await fetch_show_details(df)


Fetching show details:   0%|          | 0/11491 [00:00<?, ?it/s]

  handle = events.Handle(callback, args, self, context)


Details saved to 'omdb_episodes.json'


In [31]:
import requests
import pandas as pd
from tqdm import tqdm
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

# TVmaze API root.
# NOTE(review): this rebinds the BASE_URL that the OMDb cell defines, and the
# functions below reuse that cell's function names; re-run the OMDb cell before
# calling its code again after this cell has executed.
BASE_URL = 'https://api.tvmaze.com'

def search_show_by_name(name, timeout=10):
    """Look up a single show on TVmaze by name.

    Args:
        name: Show name; sent as the ``q`` query parameter.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot block a worker forever.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the
        response has an error status, or the body is not valid JSON.
    """
    try:
        # Passing the name via `params` lets requests percent-encode it;
        # building the URL by hand breaks on names containing '&', '#' or '?'.
        response = requests.get(
            f'{BASE_URL}/singlesearch/shows',
            params={'q': name},
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # NOTE(review): every error status, including HTTP 429 (rate limited),
        # is reported as "not found"; consider retrying with a back-off.
        return None

def get_show_episodes(show_id, timeout=10):
    """Fetch the episode list of a TVmaze show.

    Args:
        show_id: Show identifier placed in the ``/shows/{id}/episodes`` path.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot block a worker forever.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the
        response has an error status, or the body is not valid JSON.
    """
    url = f'{BASE_URL}/shows/{show_id}/episodes'
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # NOTE(review): every error status, including HTTP 429 (rate limited),
        # is reported as "no episodes"; consider retrying with a back-off.
        return None

def fetch_show_details(show_name):
    """Build the nested show/season/episode record for one show name.

    The show is looked up with ``search_show_by_name`` and kept only when the
    returned ``name`` equals ``show_name`` ignoring case. Its episodes are
    then fetched with ``get_show_episodes`` and grouped by season number.

    Args:
        show_name: Name to look up. Non-string values (for example NaN from
            an empty CSV cell) are skipped.

    Returns:
        ``(show_name, details)`` where ``details`` has the keys ``'title'``,
        ``'overview'`` and ``'seasons'`` (season number -> list of episode
        dicts), or ``None`` when the show is not found or does not match.
    """
    # Calling .lower() on a non-string would raise inside the worker thread
    # and surface in the caller via future.result(); skip such names instead.
    if not isinstance(show_name, str):
        return None

    search_result = search_show_by_name(show_name)
    if not search_result:
        return None

    show_title = search_result.get('name')
    if not isinstance(show_title, str) or show_title.lower() != show_name.lower():
        return None

    show_id = search_result.get('id')
    if show_id is None:
        return None

    # Group the flat episode list by season; a failed episode request (None)
    # leaves the show with an empty 'seasons' mapping.
    seasons = {}
    for episode in get_show_episodes(show_id) or []:
        seasons.setdefault(episode.get('season'), []).append({
            'episode_number': episode.get('number'),
            'episode_title': episode.get('name'),
            'episode_overview': episode.get('summary'),
            'air_date': episode.get('airdate'),
        })

    show_details = {
        'title': show_title,
        'overview': search_result.get('summary'),
        'seasons': seasons,
    }
    return show_name, show_details

def main():
    """Fetch details for every show in ``final_data.csv`` and save them.

    Reads the ``name`` column of ``final_data.csv``, runs
    ``fetch_show_details`` for each unique non-null name on a pool of 10
    worker threads, and writes the collected mapping of show name -> details
    to ``shows_details.json``.
    """
    df = pd.read_csv('final_data.csv')

    # Only the 'name' column is needed. Empty cells and repeated names are
    # dropped so they do not trigger failing or redundant lookups.
    show_names = df['name'].dropna().drop_duplicates().tolist()

    shows_dict = {}
    failed_names = []

    with ThreadPoolExecutor(max_workers=10) as executor:
        future_to_show = {
            executor.submit(fetch_show_details, show_name): show_name
            for show_name in show_names
        }
        for future in tqdm(as_completed(future_to_show), total=len(future_to_show), desc="Fetching show details", leave=False):
            try:
                result = future.result()
            except Exception:
                # One unexpected failure must not discard everything that has
                # already been collected; remember the name and carry on.
                failed_names.append(future_to_show[future])
                continue
            if result:
                show_name, show_details = result
                shows_dict[show_name] = show_details

    # Save the nested structure to a JSON file
    with open('shows_details.json', 'w', encoding='utf-8') as json_file:
        json.dump(shows_dict, json_file, ensure_ascii=False, indent=4)

    if failed_names:
        print(f"{len(failed_names)} show(s) raised an unexpected error and were skipped")
    print("Details saved to 'shows_details.json'")

# Entry point: runs the TVmaze crawl when this code is executed directly
# (in a notebook cell __name__ is "__main__", so the cell starts it at once).
if __name__ == "__main__":
    main()


                                                                                

Details saved to 'shows_details.json'


In [33]:
593e3b55

Number of show names in the JSON file: 813
