# Fancaps-parser

## Install and import libraries

In [None]:
! pip install -q beautifulsoup4 tqdm httpx[http2]==0.24.1

In [None]:
import os
import re
import csv
import time
import json
import httpx
import random

import concurrent.futures
from bs4 import BeautifulSoup
from tqdm.auto import tqdm

In [None]:
# Directory that receives every generated file (created below if missing).
output_dir = './'

# File names of the per-section title lists (CSV).
movies_list_filename = 'movies_list.csv'
tv_list_filename = 'tv_list.csv'
anime_list_filename = 'anime_list.csv'

# File names of the per-section scraped data (JSON).
movies_data_filename = 'movies_data.json'
tv_data_filename = 'tv_data.json'
anime_data_filename = 'anime_data.json'

os.makedirs(output_dir, exist_ok=True)

# Full paths of the CSV lists inside output_dir.
movies_list_filepath = os.path.join(output_dir, movies_list_filename)
tv_list_filepath = os.path.join(output_dir, tv_list_filename)
anime_list_filepath = os.path.join(output_dir, anime_list_filename)

# Full paths of the JSON data files inside output_dir.
movies_data_filepath = os.path.join(output_dir, movies_data_filename)
tv_data_filepath = os.path.join(output_dir, tv_data_filename)
anime_data_filepath = os.path.join(output_dir, anime_data_filename)

## Bypass Cloudflare settings

In [None]:
# NOTE(review): presumably the User-Agent must be the one of the browser that
# obtained the cf_clearance cookie — confirm.
user_agent = '...' # Paste your User-Agent header
cf_clearance = '...' # Paste cf_clearance token

# Request headers passed to every httpx call in this notebook.
headers = {
    'Host': 'fancaps.net',
    'User-Agent': user_agent,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br, zstd'
}

# Cookies passed to every httpx call in this notebook.
cookies = {
    'cf_clearance': cf_clearance
}

## Parse lists

In [None]:
def split_title_and_year(movie_title: str):
    """Split a title of the form ``'Name (YYYY)'`` into ``(name, year)``.

    The title is stripped before matching. When it does not end in a
    four-digit year in parentheses, ``(movie_title, None)`` is returned with
    the input left exactly as it was passed in.
    """
    found = re.match(r"^(.*)\((\d{4})\)$", movie_title.strip())
    if found is None:
        return movie_title, None

    name, year_text = found.groups()
    return name.strip(), int(year_text)


def split_title_and_season(tv_serial_title: str):
    """Split a title of the form ``'Name Season N'`` into ``(name, season)``.

    The title is stripped before matching. When it does not end in
    ``' Season <digits>'``, ``(tv_serial_title, None)`` is returned with the
    input left exactly as it was passed in.
    """
    found = re.match(r"^(.*) Season (\d+)$", tv_serial_title.strip())
    if found is None:
        return tv_serial_title, None

    name, season_text = found.groups()
    return name.strip(), int(season_text)


def parse_id_in_url(url: str):
    """Return the integer id that opens the query string (``...?<id>-<slug>``)."""
    query = url.split('?', 1)[1]
    id_text = query.split('-', 1)[0]
    return int(id_text)


def get_fancaps_list_items(url, headers, cookies):
    """Download a list page and return its ``div.row`` elements.

    Args:
        url: Address of the list page.
        headers: Request headers passed to ``httpx.get``.
        cookies: Request cookies passed to ``httpx.get``.

    Returns:
        The list of ``div.row`` tags found inside ``div.single_post_content``,
        or ``None`` when the response status is not 200 or the expected
        container is missing from the page.
    """
    response = httpx.get(url, headers=headers, cookies=cookies, timeout=60)

    status_code = response.status_code
    if status_code != 200:
        print(f'Failed to fetch the website! Status code: {status_code}')
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    results_content = soup.find('div', class_='single_post_content')
    if results_content is None:
        # A 200 response without the results container would otherwise crash
        # with AttributeError on the next line; report it like a failed fetch.
        print(f'Failed to parse the list page: results container not found. URL: {url}')
        return None

    return results_content.find_all('div', class_='row')


def parse_fancaps_movies_list(url, headers, cookies):
    """Parse the movies list page into ``(title, year, movie_id)`` tuples.

    Args:
        url: Address of the movies list page.
        headers: Request headers for the download.
        cookies: Request cookies for the download.

    Returns:
        A list of ``(title, year, movie_id)`` tuples; ``year`` is ``None`` when
        the title carries no ``(YYYY)`` suffix. The list is empty when the page
        could not be fetched.
    """
    results = []
    rows = get_fancaps_list_items(url, headers, cookies)
    if rows is None:
        return results

    link_suffix = ' Images'

    for row in rows:
        h4_tag = row.find('h4')
        if not h4_tag:
            continue  # Skip ad block

        a_tag = h4_tag.find('a')
        title_parsed = a_tag.text.strip()
        # Only drop the suffix when it is really there, so a link text without
        # it does not lose the last seven characters of the title.
        if title_parsed.endswith(link_suffix):
            title_parsed = title_parsed[:-len(link_suffix)]

        parsed_url = a_tag['href']
        title, year = split_title_and_year(title_parsed)
        movie_id = int(parsed_url.partition('&movieid=')[2])

        results.append((title, year, movie_id))

    return results


def parse_fancaps_tv_list(url, headers, cookies):
    """Parse the TV list page into ``(title, season, tv_id)`` tuples.

    ``season`` is ``None`` when the link text has no ``' Season N'`` suffix.
    An empty list is returned when the page could not be fetched.
    """
    rows = get_fancaps_list_items(url, headers, cookies)
    if rows is None:
        return []

    parsed = []
    for row in rows:
        heading = row.find('h4')
        if not heading:
            # Rows without a heading are ad blocks.
            continue

        link = heading.find('a')
        title, season = split_title_and_season(link.text.strip())
        parsed.append((title, season, parse_id_in_url(link['href'])))

    return parsed


def parse_fancaps_anime_list(url, headers, cookies):
    """Parse the anime list page into ``(title, alternative_title, anime_id)`` tuples.

    Args:
        url: Address of the anime list page.
        headers: Request headers for the download.
        cookies: Request cookies for the download.

    Returns:
        A list of ``(title, alternative_title, anime_id)`` tuples. The
        alternative title is an empty string when the row has no ``span``.
        The list is empty when the page could not be fetched.
    """
    results = []
    rows = get_fancaps_list_items(url, headers, cookies)
    if rows is None:
        return results

    for row in rows:
        h4_tag = row.find('h4')
        if not h4_tag:
            continue  # Skip ad block

        a_tag = h4_tag.find('a')
        title = a_tag.text.strip()

        # A row without a span must not abort the whole list with
        # AttributeError; record an empty alternative title instead.
        span = row.find('span')
        alternative_title = span.text.strip() if span is not None else ''

        parsed_url = a_tag['href']
        anime_id = parse_id_in_url(parsed_url)

        results.append((title, alternative_title, anime_id))

    return results


def dump_fancaps_lists_to_csv(results, filename, header):
    """Write ``header`` followed by every row of ``results`` to a UTF-8 CSV file.

    An existing file at ``filename`` is overwritten.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as out:
        rows_writer = csv.writer(out)
        rows_writer.writerow(header)
        rows_writer.writerows(results)

### Parse movie list

In [None]:
# NOTE(review): the trailing '%' presumably acts as a match-all wildcard for
# the site's title filter — confirm.
movies_list_url = 'https://fancaps.net/movies/showList.php?%'
# List of (title, year, movie_id) tuples; empty when the page fetch failed.
movies_list = parse_fancaps_movies_list(movies_list_url, headers, cookies)

print('Total parsed:', len(movies_list))

In [None]:
# Column names, in the same order as the (title, year, id) tuples.
movies_list_csv_headers = ['Title', 'Year', 'Id']

# Write the movies list to movies_list_filepath.
dump_fancaps_lists_to_csv(movies_list, movies_list_filepath, movies_list_csv_headers)

### Parse tv list

In [None]:
# NOTE(review): the trailing '%' presumably acts as a match-all wildcard for
# the site's title filter — confirm.
tv_list_url = 'https://fancaps.net/tv/showList.php?%'
# List of (title, season, tv_id) tuples; empty when the page fetch failed.
tv_list = parse_fancaps_tv_list(tv_list_url, headers, cookies)

print('Total parsed:', len(tv_list))

In [None]:
# Column names, in the same order as the (title, season, id) tuples.
tv_list_csv_headers = ['Title', 'Season', 'Id']

# Write the TV list to tv_list_filepath.
dump_fancaps_lists_to_csv(tv_list, tv_list_filepath, tv_list_csv_headers)

### Parse anime list

In [None]:
# NOTE(review): the trailing '%' presumably acts as a match-all wildcard for
# the site's title filter — confirm.
anime_list_url = 'https://fancaps.net/anime/showList.php?%'
# List of (title, alternative_title, anime_id) tuples; empty when the page fetch failed.
anime_list = parse_fancaps_anime_list(anime_list_url, headers, cookies)

print('Total parsed:', len(anime_list))

In [None]:
# Column names, in the same order as the (title, alternative title, id) tuples.
anime_list_csv_headers = ['Title', 'Alternative title', 'Id']

# Write the anime list to anime_list_filepath.
dump_fancaps_lists_to_csv(anime_list, anime_list_filepath, anime_list_csv_headers)

## Parse data

In [None]:
def extract_image_id(url: str) -> int:
    """Return the integer between the last ``/`` and the last ``.`` of ``url``.

    Raises ``ValueError`` when that part of the URL is not an integer.
    """
    dot_at = url.rfind('.')
    # The slash is searched in the part of the URL that ends one character
    # before the dot.
    slash_at = url.rfind('/', 0, dot_at - 1)
    return int(url[slash_at + 1:dot_at])


def fetch_url_with_retries(url, headers, cookies, retries=3):
    """Download ``url`` and return the response body, retrying on failure.

    Args:
        url: Address to fetch (redirects are followed).
        headers: Request headers passed to ``httpx.get``.
        cookies: Request cookies passed to ``httpx.get``.
        retries: Maximum number of attempts.

    Returns:
        The raw response content (bytes) of the first attempt answered with
        status 200.

    Raises:
        Exception: When no attempt succeeded. The last exception raised by
            ``httpx.get``, if any, is attached as the cause.
    """
    last_error = None

    for i in range(1, retries + 1):
        try:
            response = httpx.get(url, headers=headers, cookies=cookies, follow_redirects=True)
        except Exception as e:
            last_error = e
            print(f'Exception while fetching by url ({url}): {e}. Retrying {i}/{retries}...')
        else:
            status_code = response.status_code

            if status_code == 200:
                if i != 1:
                    print(f'URL fetched successfully after {i-1} retr{"y" if i == 2 else "ies"}: {url}')

                return response.content

            print(f'Failed to fetch the website! Status code: {status_code}. URL: {url}. Retrying {i}/{retries}...')

        # Pause with a little jitter between attempts, but do not waste time
        # sleeping once the final attempt has already failed.
        if i < retries:
            time.sleep(1 + random.random())

    raise Exception(f'Number of retries ({retries}) exceeded for url: {url}') from last_error


def fetch_movie_with_retries(title, year, movie_id, headers, cookies, retries=3):
    """Collect the first and last image ids of a movie gallery.

    The first id is read from the first ``img`` of the gallery's default page,
    the last id from the last ``img`` of the page requested as ``page=999999``.
    NOTE(review): presumably the site answers an out-of-range page number with
    the final page — confirm.

    Returns a dict with the keys ``id``, ``title``, ``year``,
    ``first_image_id`` and ``last_image_id``.
    """
    def load_images_bar(page_url):
        # Download one gallery page and return its 'middle_bar' container.
        html = fetch_url_with_retries(page_url, headers, cookies, retries)
        body = BeautifulSoup(html, 'html.parser').find('section', {'id': 'contentbody'})
        return body.find('div', {'class': 'middle_bar'})

    url = f'https://fancaps.net/movies/MovieImages.php?movieid={movie_id}'

    first_image_id = extract_image_id(load_images_bar(url).find('img')['src'])

    last_page_images = load_images_bar(url + '&page=999999').find_all('img')
    last_image_id = extract_image_id(last_page_images[-1]['src'])

    return {
        'id': movie_id,
        'title': title,
        'year': year,
        'first_image_id': first_image_id,
        'last_image_id': last_image_id,
    }


def fetch_episodes_info(serial_id, url_title, is_anime, headers, cookies, retries=3):
    """List the episodes of a TV season or anime as ``(episode_id, episode_str)``.

    Pages of the ``showimages.php`` listing are requested one after another
    until a page contains no episode link. ``episode_str`` is the link text
    after its last ``'Episode '`` marker, or, when the marker is absent, the
    link text with a fixed-length prefix cut off (the length of
    ``'Images From '`` for anime, of ``'See More Images '`` for TV).
    """
    section = 'anime' if is_anime else 'tv'
    base_url = f'https://fancaps.net/{section}/showimages.php?{serial_id}-{url_title}'

    # Attributes that identify an episode link differ between the two sections.
    link_attrs = {'style': 'color:black;'} if is_anime else {'class': 'btn btn-block'}
    fallback_prefix = 'Images From ' if is_anime else 'See More Images '
    episode_marker = 'Episode '

    collected = []
    page = 1

    while True:
        page_url = base_url if page == 1 else base_url + '&page=' + str(page)
        html = fetch_url_with_retries(page_url, headers, cookies, retries)

        content = BeautifulSoup(html, 'html.parser').find('section', {'id': 'contentbody'})
        links = content.find_all('a', link_attrs)
        if not links:
            break

        for anchor in links:
            href = anchor.get('href')
            episode_id = int(href.split('?', 1)[1].split('-', 1)[0])

            label = anchor.text.strip()
            marker_at = label.rfind(episode_marker)
            if marker_at == -1:
                episode_str = label[len(fallback_prefix):].strip()
            else:
                episode_str = label[marker_at + len(episode_marker):].strip()

            collected.append((episode_id, episode_str))

        page += 1

    return collected


def fetch_last_image_id(url, page, is_anime, headers, cookies, retries=3):
    """Return the id of the last image on one page of an episode gallery.

    Page 1 is requested through ``url`` itself, any other page through
    ``url`` + ``'&page=<page>'``. Returns ``None`` when the gallery row of
    that page contains no ``img``.
    """
    page_url = url if page == 1 else url + '&page=' + str(page)
    html = fetch_url_with_retries(page_url, headers, cookies, retries)

    content = BeautifulSoup(html, 'html.parser').find('section', {'id': 'contentbody'})

    # The gallery is the first 'row' div that follows the section heading.
    heading_text = 'Episode Screencaps' if is_anime else 'Episode Images'
    gallery = content.find('h3', string=heading_text).find_next('div', class_='row')

    images = gallery.find_all('img')
    if images:
        return extract_image_id(images[-1]['src'])

    return None


def last_image_id_binary_search(url, l, r, is_anime, headers, cookies, retries=3):
    """Bisect the page range ``l``..``r`` for the last page that has images.

    A probed page that yields an image id becomes the new lower bound, a page
    that yields ``None`` becomes the new upper bound; the search ends when the
    bounds are adjacent. If no probed page yielded an id, the lower bound
    itself is fetched.

    Returns ``(last_image_id, page)`` where ``page`` is the final lower bound.
    """
    low, high = l, r
    found_id = None

    while high - low != 1:
        middle = low + (high - low) // 2
        candidate = fetch_last_image_id(url, middle, is_anime, headers, cookies, retries)

        if candidate is None:
            high = middle
        else:
            found_id, low = candidate, middle

    if found_id is None:
        found_id = fetch_last_image_id(url, low, is_anime, headers, cookies, retries)

    return found_id, low


# Accented letters that are replaced by their plain ASCII counterpart.
translation_table = str.maketrans({
    'ō': 'o',
    'á': 'a',
    'ú': 'u',
})


def translate_string_to_url_format(text: str):
    """Convert a title into the underscore-separated form used in URLs.

    Three passes: map the accented letters of ``translation_table`` to ASCII,
    delete the characters ``. ! ? ( ) + "``, then replace every ``//`` pair
    and every remaining character outside ``a-zA-Z0-9&~%``` with ``_``.
    """
    ascii_text = text.translate(translation_table)
    without_punctuation = re.sub(r'[.!?()+"]', '', ascii_text)
    return re.sub(r'\/\/|[^a-zA-Z0-9&~%`]', '_', without_punctuation)


def is_first_char_digit(input_string: str):
    """Tell whether ``input_string`` starts with a digit.

    A falsy argument (empty string, ``None``) is returned unchanged, so the
    result is only guaranteed to be usable as a truth value.
    """
    if not input_string:
        return input_string

    return input_string[0].isdigit()


def fetch_episode_images_info(episode_id, episode_str, url_title, is_anime, headers, cookies, retries=3, default_last_page=20):
    """Find the first and last image ids of one episode gallery.

    The first id is read from the gallery's first page. The last page is
    located by probing ``default_last_page`` and its neighbour, then stepping
    15 pages at a time in the appropriate direction, and finally bisecting
    the bracket that was found.

    Args:
        episode_id: Numeric id of the episode used in the URL.
        episode_str: Episode label; converted to its URL form here.
        url_title: URL form of the show title.
        is_anime: Selects the ``anime`` or ``tv`` section and heading text.
        headers: Request headers for every download.
        cookies: Request cookies for every download.
        retries: Attempts per download.
        default_last_page: Page probed first as the guess for the last page.

    Returns:
        Tuple ``(first_image_id, last_image_id, last_page)``.
    """
    episode_str_replaced = translate_string_to_url_format(episode_str)

    # Labels that start with a digit (and anime labels starting with
    # 'Special_') get an 'Episode_' prefix in the URL.
    if is_first_char_digit(episode_str_replaced) or (is_anime and episode_str_replaced.startswith('Special_')):
        episode_url_param = f'Episode_{episode_str_replaced}'
    else:
        episode_url_param = episode_str_replaced

    url = f'https://fancaps.net/{"anime" if is_anime else "tv"}/episodeimages.php?{episode_id}-{url_title}/{episode_url_param}'

    response_content = fetch_url_with_retries(url, headers, cookies, retries)

    soup = BeautifulSoup(response_content, 'html.parser')
    contentbody_section = soup.find('section', {'id': 'contentbody'})

    # The gallery is the first 'row' div that follows the section heading.
    marker = 'Episode Screencaps' if is_anime else 'Episode Images'
    h3_marker = contentbody_section.find('h3', string=marker)
    images_div = h3_marker.find_next('div', class_='row')

    first_image = images_div.find('img')
    first_image_src = first_image['src']
    first_image_id = extract_image_id(first_image_src)

    current_page = default_last_page
    # Number of pages skipped per probe while bracketing the last page.
    step = 15

    last_image_id = fetch_last_image_id(url, current_page, is_anime, headers, cookies, retries)

    if last_image_id is None:
        # The guessed page has no images: the last page lies before it.
        current_page -= 1
        last_image_id = fetch_last_image_id(url, current_page, is_anime, headers, cookies, retries)
        if last_image_id is not None:
            # The page right before the guess has images, so it is the last one.
            return (first_image_id, last_image_id, current_page)

        # Step backwards (never below page 1) until a page with images is found.
        while last_image_id is None:
            current_page = max(current_page - step, 1)
            last_image_id = fetch_last_image_id(url, current_page, is_anime, headers, cookies, retries)

        l = current_page
        r = current_page + step
    else:
        # The guessed page has images: check whether the next page is empty.
        current_page += 1
        test_for_no_images = fetch_last_image_id(url, current_page, is_anime, headers, cookies, retries)
        if test_for_no_images is None:
            # The guess was exactly the last page.
            return (first_image_id, last_image_id, current_page - 1)

        # Step forwards until a page without images is found.
        while last_image_id is not None:
            current_page += step
            last_image_id = fetch_last_image_id(url, current_page, is_anime, headers, cookies, retries)

        l = current_page - step
        r = current_page

    # l is a page with images and r a page without; bisect between them.
    last_image_id, last_image_page = last_image_id_binary_search(url, l, r, is_anime, headers, cookies, retries)

    return (first_image_id, last_image_id, last_image_page)


def fetch_tv_info(title, season, id, headers, cookies, retries=3):
    """Collect the image id range of every episode of one TV season.

    Returns a dict with the keys ``id``, ``title``, ``season`` and
    ``episodes`` (a list of dicts with ``id``, ``episode``,
    ``first_image_id`` and ``last_image_id``). Any exception is printed and
    turned into a ``None`` result.
    """
    try:
        slug = translate_string_to_url_format(title)
        url_title = f'{slug}_Season_{season}' if season else slug

        episodes = []
        # The last page found for one episode is the first guess for the next.
        page_hint = 20

        for episode_id, episode_str in fetch_episodes_info(id, url_title, False, headers, cookies, retries):
            first_image_id, last_image_id, page_hint = fetch_episode_images_info(
                episode_id, episode_str, url_title, False, headers, cookies, retries, page_hint
            )
            episodes.append({
                'id': episode_id,
                'episode': episode_str,
                'first_image_id': first_image_id,
                'last_image_id': last_image_id,
            })

        return {'id': id, 'title': title, 'season': season, 'episodes': episodes}
    except Exception as e:
        print(f'Exception while fetching tv: {title} Season {season} ({id}). {e}')
        return None


def fetch_anime_info(title, alternative_title, id, headers, cookies, retries=3):
    """Collect the image id range of every episode of one anime.

    Returns a dict with the keys ``id``, ``title``, ``alternative_title`` and
    ``episodes`` (a list of dicts with ``id``, ``episode``,
    ``first_image_id`` and ``last_image_id``). Any exception is printed and
    turned into a ``None`` result.
    """
    try:
        url_title = translate_string_to_url_format(title)

        episodes = []
        # The last page found for one episode is the first guess for the next.
        page_hint = 20

        for episode_id, episode_str in fetch_episodes_info(id, url_title, True, headers, cookies, retries):
            first_image_id, last_image_id, page_hint = fetch_episode_images_info(
                episode_id, episode_str, url_title, True, headers, cookies, retries, page_hint
            )
            episodes.append({
                'id': episode_id,
                'episode': episode_str,
                'first_image_id': first_image_id,
                'last_image_id': last_image_id,
            })

        return {
            'id': id,
            'title': title,
            'alternative_title': alternative_title,
            'episodes': episodes,
        }
    except Exception as e:
        print(f'Exception while fetching anime: {title} ({id}). {e}')
        return None


def parse_movies(movies_list, headers, cookies, retries = 3):
    """Fetch the image id range of every movie on a thread pool.

    ``movies_list`` holds ``(title, year, movie_id)`` tuples. Returns
    ``(movies_data, movies_with_error)``: the fetched records sorted by title
    and the ids of the movies whose fetch raised or returned ``None``.
    """
    parsed, failed_ids = [], []

    with concurrent.futures.ThreadPoolExecutor() as pool, tqdm(total=len(movies_list)) as progress:
        pending = {}
        for title, year, movie_id in movies_list:
            job = pool.submit(fetch_movie_with_retries, title, year, movie_id, headers, cookies, retries)
            pending[job] = (title, movie_id)

        for job in concurrent.futures.as_completed(pending):
            title, movie_id = pending[job]
            try:
                movie = job.result()
            except Exception as e:
                failed_ids.append(movie_id)
                print(f'Exception for movie: {title} ({movie_id}). Error: {e}')
            else:
                if movie is None:
                    failed_ids.append(movie_id)
                else:
                    parsed.append(movie)

            progress.update(1)

    parsed.sort(key=lambda entry: entry['title'])

    return parsed, failed_ids


def parse_tv(tv_list, headers, cookies, retries = 3):
    """Fetch the episode data of every TV season on a thread pool.

    ``tv_list`` holds ``(title, season, tv_id)`` tuples. Returns
    ``(tv_data, tv_with_error)``: the fetched records sorted by title and the
    ids of the seasons whose fetch raised or returned ``None``.
    """
    parsed, failed_ids = [], []

    with concurrent.futures.ThreadPoolExecutor() as pool, tqdm(total=len(tv_list)) as progress:
        pending = {}
        for title, season, tv_id in tv_list:
            job = pool.submit(fetch_tv_info, title, season, tv_id, headers, cookies, retries)
            pending[job] = (title, season, tv_id)

        for job in concurrent.futures.as_completed(pending):
            title, season, tv_id = pending[job]
            try:
                record = job.result()
            except Exception as e:
                failed_ids.append(tv_id)
                print(f'Exception for tv: {title} Season {season} ({tv_id}). {e}')
            else:
                if record is None:
                    failed_ids.append(tv_id)
                else:
                    parsed.append(record)

            progress.update(1)

    parsed.sort(key=lambda entry: entry['title'])

    return parsed, failed_ids


def parse_anime(anime_list, headers, cookies, retries = 3):
    """Fetch the episode data of every anime on a thread pool.

    ``anime_list`` holds ``(title, alternative_title, anime_id)`` tuples.
    Returns ``(anime_data, anime_with_error)``: the fetched records sorted by
    title and the ids of the anime whose fetch raised or returned ``None``.
    """
    parsed, failed_ids = [], []

    with concurrent.futures.ThreadPoolExecutor() as pool, tqdm(total=len(anime_list)) as progress:
        pending = {}
        for title, alt_title, anime_id in anime_list:
            job = pool.submit(fetch_anime_info, title, alt_title, anime_id, headers, cookies, retries)
            pending[job] = (title, anime_id)

        for job in concurrent.futures.as_completed(pending):
            title, anime_id = pending[job]
            try:
                record = job.result()
            except Exception as e:
                failed_ids.append(anime_id)
                print(f'Exception for anime: {title} ({anime_id}). {e}')
            else:
                if record is None:
                    failed_ids.append(anime_id)
                else:
                    parsed.append(record)

            progress.update(1)

    parsed.sort(key=lambda entry: entry['title'])

    return parsed, failed_ids


def fetch_movie_images_ids(movie_id, headers, cookies, retries = 3):
    """Collect the id of every image of a movie by walking all gallery pages.

    Args:
        movie_id: Numeric id of the movie.
        headers: Request headers for every download.
        cookies: Request cookies for every download.
        retries: Attempts per download.

    Returns:
        List of image ids in page order. Walking stops at the first page that
        has no image container or no images, or whose last pagination link is
        ``'#'`` or missing.
    """
    base_url = f'https://fancaps.net/movies/MovieImages.php?movieid={movie_id}&page='
    images_ids = []
    page = 1

    while True:
        response_content = fetch_url_with_retries(base_url + str(page), headers, cookies, retries)
        soup = BeautifulSoup(response_content, 'html.parser')
        images_bar = soup.find('section', {'id': 'contentbody'}).find('div', {'class': 'middle_bar'})

        title = images_bar.find('h2', {'class': 'post_title'})
        images_div = title.find_next_sibling('div', {'class': ''})
        if images_div is None:
            break

        found_images = images_div.find_all('img', {'class': 'imageFade'})
        images_ids.extend(extract_image_id(found_image.get('src')) for found_image in found_images)

        # A page without any pagination link has no next page; treating it
        # like the '#' placeholder avoids an AttributeError on None.
        next_page_link = images_bar.select_one('ul.pagination li:last-child a')
        next_page_url = next_page_link.get('href') if next_page_link is not None else '#'

        if not found_images or next_page_url == '#':
            break

        page += 1

    return images_ids


def fetch_episode_images_ids(url_title, episode_data, is_anime, headers, cookies, retries = 3):
    """Collect the id of every image of an episode by walking all gallery pages.

    ``episode_data`` supplies the ``id`` and ``episode`` label used to build
    the gallery URL. Pages are requested in order until one has no ``img`` in
    its gallery row. Returns the ids in page order.
    """
    episode_id = episode_data['id']
    slug = translate_string_to_url_format(episode_data['episode'])

    # Labels that start with a digit (and anime labels starting with
    # 'Special_') get an 'Episode_' prefix in the URL.
    needs_prefix = is_first_char_digit(slug) or (is_anime and slug.startswith('Special_'))
    episode_url_param = f'Episode_{slug}' if needs_prefix else slug

    section = 'anime' if is_anime else 'tv'
    url = f'https://fancaps.net/{section}/episodeimages.php?{episode_id}-{url_title}/{episode_url_param}'
    heading_text = 'Episode Screencaps' if is_anime else 'Episode Images'

    collected = []
    page = 1

    while True:
        page_url = url if page == 1 else url + '&page=' + str(page)
        html = fetch_url_with_retries(page_url, headers, cookies, retries)

        content = BeautifulSoup(html, 'html.parser').find('section', {'id': 'contentbody'})
        gallery = content.find('h3', string=heading_text).find_next('div', class_='row')

        images = gallery.find_all('img')
        if not images:
            return collected

        collected.extend(extract_image_id(image['src']) for image in images)
        page += 1


def fix_movies_conflicts(movies_data: list, headers, cookies, retries = 3):
    """Attach explicit image id lists to movies whose id ranges overlap.

    Movies are ordered by ``first_image_id``; a movie conflicts when its range
    starts at or before the largest ``last_image_id`` seen so far, which also
    catches a range nested inside an earlier, wider one. Every movie involved
    in a conflict gets a ``valid_images_ids`` list fetched from the site.

    ``movies_data`` is modified in place: movies whose ids could not be
    fetched are removed, and the list ends up sorted by title.

    Args:
        movies_data: Movie records with ``id``, ``title``, ``first_image_id``
            and ``last_image_id``.
        headers: Request headers for every download.
        cookies: Request cookies for every download.
        retries: Attempts per download.

    Returns:
        Tuple ``(fixed, errors)``: the number of conflicting movies that got
        their id list and the number that failed.
    """
    if len(movies_data) <= 1:
        return 0, 0

    movies_data.sort(key=lambda x: x['first_image_id'])

    # Index of the movie with the largest last_image_id seen so far.
    widest_index = 0
    movies_conflict_indices = set()

    for index, movie in enumerate(movies_data[1:], start=1):
        widest = movies_data[widest_index]

        if movie['first_image_id'] <= widest['last_image_id']:
            movies_conflict_indices.add(widest_index)
            movies_conflict_indices.add(index)

        if movie['last_image_id'] > widest['last_image_id']:
            widest_index = index

    movies_to_fix = [movies_data[i] for i in sorted(movies_conflict_indices)]

    if not movies_to_fix:
        # Nothing to download; only restore the title order.
        movies_data.sort(key=lambda x: x['title'])
        return 0, 0

    errors_while_fix_movies = set()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        with tqdm(total=len(movies_to_fix)) as pbar:
            future_get_images_id = {executor.submit(fetch_movie_images_ids, movie_data['id'], headers, cookies, retries): movie_data for movie_data in movies_to_fix}
            for future in concurrent.futures.as_completed(future_get_images_id):
                movie_data = future_get_images_id[future]
                movie_id = movie_data['id']
                try:
                    movie_data['valid_images_ids'] = future.result()
                except Exception as e:
                    errors_while_fix_movies.add(movie_id)
                    print(f'Exception for movie: {movie_data["title"]} ({movie_id}). Error: {e}')

                pbar.update(1)

    total_errors = len(errors_while_fix_movies)
    if total_errors > 0:
        # Slice assignment keeps the caller's list object, so the removal is
        # visible outside this function.
        movies_data[:] = [movie_data for movie_data in movies_data if movie_data['id'] not in errors_while_fix_movies]

    movies_data.sort(key=lambda x: x['title'])

    return len(movies_to_fix) - total_errors, total_errors


def auto_title_url_formatter(item: dict):
    """Build the URL title of a record, adding ``_Season_<n>`` when it has a season."""
    slug = translate_string_to_url_format(item['title'])
    season = item.get('season')
    return f'{slug}_Season_{season}' if season else slug


def fix_tv_and_anime_conflicts(data: list, is_anime, headers, cookies, retries = 3):
    """Attach explicit image id lists to episodes whose id ranges overlap.

    The ranges of all episodes of all records are ordered by
    ``first_image_id``; an episode conflicts when its range starts at or
    before the largest ``last_image_id`` seen so far, which also catches a
    range nested inside an earlier, wider one. Every episode involved in a
    conflict is fetched once and gets a ``valid_images_ids`` list.

    ``data`` is modified in place: episodes whose ids could not be fetched
    are removed from their record's ``episodes`` list.

    Args:
        data: Records with ``title``, optional ``season`` and ``episodes``
            (dicts with ``id``, ``episode``, ``first_image_id``,
            ``last_image_id``).
        is_anime: Selects the ``anime`` or ``tv`` section of the site.
        headers: Request headers for every download.
        cookies: Request cookies for every download.
        retries: Attempts per download.

    Returns:
        Tuple ``(fixed, errors)``, both counted in episodes.
    """
    images_intervals = [
        (episode['first_image_id'], episode['last_image_id'], i, j)
        for i, item in enumerate(data)
        for j, episode in enumerate(item['episodes'])
    ]

    if len(images_intervals) <= 1:
        return 0, 0

    images_intervals.sort(key=lambda x: x[0])

    # A dict is used as an insertion-ordered set so that an episode that
    # conflicts with several others is only fetched and counted once.
    conflict_keys = {}
    widest = images_intervals[0]

    for interval in images_intervals[1:]:
        if interval[0] <= widest[1]:
            conflict_keys[(widest[2], widest[3])] = None
            conflict_keys[(interval[2], interval[3])] = None

        if interval[1] > widest[1]:
            widest = interval

    del images_intervals

    if not conflict_keys:
        return 0, 0

    episodes_to_fix = [
        (i, j, auto_title_url_formatter(data[i]), data[i]['episodes'][j])
        for i, j in conflict_keys
    ]

    del conflict_keys

    # Record index -> set of episode indices whose fetch failed.
    errors_while_fix = {}

    with concurrent.futures.ThreadPoolExecutor() as executor:
        with tqdm(total=len(episodes_to_fix)) as pbar:
            future_get_images_id = {executor.submit(fetch_episode_images_ids, title, episode, is_anime, headers, cookies, retries): (i, j, title, episode) for i, j, title, episode in episodes_to_fix}
            for future in concurrent.futures.as_completed(future_get_images_id):
                i, j, title, episode_data = future_get_images_id[future]
                episode_id = episode_data['id']
                try:
                    episode_data['valid_images_ids'] = future.result()
                except Exception as e:
                    errors_while_fix.setdefault(i, set()).add(j)
                    print(f'Exception for: {title} ({episode_id}). Error: {e}')

                pbar.update(1)

    # Count failed episodes, the same unit as len(episodes_to_fix).
    total_errors = sum(len(error_items) for error_items in errors_while_fix.values())

    for i, error_items in errors_while_fix.items():
        # Assign back into the record so the removal is visible to the caller.
        data[i]['episodes'] = [episode for idx, episode in enumerate(data[i]['episodes']) if idx not in error_items]

    return len(episodes_to_fix) - total_errors, total_errors


def dump_json(data, filepath: str):
    """Serialise ``data`` as JSON into ``filepath``, overwriting an existing file."""
    with open(filepath, mode='w') as out:
        json.dump(data, fp=out)

### Parse movies data

In [None]:
# Wall-clock start, used for the 'Total time' line below.
start_time = time.time()

# movies_data: fetched movie records; movies_with_error: ids that failed.
movies_data, movies_with_error = parse_movies(movies_list, headers, cookies, retries = 3)

print('=' * 30)
print('Total movies parsed:', len(movies_data))
print('Total errors:', len(movies_with_error))
print('Total time:', round(time.time() - start_time), 's')

In [None]:
# Wall-clock start, used for the 'Total time' line below.
start_time = time.time()

# Adds 'valid_images_ids' to the movies whose image id ranges overlap.
total_fixed, total_errors = fix_movies_conflicts(movies_data, headers, cookies, retries = 3)

print('=' * 30)
print('Total interval conflicts fixed:', total_fixed)
print('Unable to fix:', total_errors)
print('Total time:', round(time.time() - start_time), 's')

In [None]:
# Write the movie records to movies_data_filepath as JSON.
dump_json(movies_data, movies_data_filepath)

### Parse tv data

In [None]:
# Wall-clock start, used for the 'Total time' line below.
start_time = time.time()

# tv_data: fetched season records; tv_with_error: ids that failed.
tv_data, tv_with_error = parse_tv(tv_list, headers, cookies, retries = 5)

print('=' * 30)
print('Total tv parsed:', len(tv_data))
print('Total errors:', len(tv_with_error))
print('Total time:', round(time.time() - start_time), 's')

In [None]:
# Wall-clock start, used for the 'Total time' line below.
start_time = time.time()

# Adds 'valid_images_ids' to the TV episodes whose image id ranges overlap.
total_fixed, total_errors = fix_tv_and_anime_conflicts(tv_data, False, headers, cookies, retries = 5)

print('=' * 30)
print('Total interval conflicts fixed:', total_fixed)
print('Unable to fix:', total_errors)
print('Total time:', round(time.time() - start_time), 's')

In [None]:
# Write the TV records to tv_data_filepath as JSON.
dump_json(tv_data, tv_data_filepath)

### Parse anime data

In [None]:
# Wall-clock start, used for the 'Total time' line below.
start_time = time.time()

# anime_data: fetched anime records; anime_with_error: ids that failed.
anime_data, anime_with_error = parse_anime(anime_list, headers, cookies, retries = 5)

print('=' * 30)
print('Total anime parsed:', len(anime_data))
print('Total errors:', len(anime_with_error))
print('Total time:', round(time.time() - start_time), 's')

In [None]:
# Wall-clock start, used for the 'Total time' line below.
start_time = time.time()

# Adds 'valid_images_ids' to the anime episodes whose image id ranges overlap.
total_fixed, total_errors = fix_tv_and_anime_conflicts(anime_data, True, headers, cookies, retries = 5)

print('=' * 30)
print('Total interval conflicts fixed:', total_fixed)
print('Unable to fix:', total_errors)
print('Total time:', round(time.time() - start_time), 's')

In [None]:
# Write the anime records to anime_data_filepath as JSON.
dump_json(anime_data, anime_data_filepath)

## Load data from files

### Load Movies

In [None]:
# Read the movie records back from movies_data_filepath.
with open(movies_data_filepath, 'r') as file:
    movies_data_json = json.load(file)

# list() copies the decoded top-level JSON value into a new list.
movies_data = list(movies_data_json)

### Load TV

In [None]:
# Read the TV records back from tv_data_filepath.
with open(tv_data_filepath, 'r') as file:
    tv_data_json = json.load(file)

# list() copies the decoded top-level JSON value into a new list.
tv_data = list(tv_data_json)

### Load anime

In [None]:
# Read the anime records back from anime_data_filepath.
with open(anime_data_filepath, 'r') as file:
    anime_data_json = json.load(file)

# list() copies the decoded top-level JSON value into a new list.
anime_data = list(anime_data_json)