## Get eps links

#### JJK

In [10]:
from bs4 import BeautifulSoup
import requests

# Scrape the Jujutsu Kaisen wiki episode list into `all_episodes`, a list of
# {'name': <episode title>, 'link': <absolute wiki URL>} dicts, then print it.
#
# NOTE: despite its name, `html_content` holds the page URL, not HTML. The
# name is kept unchanged in case other cells refer to it.
html_content = "https://jujutsu-kaisen.fandom.com/wiki/List_of_Episodes"

# Fetch and parse the page. The timeout stops the cell from hanging on a
# stalled connection, and raise_for_status() makes an HTTP error fail loudly
# instead of silently parsing an error page into an empty episode list.
response = requests.get(html_content, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Find all tables with the specified attributes (the episode tables are
# matched by their exact inline attributes on the page).
tables = soup.find_all('table', {
    'width': '100%',
    'style': 'border:1px #ED0000; overflow:hidden; text-align:center; align:center;',
    'border': '1'
})

# Initialize a list to hold all episodes
all_episodes = []

# Loop through each table and extract episode data
for table in tables:
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cells = row.find_all('td')
        # Rows with fewer than three <td> cells (spanning or separator rows)
        # have no title cell; skip them instead of raising IndexError.
        if len(cells) < 3:
            continue
        # The third cell contains the title; take its first <a> with an href.
        link = cells[2].find('a', href=True)
        if link:
            all_episodes.append({
                'name': link.text.strip(),
                'link': f"https://jujutsu-kaisen.fandom.com{link['href']}"  # Make the link absolute
            })

# Print extracted episodes
for episode in all_episodes:
    print(f"Episode: {episode['name']}, Link: {episode['link']}")

Episode: Ryomen Sukuna, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_1
Episode: For Myself, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_2
Episode: Girl of Steel, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_3
Episode: Fearsome Womb, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_4
Episode: Fearsome Womb, Part 2, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_5
Episode: After the Rain, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_6
Episode: Assault, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_7
Episode: Boring, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_8
Episode: Young Fish and Reverse Punishment, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_9
Episode: Idle Transfiguration, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_10
Episode: Narrow-Minded, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_11
Episode: To You Someday, Link: https://jujutsu-kaisen.fandom.com/wiki/Episode_12
Episode: Tomorrow, Link: htt

#### NARUTO

In [9]:
import requests
from bs4 import BeautifulSoup

# Scrape the Naruto wiki "List of Animated Media" page into `data`, a dict
# mapping a category title (e.g. "Episodes - Naruto: Original") to a list of
# {"Number", "Title", "Link"} entries, then print the category titles.
#
# NOTE: despite its name, `html_content` holds the page URL, not HTML.
html_content = "https://naruto.fandom.com/wiki/List_of_Animated_Media"

# Fetch and parse the HTML content. The timeout stops the cell from hanging
# on a stalled connection; raise_for_status() surfaces HTTP errors instead of
# parsing an error page into an empty result.
response = requests.get(html_content, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')


def _title_and_link(cell):
    """Return ``(title, absolute_link)`` for a table cell.

    The title is the text of the cell's first ``<a>`` tag, or the cell's own
    text when it contains no anchor. The link is ``None`` when there is no
    anchor or the anchor has no ``href`` attribute.
    """
    anchor = cell.find('a')  # looked up once instead of up to three times
    if anchor is None:
        return cell.text.strip(), None
    href = anchor.get('href')
    link = f"https://naruto.fandom.com{href}" if href else None
    return anchor.text.strip(), link


# Initialize dictionary to store data
data = {}

# Find all tables with their preceding headings
tables = soup.find_all('table', class_='box table coloured bordered innerbordered style-basic fill-horiz')

for table in tables:
    # Find the preceding headings
    sub_heading = table.find_previous('h3')  # Specific subcategory
    main_heading = table.find_previous('h2')  # Main category (e.g., Episodes, Movies)

    if sub_heading and main_heading:
        # Combine main and sub headings for unique categories like "Movies - Naruto: Original"
        category_title = f"{main_heading.get_text(strip=True)} - {sub_heading.get_text(strip=True)}"
    elif main_heading:
        # Use the main heading if no subheading is found
        category_title = main_heading.get_text(strip=True)
    else:
        continue
    # NOTE(review): presumably the OVA table has no <h3> of its own, so
    # find_previous('h3') picks up the last sub-heading of the Episodes
    # section; this rename undoes that. Confirm against the page markup.
    if category_title == "OVAs - Boruto: Naruto Next Generations":
        category_title = "OVAs"

    is_ova = category_title == "OVAs"

    # Extract rows from the table
    rows = table.find_all('tr')[1:]  # Skip header row
    entries = []
    for idx, row in enumerate(rows, start=1):  # Use enumerate for dynamic numbering
        cells = row.find_all(['th', 'td'])
        if is_ova:
            # OVA rows carry the title in the first cell and have no number
            # column, so the row position is used as the number.
            if not cells:
                continue
            number, title_cell = idx, cells[0]
        else:
            # Other tables have the number in the first cell and the title in
            # the second; skip rows too short to hold both instead of raising
            # IndexError.
            if len(cells) < 2:
                continue
            number, title_cell = cells[0].text.strip(), cells[1]
        title, link = _title_and_link(title_cell)
        entries.append({"Number": number, "Title": title, "Link": link})
    # Save entries under the category
    data[category_title] = entries

# Print the category titles. Each entry under a category is a dict with the
# keys "Number", "Title" and "Link".
for category in data:
    print(category)

Episodes - Naruto: Original
Episodes - Naruto: Shippūden
Episodes - Boruto: Naruto Next Generations
OVAs
Movies - Naruto: Original
Movies - Naruto: Shippūden


## Consolidate Eps

#### NO filler

In [3]:
import requests
from bs4 import BeautifulSoup

# Scrape the filler-episode list for one show from animefillerlist.com into
# `filler_episodes` (a list of ints, in page order) and print it sorted.

# URL of the webpage
url = "https://www.animefillerlist.com/shows/naruto"
# https://www.animefillerlist.com/shows/naruto-shippuden
# https://www.animefillerlist.com/shows/boruto-naruto-next-generations


def _expand_episode_ranges(text):
    """Expand a string such as ``"26, 97, 101-106"`` into a list of ints.

    Comma-separated fragments are either a single episode number or an
    inclusive ``start-end`` range. Empty fragments (e.g. from a trailing
    comma) are ignored. Raises ``ValueError`` for a fragment that is neither
    a number nor a two-part numeric range.
    """
    episodes = []
    for part in text.split(','):
        part = part.strip()
        if not part:
            continue  # tolerate stray commas instead of failing on int('')
        if '-' in part:
            # Handle ranges like "101-106"
            start, end = map(int, part.split('-'))
            episodes.extend(range(start, end + 1))  # Expand range
        else:
            # Single episode
            episodes.append(int(part))
    return episodes


# Fetch the webpage. The timeout stops the cell from hanging on a stalled
# connection; raise_for_status() surfaces HTTP errors instead of reporting a
# misleading "section not found".
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Find the filler episodes section and, inside it, the span with the numbers.
filler_section = soup.find('div', class_='filler')
episode_span = None
if filler_section is not None:
    episode_span = filler_section.find('span', class_='Episodes')

# Split the text into individual items or ranges
filler_episodes = []

if episode_span is not None:
    # Extract the episode text
    episode_text = episode_span.text.strip()
    filler_episodes = _expand_episode_ranges(episode_text)

    print(f"Filler Episodes: {sorted(filler_episodes)}")  # Sort for readability
else:
    # Covers both a missing <div class="filler"> and a missing episode span
    # (the latter used to raise AttributeError on None).
    print("Filler episodes section not found.")

Filler Episodes: [26, 97, 101, 102, 103, 104, 105, 106, 136, 137, 138, 139, 140, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219]


In [8]:
# Keep only the original-series episodes whose number is not in the filler
# list. Relies on `data` and `filler_episodes` built by earlier cells.
filler_numbers = set(filler_episodes)  # O(1) membership instead of a list scan per episode

non_filler_episodes = []
for episode in data["Episodes - Naruto: Original"]:
    try:
        number = int(episode['Number'])
    except (TypeError, ValueError):
        # A non-numeric "Number" cell cannot be matched against the filler
        # list; report it and move on rather than aborting the whole cell.
        print(f"Skipping entry with non-numeric number: {episode['Number']!r}")
        continue
    if number not in filler_numbers:
        non_filler_episodes.append(episode)

# Print non-filler episodes
for episode in non_filler_episodes:
    print(f"Number: {episode['Number']}, Title: {episode['Title']}, Link: {episode['Link']}")

Number: 1, Title: Enter: Naruto Uzumaki!, Link: https://naruto.fandom.com/wiki/Enter:_Naruto_Uzumaki!
Number: 2, Title: My Name is Konohamaru!, Link: https://naruto.fandom.com/wiki/My_Name_is_Konohamaru!
Number: 3, Title: Sasuke and Sakura: Friends or Foes?, Link: https://naruto.fandom.com/wiki/Sasuke_and_Sakura:_Friends_or_Foes%3F
Number: 4, Title: Pass or Fail: Survival Test, Link: https://naruto.fandom.com/wiki/Pass_or_Fail:_Survival_Test
Number: 5, Title: You Failed! Kakashi's Final Decision, Link: https://naruto.fandom.com/wiki/You_Failed!_Kakashi%27s_Final_Decision
Number: 6, Title: A Dangerous Mission! Journey to the Land of Waves!, Link: https://naruto.fandom.com/wiki/A_Dangerous_Mission!_Journey_to_the_Land_of_Waves!
Number: 7, Title: The Assassin of the Mist!, Link: https://naruto.fandom.com/wiki/The_Assassin_of_the_Mist!
Number: 8, Title: The Oath of Pain, Link: https://naruto.fandom.com/wiki/The_Oath_of_Pain
Number: 9, Title: Kakashi: Sharingan Warrior!, Link: https://narut

In [1]:
# Run the project-local DataCollection pipeline for the "naruto" show.
# `Anime` is defined in the DataCollection module, which is not shown here.
from DataCollection import Anime
anime = Anime("naruto")
# NOTE(review): the recorded run of this call printed lists of table cells and
# then failed with "ValueError: invalid literal for int() with base 10: 'Find
# the Four-Leaf Red Clover!'" -- presumably a title cell is being parsed as an
# episode number inside save_episodes(); confirm in DataCollection.
anime.save_episodes()

[<th>1
</th>, <td>"<a href="/wiki/Enter:_Naruto_Uzumaki!" title="Enter: Naruto Uzumaki!">Enter: Naruto Uzumaki!</a>"
</td>, <td>3 October 2002
</td>, <td>10 September 2005
</td>]
[<th>2
</th>, <td>"<a href="/wiki/My_Name_is_Konohamaru!" title="My Name is Konohamaru!">My Name is Konohamaru!</a>"
</td>, <td>10 October 2002
</td>, <td>10 September 2005
</td>]
[<th>3
</th>, <td>"<a href="/wiki/Sasuke_and_Sakura:_Friends_or_Foes%3F" title="Sasuke and Sakura: Friends or Foes?">Sasuke and Sakura: Friends or Foes?</a>"
</td>, <td>17 October 2002
</td>, <td>17 September 2005
</td>]
[<th>4
</th>, <td>"<a href="/wiki/Pass_or_Fail:_Survival_Test" title="Pass or Fail: Survival Test">Pass or Fail: Survival Test</a>"
</td>, <td>24 October 2002
</td>, <td>24 September 2005
</td>]
[<th>5
</th>, <td>"<a href="/wiki/You_Failed!_Kakashi%27s_Final_Decision" title="You Failed! Kakashi's Final Decision">You Failed! Kakashi's Final Decision</a>"
</td>, <td>31 October 2002
</td>, <td>1 October 2005
</td>]
[<th

ValueError: invalid literal for int() with base 10: 'Find the Four-Leaf Red Clover!'