In [120]:
import os
import re
from fuzzywuzzy import fuzz

In [117]:
### SHOW SPECIFIC CONFIGURATION ###

### Gentle reminder ###
# 1. Visually inspect the episode list you have (whether scraped or manually
#    created) and make sure the episode names are correct to the best of your knowledge.
# 2. Make sure the directories below point to the right locations.

# Show name; used to build the path to this show's folder under ../shows_info_files/.
SHOW_NAME = "History of the Great War"
# Directories holding the downloaded episode files; they are searched in order when renaming.
EPISODE_DOWNLOADED_DIRS = [r"S:\tartube\channel_1"]
# Presumably the minimum fuzzy-match similarity, expressed as a 0-1 fraction.
# NOTE(review): fuzz.ratio returns an integer 0-100, so scale one side before comparing.
SEARCH_THRESHOLD = 0.9


def custom_clean_function(downloaded_ep_name):
    """Show-specific clean-up hook for a downloaded file name.

    Edit this to undo whatever is peculiar to the way the files were
    downloaded, so the result is as close as possible to the episode name
    in the episode list.

    Args:
        downloaded_ep_name (str): the episode name in the downloaded folder,
            with only 1 dot before its extension. When called from
            clean_ep_name, all other non-alphanumeric characters have
            already been removed, so the colon branch below only triggers
            if this function is called directly on a raw title.

    Returns:
        str: the text after the first colon if the name contains one,
        otherwise the name unchanged. Surrounding whitespace is not trimmed.
    """
    # Remove the show number that precedes the first colon, if there is one.
    # maxsplit=1 keeps the rest of the title intact when the title itself
    # contains further colons (e.g. "12: Verdun: The Beginning").
    if ":" in downloaded_ep_name:
        downloaded_ep_name = downloaded_ep_name.split(":", 1)[1]
    return downloaded_ep_name

In [77]:
# Print the current working directory; the relative path to the episode list
# built in a later cell is joined onto os.getcwd().
print(os.getcwd())

s:\Shows\Shows-scripts\script_files


In [78]:
# Path of the text file holding this show's episode names, resolved
# relative to the current working directory.
episodes_file = os.path.join(os.getcwd(), f"../shows_info_files/{SHOW_NAME}", "episode_name_list.txt")

In [79]:
# Read the episode list (one title per line) into a hashmap of
# title -> episode number, where the number is the 1-based line number.
# NOTE(review): the file is opened with the platform default encoding; pass
# encoding= explicitly if the titles contain non-ASCII characters.

episodes = {}
with open(episodes_file, "r") as f:
    for i, line in enumerate(f):
        title = line.strip()
        if not title:
            # A blank line is not an episode; storing "" as a title would
            # inflate len(episodes) and skew the files-vs-episodes check.
            continue
        if title in episodes:
            # Titles are the dict keys, so a repeated title would silently
            # replace the earlier episode number; make that visible.
            print(f"Warning: duplicate episode title {title!r} on lines {episodes[title]} and {i + 1}")
        episodes[title] = i + 1

In [80]:
print("Length of episodes:", len(episodes))
# Preview the first 10 episodes as "number: title" to sanity-check the list.
first_episodes = list(episodes.items())[:10]
for episode_name, episode_number in first_episodes:
    print(f"{episode_number}: {episode_name}")

Length of episodes: 306
1: 100 Years Ago Today
2: War Has Changed
3: July Crisis Part 1
4: July Crisis Part 2
5: July Crisis Part 3
6: Plans
7: Liege
8: Battle of the Frontiers
9: The British
10: Disaster in the East


In [81]:
# Collect the entry names found in every download directory into one flat list.
all_files = [
    entry
    for directory in EPISODE_DOWNLOADED_DIRS
    for entry in os.listdir(directory)
]

In [82]:
print("Length of all files:", len(all_files))
# Preview the first 10 downloaded file names to see how they are formatted.
first_files = all_files[:10]
for file in first_files:
    print(file)

Length of all files: 306
100 Years Ago Today.mp3
182： Versailles Pt. 1 - The Treaty That Shaped (Broke？) The World.mp3
183： Versailles Pt. 2 - Guests at the Party.mp3
184： Versailles Pt. 3 - The League of Nations.mp3
185： Versailles Pt. 4 - Racial Equality, Denied. and the Betrayal of China.mp3
186： Versailles Pt. 5 - Distributing the Spoils.mp3
187： Versailles Pt. 6 - Breaking Up the Middle East.mp3
188： Versailles Pt. 7 - The Dissolution of Turkey...Or Not.mp3
189： The Medical War Pt. 1.mp3
190： The Medical War Pt. 2.mp3


In [85]:
# Compare the number of known episodes with the number of downloaded files.
episode_count, file_count = len(episodes), len(all_files)
if episode_count > file_count:
    print("More episodes than files")
elif episode_count < file_count:
    print("More files than episodes")
else:
    print("Lengths are same! congratulations!")

Lengths are same! congratulations!


In [121]:
def clean_ep_name(ep_name):
    """Normalise a downloaded file name before fuzzy matching.

    The extension is separated with os.path.splitext rather than assumed to
    be the last 3 characters, so extensions of other lengths (e.g. ".webm",
    ".flac") are split in the right place.

    Args:
        ep_name (str): file name as found in the download directory.

    Returns:
        str: whatever custom_clean_function returns for the cleaned
        "<stem>.<extension>" string. That string contains exactly one dot;
        every other character that is not an ASCII letter, a digit or
        whitespace has been removed from the stem, and every character that
        is not an ASCII letter or digit has been removed from the extension.
        A name without an extension yields a trailing dot.
    """
    stem, extension = os.path.splitext(ep_name)
    # remove all special characters from the title part (whitespace is kept)
    stem = re.sub(r"[^A-Za-z0-9\s]", "", stem)
    # the extension keeps only letters and digits; this also drops its leading dot
    extension = re.sub(r"[^A-Za-z0-9]", "", extension)

    return custom_clean_function(f"{stem}.{extension}")

In [124]:
def rename_episode(old_name, new_name):
    """Rename a downloaded file inside the first download directory that holds it.

    Args:
        old_name (str): current file name (no directory part).
        new_name (str): file name to rename it to (no directory part).

    Returns:
        bool: True if the file was found and renamed. False if it was not
        found in any directory of EPISODE_DOWNLOADED_DIRS or if the rename
        itself failed; errors are printed rather than raised.
    """
    # go through the directory list, and try each one to see if it matches
    for directory in EPISODE_DOWNLOADED_DIRS:
        try:
            found = old_name in os.listdir(directory)
        except Exception as e:
            # An unreadable directory must not stop the search of the others.
            print(f"Error renaming {old_name} to {new_name} in {directory}")
            print(e)
            continue
        if not found:
            continue

        try:
            print(f"Renaming {old_name} to {new_name}")
            os.rename(os.path.join(directory, old_name), os.path.join(directory, new_name))
            return True
        except Exception as e:
            print(f"Error renaming {old_name} to {new_name} in {directory}")
            print(e)
            return False

    # Not present in any directory: report failure explicitly instead of
    # falling off the end and returning None.
    return False

In [126]:
# Match every downloaded file to its closest episode title and prefix the
# cleaned file name with that episode's number. Files that cannot be matched
# confidently, or whose rename fails, are collected in `issues`
# (original file name -> proposed new name) for manual follow-up.
issues = {}
for episode in all_files:
    print(f"Checking episode: {episode}")
    # Split on the last dot only, so a custom clean function that leaves
    # extra dots in the title does not break the unpacking.
    old_name, extension = clean_ep_name(episode).rsplit(".", 1)
    old_name = old_name.strip()
    print(f"Cleaned name: {old_name}")

    # do a fuzzy search; the first title with the highest score wins
    max_match = -1
    best_match = None
    for ep in episodes:
        match = fuzz.ratio(old_name, ep)
        if match > max_match:
            max_match = match
            best_match = ep

    if best_match is None:
        # Only happens when the episode map is empty; nothing to rename to.
        print("No episode titles available to match against")
        issues[episode] = None
        continue

    print(f"Found episode: {best_match}")
    # rename it to the episode number
    new_name = f"{episodes[best_match]}. {old_name}.{extension}"

    # fuzz.ratio scores 0-100 while SEARCH_THRESHOLD is a 0-1 fraction.
    # A weak best match is recorded as an issue instead of being renamed,
    # so a wrong episode number is never written to disk silently.
    if max_match / 100 < SEARCH_THRESHOLD:
        print(f"Match score {max_match} is below the threshold, skipping rename")
        issues[episode] = new_name
        continue

    if not rename_episode(episode, new_name):
        issues[episode] = new_name

Checking episode: 100 Years Ago Today.mp3
Cleaned name: 100 Years Ago Today
Found episode: 100 Years Ago Today
Checking episode: 182： Versailles Pt. 1 - The Treaty That Shaped (Broke？) The World.mp3
Cleaned name: 182 Versailles Pt 1  The Treaty That Shaped Broke The World
Found episode: 182: Versailles Pt. 1 - The Treaty That Shaped (Broke?) The World
Renaming 182： Versailles Pt. 1 - The Treaty That Shaped (Broke？) The World.mp3 to 188. 182 Versailles Pt 1  The Treaty That Shaped Broke The World.mp3
Checking episode: 183： Versailles Pt. 2 - Guests at the Party.mp3
Cleaned name: 183 Versailles Pt 2  Guests at the Party
Found episode: 183: Versailles Pt. 2 - Guests at the Party
Renaming 183： Versailles Pt. 2 - Guests at the Party.mp3 to 189. 183 Versailles Pt 2  Guests at the Party.mp3
Checking episode: 184： Versailles Pt. 3 - The League of Nations.mp3
Cleaned name: 184 Versailles Pt 3  The League of Nations
Found episode: 184: Versailles Pt. 3 - The League of Nations
Renaming 184： Versa