# Cleaning s4 fic_urls for merging with s4 fic_text
## Also, cleaning s4 series text -> s5 series text
- **By:** Sofia Kobayashi
- **Date:** 11/02/2023
- **Description:** Need to add fic_id, author, title, fandoms, and location to s4 fic_urls since that is what are used in fic matching comparisons
    - Broke fic_urls into 3 DFs: ao3, ffn, oth

In [271]:
import pandas as pd
import re
import AO3
import json
import numpy as np

import os
from dotenv import load_dotenv
from pathlib import Path
env_path = Path(".") / ".env"
load_dotenv(dotenv_path=env_path)

pd.set_option('display.max_columns', None)

In [272]:
# Constants
LOCATION_ORDER = {'ao3': 1,
                  'ffn_net': 2,
                  'tum': 3,
                  'drm': 4,
                  'lvj': 5,
                  'fcb': 6,
                  'wat': 7,
                  'pdf': 9,
                  'oth': 100,
                 }

DTB_ORDER = {'coffee': 1,
            'read': 2,
            'cont_read': 3,
            'to_read': 4}

## Load in s4 fic_urls

In [273]:
df = pd.read_csv('clean_data_4/all_versions_fic_url.csv', index_col=0, encoding='utf-8-sig')
df.head()

Unnamed: 0,version_num,smk_source,dtb_type,location,fic_id,url,categories,is_bold,fandom_type,fandom,fic_status,title,is_coffee,fic_series,author,length,is_complete,is_subbed,is_backedup,is_bookmarked,in_category,all_tags,current_chapter,fic_rating,to_read_rating,to_read_description,is_finished_inputting_data,ffn_date_updated_2-4-23,ffn_date_added
579,7.0,v7_updates,to_read,ao3,,http://archiveofourown.org/works/260273/chapte...,,False,,"batman,dcu",,Gonna Be A Better One (A Thousand Miles To You...,,,,,,,,,,,,,,"TimKon, In which Tim quits being Robin, Kon re...",,,
1589,8.0,v8_local_files,,ao3,,http://www.archiveofourown.org/works/26671084,,,,,,,,,,,,,,,,,,,,,,,
1590,8.0,v8_local_files,,ao3,,http://www.archiveofourown.org/works/27740392,,,,,,,,,,,,,,,,,,,,,,,
1591,8.0,v8_local_files,,ao3,,http://www.archiveofourown.org/works/31373576,,,,,,,,,,,,,,,,,,,,,,,
960,7.1,v7.1_ffn,read,ao3,,https://archiveofourown.org/works/717740/chapt...,,,,bleach,,"On Life, On Death, On Everything In-Between",,,"cywscross,",51.0,False,True,True,,,"well_written,.Time Travel,.BAMF,.Fix-It,.Prote...",,3.0,,,True,,


In [274]:
# REPORT: fic_id, title, author, location, fandom
print("DF LEN:", len(df), "\n")
for col in ['fic_id','title','author','location','fandom']:
    print(col, len(df[pd.isnull(df[col])]))

DF LEN: 3834 

fic_id 3667
title 2278
author 2609
location 0
fandom 2413


In [275]:
# Separating the 3 url types
df = pd.read_csv('clean_data_4/all_versions_fic_url.csv', index_col=0, encoding='utf-8')

ao3_urls = [url for url in df.url.to_list() if 'archiveofourown.org' in url]
ffn_urls = [url for url in df.url.to_list() if 'fanfiction.net' in url]
oth_urls = [url for url in df.url.to_list() if 'archiveofourown.org' not in url 
            and 'fanfiction.net' not in url]

# Making 3 df types
ao3 = df[df.url.isin(ao3_urls)]
ffn = df[df.url.isin(ffn_urls)]
oth = df[df.url.isin(oth_urls)]

len(ao3_urls) + len(ffn_urls) + len(oth_urls), len(df)

(3834, 3834)

## AO3 Cleaning

In [142]:
# Fill fic_id
for ind in ao3.index:
    fic_id = re.compile(r'/works/(\d+)').findall(ao3.loc[ind].url)
    ao3.loc[ind, 'fic_id'] = fic_id[0] if fic_id else ao3.loc[ind, 'fic_id']
    
"# fic_id is null: " + str(len(ao3[pd.isnull(ao3.fic_id)]))

'# fic_id is null: 0'

In [144]:
ao3.location.unique()

array(['ao3'], dtype=object)

In [59]:
# Load in cleaned CSV (not sure where it's from)
ao3 = pd.read_csv('clean_data_4/all_versions_fic_url_clean_ao3.csv', encoding='utf-8-sig', index_col=0)

"# blank titles: ", len(ao3[pd.isnull(ao3.title)])

('# blank titles: ', 58)

## Merge fics in s4 fic urls
- **Problem:** there are many rows with the SAME fic_id
- **Thus:** will merge all rows w/ same id -> one row
- **Desired Result:** DF where there are only one row per fic


In [28]:
# Helpers for merge_in_id()
def stringify_dict(dict):
    """
    Takes dict.
    Stringifies any keys that are lists (so can be stored in pd.DataFrame).
    Returns dict.
    """
    for key in dict:
        # if isinstance(dict[key], list):
        dict[key] = json.dumps(dict[key], ensure_ascii=False) # Don't encode non-ascii characters
    return dict

# Testing stringify_lists_in_dict()
# stringify_dict(t1)

In [2]:
# Merge on fic_id function
def merge_on_id(parent_df, fic_id) -> dict:
    """
    Takes a coffee df (df), either text or url, ideally of multiple rows.
    Merges data, as specificed below, into a single row.
    Returns author dict of merged info (dict).
    """
    # Load in df of fics with given fic_id
    df = parent_df[parent_df['fic_id'] == fic_id]

    # Make sure there are NOT empty dfs
    if len(df) == 0:
        print("ERROR: len 0")
        return {}     

    # Get dict, remove NaN's & de-dup lists
    cols = df.to_dict('list')
    for key in cols:
        cols[key] = list(set([ele for ele in cols[key] if not pd.isna(ele)]))

    # Sort & clean various attributes
    sorted_sources = sorted(cols['smk_source'], key = lambda source: float(source.split('_')[0][1:]))
    clean_authors = [author.replace(",", "") for author in cols['author']]

    clean_dtbs = set()
    for dtb_str in cols['dtb_type']:
        new_dtbs = dtb_str.split(',')
        for dtb in new_dtbs:
            clean_dtbs.add(dtb.strip())
    clean_dtbs = list(clean_dtbs)
    sorted_dtbs = sorted(clean_dtbs, key=lambda dtb: DTB_ORDER[dtb])
    avg_rating = None if len(cols['fic_rating']) == 0 else round(sum(cols['fic_rating'])/len(cols['fic_rating']), 2)

    # Make sure there are all cols from s4 fic urls AND all cols from s5 stuff
    result = {'primary_version': min(cols['version_num']), 
            'version_nums': cols['version_num'],
            'primary_source': [] if len(sorted_sources) == 1 else sorted_sources[0],
            'smk_sources': sorted_sources, 
            'primary_location': 'ao3', # they're all from ao3 
            'locations': cols['location'],
            'work_type': 'fic', # they're all fics 
            'fandom': cols['fandom'],
            'title': "" if len(cols['title']) ==0 else cols['title'][0], 
            'author': clean_authors, 
            'primary_link': 'https://archiveofourown.org/works/'+str(int(fic_id)), 
            'all_links' : cols['url'],
            'fic_id': int(fic_id),
            'num_appeared': len(df),
            'primary_dtb': "" if len(sorted_dtbs) == 0 else sorted_dtbs[0],
            'dtb_types': sorted_dtbs,
            'fic_series': cols['fic_series'],
            'is_bold': cols['is_bold'],
            'is_coffee': cols['is_coffee'],
            'is_complete': cols['is_complete'],
            'is_subbed': cols['is_subbed'],
            'is_bookmarked': cols['is_bookmarked'],
            'is_finished_inputting_data': cols['is_finished_inputting_data'],
            'is_backedup': cols['is_backedup'],
            'categories': cols['categories'],
            'current_chapter': cols['current_chapter'],
            'all_tags': cols['all_tags'],
            'fic_rating': avg_rating,
            'all_ratings': cols['fic_rating'],
            'fandom_type': cols['fandom_type'],
            'ffn_date_added': cols['ffn_date_added'],
            'ffn_date_updated_2-4-23': cols['ffn_date_updated_2-4-23'],
            'fic_status': cols['fic_status'],
            'in_category': cols['in_category'],
            'readability_status': cols['readability_status'],
            'to_read_description': cols['to_read_description'],
            'to_read_rating': cols['to_read_rating'],
           }
    return result
    # return stringify_dict(result)

# Testing merge_on_id()
t1 = merge_on_id(ao3, 46592134)
t1

NameError: name 'ao3' is not defined

In [82]:
# Testing for specific ids
cols = ao3[ao3['fic_id'] == 46592134].to_dict('list')
for key in cols:
    cols[key] = list(set([ele for ele in cols[key] if not pd.isna(ele)]))
# cols  

# # Print sorting keys NOT already in merge_on_id() output
# sorted_keys = sorted(cols.keys())
# for key in sorted_keys:
#     if key not in t1:
#         print(f"{key}: {cols[key]}")


In [135]:
# Produces de-dupped df
unique_fic_ids = ao3.fic_id.drop_duplicates().sort_values().to_list()
merged_rows = []

for i, id in enumerate(unique_fic_ids): 
    # print(f"({i}/{len(unique_fic_ids)}) {id}")
    merged_row = merge_on_id(ao3, id)
    merged_rows.append(merged_row)

with open("clean_data_4/all_versions_fic_url_merged.json", "w") as outfile:
    json.dump(merged_rows, outfile)
    
ao3_2 = pd.DataFrame(merged_rows)
ao3_2.head()

Unnamed: 0,primary_version,version_nums,primary_source,smk_sources,primary_location,locations,work_type,fandom,title,author,primary_link,all_links,fic_id,num_appeared,primary_dtb,dtb_types,fic_series,is_bold,is_coffee,is_complete,is_subbed,is_bookmarked,is_finished_inputting_data,is_backedup,categories,current_chapter,all_tags,fic_rating,all_ratings,fandom_type,ffn_date_added,ffn_date_updated_2-4-23,fic_status,in_category,readability_status,to_read_description,to_read_rating
0,7.0,"[7.1, 7.0]",v7_ffn,"[v7_ffn, v7.1_ffn]",ao3,[ao3],fic,[fullmental_alchemist],Window Dressing,[thehoyden],https://archiveofourown.org/works/46181,[https://archiveofourown.org/works/46181],46181,2,read,[read],[],[True],[],[True],[False],[],[True],[FALSE],"[favorite,smile]",[],"[well_written,.Relationship,.Fluff,.Cute, .Rel...",3.0,[3.0],[],[],[],[],[],[readable],[],[]
1,7.0,[7.0],[],[v7_toread],ao3,[ao3],fic,[naruto],How to File Form 39-B,[thehoyden],https://archiveofourown.org/works/46222,[https://archiveofourown.org/works/46222],46222,1,to_read,[to_read],[],[],[],[],[],[],[True],[],[],[-],[],,[],[],[],[],[],[],[readable],"[Iruka/Kakashi meet cute, smut, sweet dreams]",[sd]
2,7.0,[7.0],[],[v7_toread],ao3,[ao3],fic,[naruto],Telephone,[rageprufrock],https://archiveofourown.org/works/48095,[https://archiveofourown.org/works/48095],48095,1,to_read,[to_read],[],[],[],[],[],[],[True],[],[],[-],[],,[],[],[],[],[],[],[readable],[sounds good],[2]
3,8.0,[8.0],[],[v8_local_files],ao3,[ao3],fic,"[Leverage,Batman (Movies - Nolan)]",Five times the Leverage Crew was not in Gotham...,[noelia_g],https://archiveofourown.org/works/76861,[https://archiveofourown.org/works/76861],76861,1,,[],[],[],[],[],[],[],[],[],[],[],[],,[],[],[],[],[],[],[readable],[],[]
4,7.0,"[8.0, 7.0]",v7_updates,"[v7_updates, v8_local_files]",ao3,[ao3],fic,[Chronicles of Narnia - C. S. Lewis],Carpetbaggers,[cofax],https://archiveofourown.org/works/106190,"[https://archiveofourown.org/works/106190, htt...",106190,3,cont_read,[cont_read],[],[False],[],[],[],[],[],[],[],[],[],,[],[],[],[],[],[],[readable],"[ch 6, Narnia worldbuilding]",[]


In [84]:
# Check that been de-dupped
print("(total len, # of fic_ids)")
len(ao3_2.fic_id.to_list()), len(set(ao3_2.fic_id.to_list()))

(total len, # of fic_ids)


(2442, 2442)

In [94]:
# Rename, reorder cols, and sort DF
ao3_2 = ao3_2[['fic_id', 'num_appeared', 'primary_version', 'title', 'author', 'fandom', 'version_nums', 'primary_source', 
       'smk_sources', 'primary_location', 'locations', 'work_type', 'primary_link', 'all_links', 
       'primary_dtb', 'dtb_types', 'fic_series', 'is_bold', 'is_coffee','is_complete', 'is_subbed', 'is_bookmarked',
       'is_finished_inputting_data', 'is_backedup', 'categories', 'current_chapter', 'all_tags', 'fic_rating', 'all_ratings',
       'fandom_type', 'ffn_date_added', 'ffn_date_updated_2-4-23','fic_status', 'in_category', 'readability_status', 
       'to_read_description', 'to_read_rating']]

# Sort by fic_id
ao3_2['fic_id'] = ao3_2['fic_id'].astype(int)
ao3_2 = ao3_2.sort_values(by='fic_id').reset_index(drop=True)
ao3_2['fic_id'] = ao3_2['fic_id'].astype(str)


# Write to CSV
ao3_2.to_csv("clean_data_4/all_versions_fic_url_merged_ao3.csv")

## Fill s4 fic urls with info
- **Problem:** many fics don't have all the info needed to match (ie. title, fandoms, authors, location)
- **Thus:** will use AO3 API to retrieve & fill rows
- **Desired Result:** DF with all rows having the info needed to compare fics (title, fandoms, authors, location)

**Cols Needed to Compare Fics:** title, fandom, author, location

In [177]:
# Read in relevant files
with open("clean_data_4/all_versions_fic_url_merged.json", "r") as infile:
    merged_dict = json.load(infile)
    merged_dict.sort(key=lambda fic: int(fic['fic_id']))

In [176]:
# Initiate AO3 Session
ao3_session = AO3.Session(os.environ['AO3_USERNAME'], os.environ['AO3_PASSWORD'])

In [264]:
# Function to fill an AO3 fic
def fill_all_fics(parent_dict, ao3_session):
    error_count = 0
    
    for i, fic in enumerate(parent_dict):
        # If essential info is empty AND no error present
        if(fic['title'] == "" or fic['fandom'] == [] or fic['author'] == [] or fic['locations'] == []) and "error" not in fic: 
            
            try: # Try to fill fic 
                work = AO3.Work(fic['fic_id'])
                parent_dict[i]['title'] = work.title
                parent_dict[i]['author'] = [author.username for author in work.authors]
                parent_dict[i]['fandom'] = work.fandoms
                parent_dict[i]['locations'] = ['ao3']
                print(f'Filled {fic["fic_id"]}')
            except Exception as exception: # Add error if can't fill
                errorName = type(exception).__name__
                fic['error'] = type(exception).__name__
                error_count += 1
                print(f"{errorName}: {fic['fic_id']}")

    print(f"Done! {error_count} errors.")

In [270]:
# Code to run fill all fics
fill_all_fics(merged_dict, ao3_session)
merged_dict[1651]['error'] = "MyWeirdAttributeError" # Giving AttributeError, but should be ok: https://archiveofourown.org/works/27688829

Done! 0 errors.


In [271]:
# Check that all rows have (title, fandoms, author, locations) OR (error)
for i, fic in enumerate(merged_dict):
    if (fic['title'] == "" or fic['fandom'] == [] or fic['author'] == [] or fic['locations'] == []) and ("error" not in fic):
        print(i, fic["title"])

# Printing nothing means it's working

In [272]:
# Save filled dict to JSON
with open("clean_data_4/all_versions_fic_url_filled.json", "w") as outfile:
    json.dump(merged_dict, outfile)

In [275]:
def show_counts(df, col_name):
    """
    Helper function to show value_counts() AND values.
    """
    counts = df[col_name].value_counts().to_list()
    values = df[col_name].value_counts().index.to_list()

    for i, val in enumerate(values):
        print(f"{counts[i]} {val}")

# show_counts(w, "fandom") # Will show some empty titles, fandoms, etc. -- it's the error fics

**Added Errors**
- InvalidIdError = 404 Not Found
- AttributeError = Mystery Collection OR weird scraping error? should be working?

## Standardize fandoms
- **Problem:** the fandoms in s4 fic url vs s4 fic text have what AO3 names fandoms vs what I called fandoms
- **Thus:** I will match all of my fandom names -> AO3 fandom names & convert them to standardize fandoms
- **Desired Result:** DF which rows' fandoms all draw from same AO3 values

In [1]:
# Read in relevant files
with open("clean_data_4/all_versions_fic_url_filled.json", "r") as infile:
    non_standard_fandom_fics_dict = json.load(infile)

with open('reference_info/fandom_aliases.json', 'r') as infile:
    FANDOM_NAMES = json.load(infile)

NameError: name 'json' is not defined

In [None]:
# Cleaned fic fandoms
# Function to uncompact (split, dedup, order) fandom strings
def split_and_sort_fandoms(dict):
    for fic in dict:
        clean_fandoms = set()
        for fandom_str in fic['fandom']:
            for fandom in fandom_str.split(","):
                if fandom != "":
                    clean_fandoms.add(fandom.strip())
                
        fandom_list = list(clean_fandoms)
        fic['fandom'] = sorted(fandom_list)

# Clean dict
split_and_sort_fandoms(non_standard_fandom_fics_dict)

# Check for compacted fandoms
for fic in non_standard_fandom_fics_dict:
    for fandom_str in fic['fandom']:
        if ',' in fandom_str:
            print(fic['fandom'])

# Printing none means it worked

In [190]:
# my fandoms -> AO3 fandoms dict
FANDOM_CONVERSION = {    
    '1/2_prince': '1/2 Wangzi | 1/2 Prince', 
    '2ha': "二哈和他的白猫师尊 - 肉包不吃肉 | The Husky and His White Cat Shizun - Meatbun Doesn't Eat Meat", 
    'assassins_creed': "Assassin's Creed - All Media Types", 
    'atla': 'Avatar: The Last Airbender', 
    'avengers': 'Avengers (Marvel) - All Media Types', 
    'batman': 'Batman - All Media Types', 
    'black_panther': 'Black Panther (2018)', 
    'bleach': 'Bleach', 
    'bnha': "僕のヒーローアカデミア | Boku no Hero Academia | My Hero Academia", 
    'bts': "방탄소년단 | Bangtan Boys | BTS", 
    'captain_america': 'Captain America - All Media Types', 
    'captive_prince': 'Captive Prince - C. S. Pacat', 
    'chronicles_of_narnia': 'Chronicles of Narnia - C. S. Lewis', 
    'code_geass': 'Code Geass', 
    'daredevil': 'Daredevil (TV)', 
    'dcu': 'DCU', 
    'descendants': "Descendants (Disney Movies)", 
    'detroit_become_human': 'Detroit: Become Human (Video Game)', 
    'fairy_tail': 'Fairy Tail', 
    'fbawtft': 'Fantastic Beasts and Where to Find Them (Movies)', 
    'final_fantasy_vii': 'Final Fantasy VII', 
    'final_fantasy_xv': 'Final Fantasy XV', 
    'folklore': 'Folklore', 
    'fullmetal_alchemist': 'Fullmetal Alchemist - All Media Types', 
    'game_of_thrones': 'A Song of Ice and Fire & Related Fandoms', 
    'good_omens': 'Good Omens (TV)', 
    'gotham': "Gotham (TV)", 
    'guardians_of_the_galaxy': 'Guardians of the Galaxy (Movies)', 
    'hamilton': 'Hamilton - Miranda', 
    'harry_potter': 'Harry Potter - Fandom', 
    'httyd': 'How to Train Your Dragon (Movies)', 
    'james_bond': 'James Bond (Movies)', 
    'john_wick': "John Wick (Movies)", 
    'joy_of_life': "庆余年 | Joy of Life (TV)", 
    'katekyo_hitman_reborn': 'Katekyou Hitman Reborn!', 
    'kingsman': 'Kingsman (Movies)', 
    'kuroko_no_basuke': "Kuroko no Basuke | Kuroko's Basketball", 
    'leverage': 'Leverage', 
    'lord_of_the_rings': "The Lord of the Rings - All Media Types", 
    'mcu': 'MCU', 
    'mdzs': '魔道祖师 - 墨香铜臭 | Módào Zǔshī - Mòxiāng Tóngxiù', 
    'megamind': 'Megamind (2010)', 
    'merlin': 'Merlin (TV)', 
    'minecraft': 'Minecraft (Video Game)', 
    'moon_knight': 'Moon Knight (TV 2022)', 
    'naruto': 'Naruto', 
    'one_piece': 'One Piece', 
    'percy_jackson_olympians': 'Percy Jackson and the Olympians & Related Fandoms - All Media Types', 
    'pkmn_sword&shield': 'Pocket Monsters: Sword & Shield | Pokemon Sword & Shield Versions', 
    'pokemon': 'Pocket Monsters | Pokemon (Anime)', 
    'rise_of_the_guardians': "Rise of the Guardians (2012)", 
    'rwby': "RWBY", 
    'sherlock': "Sherlock (TV)", 
    'spiderman': "Spider-Man - All Media Types", 
    'star_wars': "Star Wars - All Media Types", 
    'star_wars_cw': "Star Wars: Clone Wars (2003) - All Media Types", 
    'supernatural': "Supernatural", 
    'sword_art_online': "Sword Art Online (Anime & Manga)", 
    'teen_wolf': "Teen Wolf (TV)", 
    'the_arrow': "Arrow (TV 2012)", 
    'the_flash': "The Flash (Comics)", 
    'the_hobbit': "The Hobbit - All Media Types", 
    'the_song_of_achillles': "The Song of Achilles - Madeline Miller", 
    'the_witcher': "The Witcher (TV)", 
    'thor': "Thor - All Media Types", 
    'tiger&bunny': "Tiger & Bunny", 
    'tokyo_ghoul': "Tokyo Ghoul", 
    'transformers': "Transformers - All Media Types", 
    'umbrella_academy': "The Umbrella Academy (TV)", 
    'xmen': "X-Men - All Media Types", 
    'young_justice': "Young Justice - All Media Types", 
    'yuuri_on_ice': "Yuri!!! on Ice (Anime)", 
    'artemis_fowl': "Artemis Fowl - Eoin Colfer",
     'big_hero_6': "Big Hero 6 (2014)",
     'downton_abbey': "Downton Abbey",
     'ouran_hshc': "Ouran High School Host Club - All Media Types",
     'prince_of_tennis': "Tennis no Oujisama | Prince of Tennis",
     'the_croods': "The Croods (Movies)",
}

In [181]:
# Function & code to convert my fandoms to ao3 fandoms
def convert_my_fandoms_to_ao3_fandoms(dict):
    for fic in dict:
        clean_fandoms = []
        for fandom in fic['fandom']:
            if fandom in FANDOM_NAMES:
                fandom = FANDOM_CONVERSION[fandom]
            clean_fandoms.append(fandom)
        fic['fandom'] = sorted(clean_fandoms)

convert_my_fandoms_to_ao3_fandoms(non_standard_fandom_fics_dict)

In [182]:
# Check all fandoms
all_fandoms = set()

for fic in non_standard_fandom_fics_dict:
    for fandom in fic['fandom']: 
        all_fandoms.add(fandom)

# sorted(list(all_fandoms)) # There should be NO "CHANGE" fandom and NONE of my fandoms left

In [206]:
# Save fandom standardized dict to JSON
with open("clean_data_4/all_versions_fic_url_done_ao3.json", "w") as outfile:
    json.dump(non_standard_fandom_fics_dict, outfile)

## Clean FFN fics of s4 fic url
- **Problem:** Need to clean ffn fic urls (prev was only ao3)
- **Thus:** I will clean
- **Desired Result:** JSON of clean ffn.net fics

Fill authors & fandoms manually -- couldn't figure out how to get past FFN CloudFlare with Selenium

In [239]:
# Load in relevant DF from very top
ffn.head()

Unnamed: 0,version_num,smk_source,dtb_type,location,fic_id,url,categories,is_bold,fandom_type,fandom,fic_status,title,is_coffee,fic_series,author,length,is_complete,is_subbed,is_backedup,is_bookmarked,in_category,all_tags,current_chapter,fic_rating,to_read_rating,to_read_description,is_finished_inputting_data,ffn_date_updated_2-4-23,ffn_date_added
1736,8.0,v8_local_files,to_read,ffn_net,,https://www.fanfiction.net/s/12334156/4/Travelers,,,,"fullmetal_alchemist, avengers",,,,,RugitusAstra,15.0,,,,,,,,,,,,,
3848,8.0,v8_local_files,,ffn_net,,https://m.fanfiction.net/s/12783920/1/Authors-...,,,,downton_abbey,,,,,Mr. Chaos,610.0,,,,,,,,,,,,,
3849,8.0,v8_local_files,,ffn_net,,https://m.fanfiction.net/s/13910770/1/Nest,,,,katekyo_hitman_reborn,,,,,Ourliazo,4.0,,,,,,,,,,,,,
3850,8.0,v8_local_files,,ffn_net,,https://m.fanfiction.net/s/14052100/1/Gaslight...,,,,katekyo_hitman_reborn,,,,,Ourliazo,,,,,,,,,,,,,,
3851,8.0,v8_local_files,,ffn_net,,https://m.fanfiction.net/s/5645842/1/Rational-...,,,,artemis_fowl,,,,,AnihyrMoonstar,16.0,,,,,,,,,,,,,


In [240]:
# Fill fic_id & title
for ind in ffn.index:
    # Set fic_id
    fic_id = re.compile(r'/s/(\d+)').findall(ffn.loc[ind].url)
    ffn.loc[ind, 'fic_id'] = int(fic_id[0])
    
    # Set title
    title = re.compile(r'/s/\d+/\d+/(.+)').findall(ffn.loc[ind,'url'])[0] \
                .replace('-m-', "'m-") \
                .replace('-s-', "'s-") \
                .replace('-', " ")
    ffn.loc[ind, 'title'] = title
        

"Num of fic_id's that are null: " + str(len(ffn[pd.isnull(ffn.fic_id)]))

"Num of fic_id's that are null: 0"

In [241]:
# Function to merge ffn.net fics
def merge_ffn_fic(parent_df, fic_id):
    # Load in df of fics with given fic_id
    df = parent_df[parent_df['fic_id'] == fic_id]

    # Make sure there are NOT empty dfs
    if len(df) == 0:
        print("ERROR: len 0")
        return {}     

    # Get dict, remove NaN's & de-dup lists
    cols = df.to_dict('list')
    for key in cols:
        cols[key] = list(set([ele for ele in cols[key] if not pd.isna(ele)]))

    # Sort & clean various attributes
    sorted_sources = sorted(cols['smk_source'], key = lambda source: float(source.split('_')[0][1:]))
    clean_authors = list(set([author.replace(",", "") for author in cols['author']]))
    sorted_versions = sorted(cols['version_num'])
    sorted_locations = sorted(cols["location"], key=lambda loc: LOCATION_ORDER[loc])
    
    
    clean_dtbs = set()
    for dtb_str in cols['dtb_type']:
        new_dtbs = dtb_str.split(',')
        for dtb in new_dtbs:
            clean_dtbs.add(dtb.strip())
    clean_dtbs = list(clean_dtbs)
    sorted_dtbs = sorted(clean_dtbs, key=lambda dtb: DTB_ORDER[dtb])

    clean_tags = set()
    for tag_str in cols['all_tags']:
        new_tags = tag_str.split(',')
        for tag in new_tags:
            clean_tags.add(tag.strip())
    sorted_tags = sorted(list(clean_tags))
    
    avg_rating = None if len(cols['fic_rating']) == 0 else round(sum(cols['fic_rating'])/len(cols['fic_rating']), 2)
    # print(cols['to_read_rating'])
    int_tr = [int(rating) for rating in cols['to_read_rating']]
    avg_tr_rating = None if len(cols['to_read_rating']) == 0 else round(sum(int_tr)/len(cols['to_read_rating']), 2)
    
    
    
    return {'num_appeared': len(df),
            'primary_version': "" if len(sorted_versions) == 0 else sorted_versions[0],
            'version_num': sorted_versions,
            'primary_source': "" if len(sorted_sources) == 0 else sorted_sources[0],
             'smk_source': sorted_sources,
            'primary_dtb': "" if len(sorted_dtbs) == 0 else sorted_dtbs[0],
             'dtb_type': sorted_dtbs,
            'primary_loction': "" if len(sorted_locations) == 0 else sorted_locations[0],
             'location': sorted_locations,
             'fic_id': int(fic_id),
            'primary_url': "" if len(cols['url']) == 0 else cols['url'][0],
             'url': cols['url'],
             'categories': cols['categories'],
             'is_bold': None if len(cols["is_bold"]) == 0 else any(cols["is_bold"]), 
             'fandom_type': cols['fandom_type'],
             'fandom': cols['fandom'],
             'fic_status': None, # this column is entirely empty
             'title': "" if len(cols['title']) == 0 else cols['title'][0],
             'is_coffee': None if len(cols["is_coffee"]) == 0 else any(cols["is_coffee"]), 
             'fic_series': cols['fic_series'],
             'author': clean_authors,
             'length': None if len(cols["length"]) == 0 else max(cols["length"]),
             'is_complete': None if len(cols["is_complete"]) == 0 else any(cols["is_complete"]), 
             'is_subbed': None if len(cols["is_subbed"]) == 0 else any(cols["is_subbed"]), 
             'is_backedup': None if len(cols["is_backedup"]) == 0 else any(cols["is_backedup"]), 
             'is_bookmarked': None if len(cols["is_bookmarked"]) == 0 else any(cols["is_bookmarked"]), 
             'in_category': None, # this column is entirely empty
             'all_tags': sorted_tags,
             'current_chapter': "" if len(cols["current_chapter"]) == 0 else max(cols["current_chapter"]),
             'fic_rating': avg_rating, 
             'to_read_rating': avg_tr_rating,
             'to_read_description': cols['to_read_description'],
             'is_finished_inputting_data': None if len(cols["is_finished_inputting_data"]) == 0 else any(cols["is_finished_inputting_data"]), 
             'ffn_date_updated_2-4-23': cols['ffn_date_updated_2-4-23'],
             'ffn_date_added': cols['ffn_date_added']}

# merge_ffn_fic(ffn, 10747630)

In [242]:
# Code to merge all ffn.net fics
unique_fic_ids = ffn.fic_id.drop_duplicates().sort_values().to_list()
merged_rows = []

for i, id in enumerate(unique_fic_ids): 
    merged_row = merge_ffn_fic(ffn, id)
    merged_rows.append(merged_row)
    
merged_df = pd.DataFrame(merged_rows).sort_values(by="fic_id").reset_index(drop=True)

In [243]:
# Split & sort fandoms
split_and_sort_fandoms(merged_rows)

# Check for compacted fandoms
for fic in merged_rows:
    for fandom_str in fic['fandom']:
        if ',' in fandom_str:
            print(fic['fandom'])

# Printing none means it worked

In [244]:
# Convert fandoms in ffn 
convert_my_fandoms_to_ao3_fandoms(merged_rows)

In [245]:
# Save fandom standardized dict to JSON
with open("clean_data_4/all_versions_fic_url_done_ffn.json", "w") as outfile:
    json.dump(merged_rows, outfile)

## Clean OTH fics of s4 fic url
- **Problem:** Need to other fic urls 
- **Thus:** I will clean
- **Desired Result:** JSON of clean oth fics

Did manually, there were only 9

In [276]:
# Load in relevant DF from very top
oth.head(2)

Unnamed: 0,version_num,smk_source,dtb_type,location,fic_id,url,categories,is_bold,fandom_type,fandom,fic_status,title,is_coffee,fic_series,author,length,is_complete,is_subbed,is_backedup,is_bookmarked,in_category,all_tags,current_chapter,fic_rating,to_read_rating,to_read_description,is_finished_inputting_data,ffn_date_updated_2-4-23,ffn_date_added
315,7.0,v7_toread,to_read,pdf,,https://drive.google.com/drive/u/0/folders/1mx...,,,,merlin,,More Lovely and More Temperate,,,"Saucery,",8.0,,,,,,,-,,1.0,"gender-swapping spell on Arthur, smut",True,,
97,7.0,v7_ffn,read,tum,,https://kristtorn.tumblr.com/post/166530149542...,,True,,"harry_potter,bnha",,Divergent Bindings,,,"Araceil,",,False,,,,,,,,,,False,,


In [277]:
# Function to merge oth fics
def merge_oth(parent_df, title):
    # Load in df of fics with given fic_id
    df = parent_df[parent_df['title'] == title]

    # Make sure there are NOT empty dfs
    if len(df) == 0:
        print("ERROR: len 0")
        return {}     

    # Get dict, remove NaN's & de-dup lists
    cols = df.to_dict('list')
    for key in cols:
        cols[key] = list(set([ele for ele in cols[key] if not pd.isna(ele)]))

    avg_rating = None if len(cols['fic_rating']) == 0 else round(sum(cols['fic_rating'])/len(cols['fic_rating']), 2)
    int_tr = [int(rating) for rating in cols['to_read_rating']]
    avg_tr_rating = None if len(cols['to_read_rating']) == 0 else round(sum(int_tr)/len(cols['to_read_rating']), 2)

    # Sort & clean various attributes
    sorted_sources = sorted(cols['smk_source'], key = lambda source: float(source.split('_')[0][1:]))
    clean_authors = list(set([author.replace(",", "") for author in cols['author']]))
    sorted_versions = sorted(cols['version_num'])
    sorted_locations = sorted(cols["location"], key=lambda loc: LOCATION_ORDER[loc])
    
    
    clean_dtbs = set()
    for dtb_str in cols['dtb_type']:
        new_dtbs = dtb_str.split(',')
        for dtb in new_dtbs:
            clean_dtbs.add(dtb.strip())
    clean_dtbs = list(clean_dtbs)
    sorted_dtbs = sorted(clean_dtbs, key=lambda dtb: DTB_ORDER[dtb])

    clean_tags = set()
    for tag_str in cols['all_tags']:
        new_tags = tag_str.split(',')
        for tag in new_tags:
            clean_tags.add(tag.strip())
    sorted_tags = sorted(list(clean_tags))
 
    
    # return cols
    return {'num_appeared': len(df),
            'primary_version': "" if len(sorted_versions) == 0 else sorted_versions[0],
            'version_num': sorted_versions,
            'primary_source': "" if len(sorted_sources) == 0 else sorted_sources[0],
             'smk_source': sorted_sources,
            'primary_dtb': "" if len(sorted_dtbs) == 0 else sorted_dtbs[0],
             'dtb_type': sorted_dtbs,
            'primary_loction': "" if len(sorted_locations) == 0 else sorted_locations[0],
             'location': sorted_locations,
             'fic_id': cols["fic_id"], # they all have fic_ids
            'primary_url': "" if len(cols['url']) == 0 else cols['url'][0],
             'url': cols['url'],
             'categories': cols['categories'],
             'is_bold': None if len(cols["is_bold"]) == 0 else any(cols["is_bold"]), 
             'fandom_type': cols['fandom_type'],
             'fandom': cols['fandom'],
             'fic_status': None, # this column is entirely empty
             'title': "" if len(cols['title']) == 0 else cols['title'][0],
             'is_coffee': None if len(cols["is_coffee"]) == 0 else any(cols["is_coffee"]), 
             'fic_series': cols['fic_series'],
             'author': clean_authors,
             'length': None if len(cols["length"]) == 0 else max(cols["length"]),
             'is_complete': None if len(cols["is_complete"]) == 0 else any(cols["is_complete"]), 
             'is_subbed': None if len(cols["is_subbed"]) == 0 else any(cols["is_subbed"]), 
             'is_backedup': None if len(cols["is_backedup"]) == 0 else any(cols["is_backedup"]), 
             'is_bookmarked': None if len(cols["is_bookmarked"]) == 0 else any(cols["is_bookmarked"]), 
             'in_category': None, # this column is entirely empty
             'all_tags': sorted_tags,
             'current_chapter': "" if len(cols["current_chapter"]) == 0 else max(cols["current_chapter"]),
             'fic_rating': avg_rating, 
             'to_read_rating': avg_tr_rating,
             'to_read_description': cols['to_read_description'],
             'is_finished_inputting_data': None if len(cols["is_finished_inputting_data"]) == 0 else any(cols["is_finished_inputting_data"]), 
             'ffn_date_updated_2-4-23': cols['ffn_date_updated_2-4-23'],
             'ffn_date_added': cols['ffn_date_added']}

# Testing
# merge_oth(oth, 'Divergent Bindings')

In [282]:
# Code to merge all oth fics
titles = oth.title.drop_duplicates().sort_values().to_list()
merged_rows = []

for i, title in enumerate(titles): 
    merged_row = merge_oth(oth, title)
    merged_rows.append(merged_row)
    
merged_df = pd.DataFrame(merged_rows).sort_values(by="fic_id").reset_index(drop=True)

In [283]:
# Convert fandoms in oth 
convert_my_fandoms_to_ao3_fandoms(merged_rows)

In [284]:
# Save fandom standardized dict to JSON
with open("clean_data_4/all_versions_fic_url_done_oth.json", "w") as outfile:
    json.dump(merged_rows, outfile)

## Combine ao3, ffn.net, and oth JSON files -> s5 fic url
- **Problem:** Need to connect all 3
- **Thus:** Will do
- **Desired Result:** JSON of all s4 fic urls

In [286]:
# Load in relevant files
with open("clean_data_4/all_versions_fic_url_done_ao3.json", "r") as infile:
    ao3 = json.load(infile)

with open("clean_data_4/all_versions_fic_url_done_ffn.json", "r") as infile:
    ffn = json.load(infile)

with open("clean_data_4/all_versions_fic_url_done_oth.json", "r") as infile:
    oth = json.load(infile)

In [288]:
# Combine the 3 files
combined_dict = ao3 + ffn + oth

In [1]:
# Write to s5 fic url 
# with open("clean_data_5/fic_url.json", "w") as outfile:
#     json.dump(combined_dict, outfile) DO NOT RUN, DID SOME MANUAL DATA CLEANING RE: ffn-ao3 same fics when merging s5 fic url + text 

## Convert s4 series text -> s5 series text
- **Problem:** Haven't done s5 series text yet
- **Thus:** Will do
- **Desired Result:** De-dupped s5 series text

In [2]:
# Read in relevant files
s4_series_text = pd.read_csv("clean_data_4/all_versions_series_text.csv", index_col=0, encoding="utf-8-sig").reset_index(drop=True)
s4_series_text['title'] = s4_series_text['title'].apply(lambda x: x.strip())

In [3]:
def merge_on_title(parent_df, title):
    """
    Takes a coffee df (df), either text or url, ideally of multiple rows.
    Merges data, as specificed below, into a single row.
    Returns author dict of merged info (dict).
    """
    # Load in df of fics with given fic_id
    df = parent_df[parent_df['title'] == title]

    # Make sure there are NOT empty dfs
    if len(df) == 0:
        print("ERROR: len 0")
        return {}     

    # Get dict, remove NaN's & de-dup lists
    cols = df.to_dict('list')
    for key in cols:
        cols[key] = list(set([ele for ele in cols[key] if not pd.isna(ele)]))

    # Sort & clean various attributes
    sorted_versions = sorted(cols['version_num'])
    sorted_sources = sorted(cols['smk_source'], key = lambda source: float(source.split('_')[0][1:]))
    clean_authors = [author.replace(",", "") for author in cols['author']]
    sorted_locations = sorted(cols["location"], key=lambda loc: LOCATION_ORDER[loc])

    clean_dtbs = set()
    for dtb_str in cols['dtb_type']:
        new_dtbs = dtb_str.split(',')
        for dtb in new_dtbs:
            clean_dtbs.add(dtb.strip())
    clean_dtbs = list(clean_dtbs)
    sorted_dtbs = sorted(clean_dtbs, key=lambda dtb: DTB_ORDER[dtb])
    
    avg_rating = None if len(cols['series_rating']) == 0 else round(sum(cols['series_rating'])/len(cols['series_rating']), 2)

    return {'num_appeared': len(df),
            'primary_version': "" if len(sorted_versions) == 0 else sorted_versions[0],
            'version_nums': sorted_versions,
             'primary_source': "" if len(sorted_sources) == 0 else sorted_sources[0],
             'smk_source': sorted_sources,
             'primary_dtb': "" if len(sorted_dtbs) == 0 else sorted_dtbs[0],
             'dtb_type': sorted_dtbs,
             'primary_location': "" if len(sorted_locations) == 0 else sorted_locations[0],
             'locations': sorted_locations,
             'fandom_type': sorted(cols["fandom_type"]),
             'fandom': sorted(cols["fandom"]),
             'categories': sorted(cols["categories"]),
             'series_status': "" if len(cols["series_status"]) == 0 else cols["series_status"][0],
             'is_bold': None if len(cols["is_bold"]) == 0 else any(cols["is_bold"]),
             'title': title,
             'author': sorted(clean_authors),
             'is_coffee': None if len(cols["is_coffee"]) == 0 else any(cols["is_coffee"]),
             'is_complete': None if len(cols["is_complete"]) == 0 else any(cols["is_complete"]),
             'is_subbed': None if len(cols["is_subbed"]) == 0 else any(cols["is_subbed"]),
             'is_bookmarked': None if len(cols["is_bookmarked"]) == 0 else any(cols["is_bookmarked"]),
             'current_chapter': None if len(cols["current_chapter"]) == 0 else max(cols["current_chapter"]),
             'series_length': None if len(cols["series_length"]) == 0 else max(cols["series_length"]),
             'series_rating': avg_rating,
             'all_tags': cols["all_tags"],
             'is_finished_inputting_info': None if len(cols["is_finished_inputting_info"]) == 0 else any(cols["is_finished_inputting_info"]),
           }

# Testing
# merge_on_title(s4_series_text, "a love story for the ages")

In [224]:
# Code to merge all series
all_titles = sorted(oth.title.unique())
merged_series = []

for title in all_titles: 
    new_series = merge_oth(oth, title)
    merged_series.append(new_series)

# Create & sort results 
merged_series_df = pd.DataFrame(merged_series).sort_values(by="title").reset_index(drop=True)
merged_series = sorted(merged_series, key=lambda fic: fic['title']) 

In [226]:
# Write results to JSON
with open("clean_data_5/series_text.json", "w") as outfile:
    json.dump(merged_series, outfile)

In [427]:
# Read in relevant files
with open("clean_data_5/fic_url.json", "r") as infile:
    fic_url = json.load(infile)

fic_text = pd.read_csv("clean_data_5/fic_text.csv", index_col=0, encoding="utf-8-sig")

In [69]:
def fic_equal(row1, row2) -> bool:
    """
    Takes two fic or series rows from a database (df).
    Compares them to determine if they're the same fic.
    Returns a boolean of whether or not they're equal (bool).
    """
    # Get titles boolean
    title1 = str(row1['title']).lower().strip()
    title2 = str(row2['title']).lower().strip()
    same_title = title1 == title2
    
    # Get authors boolean (strip, lower, not empty)
    authlist1 = [] if pd.isnull(row1['author']) else row1['author'].split(',')
    authlist2 = [] if pd.isnull(row2['author']) else row2['author'].split(',')
    author1 = set([auth.lower().strip() for auth in authlist1 if auth != ''])
    author2 = set([auth.lower().strip() for auth in authlist2 if auth != ''])
    
    same_author = (author1 == author2) or (not author1 and not author2)
    absent_author = not author1 or not author2

    # Get locations boolean
    same_location = (row1['location'] == row2['location']) or \
        pd.isnull(row1['location']) or pd.isnull(row2['location'])
    
    # Get fandoms boolean
    same_fandoms = False
    if not pd.isnull(row1['fandom']) and not pd.isnull(row2['fandom']):
        fandom1 = [fan.strip() for fan in row1['fandom'].split(',')]
        fandom2 = [fan.strip() for fan in row2['fandom'].split(',')]
        diff = set(fandom1) ^ set(fandom2)
        same_fandoms = (len(diff) == 0) or (diff == {'dcu', 'batman'}) or \
            (diff == {'spiderman', 'avengers'})
        
    
    # print(f'same_title: {same_title}, same_fandoms: {same_fandoms}, same_author: {same_author}, absent_author: {absent_author}')
    # print()
    return same_title and same_fandoms and (same_author or (absent_author and same_location))


# WORKSPACE

In [32]:
# Partially complete dict of ALL fandom names
{'1/2_prince': '1/2 Wangzi | 1/2 Prince',
 '2ha': "二哈和他的白猫师尊 - 肉包不吃肉 | The Husky and His White Cat Shizun - Meatbun Doesn't Eat Meat",
 'agents_of_shield': 'Agents of S.H.I.E.L.D. (TV)',
 'arrow': 'Arrow (TV 2012)',
 'artemis_fowl':  'Artemis Fowl - Eoin Colfer',
 'assassins_creed': "Assassin's Creed - All Media Types",
 'at_the_end_of_the_road': '그 끝에 있는 것 | At the End of the Road (Webcomic)',
 'ateez_band':  'ATEEZ (Band)',
 'atla':  'Avatar: The Last Airbender',
 'attack_on_titan': 'Shingeki no Kyojin | Attack on Titan',
 'avatar': 'Avatar (Cameron Movies)',
 'avengers': 'Avengers (Marvel) - All Media Types',
 'batman': 'Batman - All Media Types',
 'batman_2022': 'The Batman (Movie 2022)',
 'big_hero_6': 'Big Hero 6 (2014)',
 'black_butler': 'Kuroshitsuji | Black Butler',
 'black_panther': 'Black Panther (2018)',
 'bleach': 'Bleach',
 'bnha': '僕のヒーローアカデミア | Boku no Hero Academia | My Hero Academia',
 'books_of_the_raksura': 'Books of the Raksura - Martha Wells',
 'brooklyn_99': 'Brooklyn Nine-Nine (TV)',
 'bts': '방탄소년단 | Bangtan Boys | BTS',
 'captain_america':  'Captain America (Movies)',
 'captive_prince': 'Captive Prince - C. S. Pacat',
 'castlevania': 'Castlevania (Cartoon 2017-2021)',
 'chronicles_of_narnia':  'Chronicles of Narnia - C. S. Lewis',
 'code_geass':  'Code Geass',
 'criminal_minds':  'Criminal Minds',
 'damien': 'Damien (TV)',
 'danny_phantom':  'Danny Phantom',
 'daredevil':  'Daredevil (TV)',
 'dark_angel': 'Dark Angel (TV)',
 'dcu': 'DCU',
 'deadpool':  'Deadpool - All Media Types',
 'death_note':  'Death Note',
 'descendants': 'Descendants (Disney Movies)',
 'detroit_become_human': 'Detroit: Become Human (Video Game)',
 'doctor_strange': 'Doctor Strange (Movies)Navigation and Actions',
 'downton_abbey': 'Downton Abbey',
 'eyeshield_21': 'Eyeshield 21',
 'fairy_tail': 'Fairy Tail,
 'fantastic_four': 'Fantastic Four',
 'fast&furious': 'Fast & Furious (Movies),
 'fbawtft': 'Fantastic Beasts and Where to Find Them (Movies)',
 'final_fantasy_vii': 'Compilation of Final Fantasy VII',
 'final_fantasy_viii': 'Final Fantasy VIII',
 'final_fantasy_xv': 'Final Fantasy XV',
 'folklore': 'folklore',
 'frozen': 'Frozen (Disney Movies)',
 'fullmetal_alchemist': 'Fullmetal Alchemist - All Media Types',
 'game_of_thrones': 'Game of Thrones (TV)',
 'good_omens': 'Good Omens (TV)',
 'gotham':'Gotham (TV)',
 'gravity_falls': 'Gravity Falls',
 'guardians_of_the_galaxy': 'Guardians of the Galaxy (Movies)',
 'gundam_wing_ac': 'Gundam Wing',
 'hamilton': 'Hamilton - Miranda',
 'hannibal': 'Hannibal (TV)',
 'harry_potter': 'Harry Potter - J. K. Rowling',
 'hells_kitchen': "Hell's Kitchen (US TV) RPF",
 'highschool_of_the_dead': 'Gakuen Mokushiroku | Highschool of the Dead',
 'house_of_the_dragon': 'House of the Dragon (TV)',
 'httyd': 'How to Train Your Dragon (Movies)',
 'hunger_games': 'Hunger Games Series - All Media Types',
 'iron_man': 'Iron Man (Movies)',
 'james_bond': 'James Bond',
 'john_wick': 'John Wick (Movies)',
 'joy_of_life': '庆余年 | Joy of Life (TV)Navigation and Actions',
 'jurassic_park': 'Jurassic Park - All Media Types',
 'justice_league': 'Justice League - All Media Types',
 'k_anime',
 'katekyo_hitman_reborn',
 'kingsman',
 'kung_fu_panda',
 'kuroko_no_basuke',
 'left4dead',
 'legend_of_korra',
 'leverage',
 'loki',
 'lord_of_the_rings',
 'lucifer',
 'magi_lom',
 'magnus_files',
 'matilda',
 'mcu',
 'mdzs',
 'megamind',
 'men_in_black',
 'merlin',
 'minecraft',
 'miraculous_ladybug',
 'mob_psycho',
 'monster_hunter',
 'moon_knight',
 'multiple_fandoms',
 'my_next_life_as_a_villainess',
 'naruto',
 'ncis',
 'nirvana_in_fire',
 'none_placeholder',
 'none_unsorted',
 'once_upon_a_time',
 'one_piece',
 'one_punch_man',
 'original_work',
 'other',
 'our_flag_means_death',
 'ouran_hshc',
 'pacific_rim',
 'percy_jackson_olympians',
 'person_of_interest',
 'phineas_and_ferb',
 'pkmn_sword&shield',
 'pokemon',
 'pokemon_detective_pikachu',
 'prince_of_tennis',
 'princess_kaguya',
 'reincarnated_as_a_sword',
 'rise_of_the_guardians',
 'riverdale',
 'rwby',
 'shadow_and_bone',
 'sherlock',
 'six_of_crows',
 'smallville',
 'solo_levelling',
 'soul_eater',
 'spiderman',
 'spiderverse',
 'star_wars',
 'star_wars_cw',
 'stargate',
 'stargate_atlantis',
 'stranger_things',
 'suicide_squad',
 'supernatural',
 'svsss',
 'sword_art_online',
 'tangled',
 'teen_titans',
 'teen_wolf',
 'temeraire',
 'tgcf',
 'the defenders',
 'the_100',
 'the_arrow',
 'the_croods',
 'the_flash',
 'the_hobbit',
 'the_kane_chronicles',
 'the_kings_avatar',
 'the_last_of_us',
 'the_punisher',
 'the_song_of_achillles',
 'the_witcher',
 'thor',
 'tiger&bunny',
 'tokyo_ghoul',
 'torchwood',
 'transformers',
 'travelers',
 'twilight',
 'umbrella_academy',
 'vampire_hunter_d',
 'venom',
 'voltron',
 'welcome_to_nightvale',
 'wicked',
 'winx_club',
 'xmen',
 'yona_of_the_dawn',
 'young_hercules',
 'young_justice',
 'yuuri_on_ice',
 'zootopia'}

SyntaxError: EOL while scanning string literal (1847062757.py, line 42)