# Cleaning s6: intaking URLs from the Safari reading list
- **By:** Sofia Kobayashi
- **Date:** 11/18/2023
- **Description:** Intaking URLs from the Safari reading list

In [1]:
import pandas as pd
import re
import AO3
import json
import plistlib
from datetime import datetime
import re

import os
from dotenv import load_dotenv
from pathlib import Path
env_path = Path(".") / ".env"
load_dotenv(dotenv_path=env_path)

pd.set_option('display.max_columns', None)

### Load in reading list

In [136]:
def readingListToJson():
    """
    Export the Safari Reading List to a dated JSON file.

    Reads $HOME/Library/Safari/Bookmarks.plist, collects every Reading List
    entry and writes a list of dicts with the keys "url", "dateAdded" and
    "dateLastViewed" to reading_lists/readinglist_<mm-dd-yy>.json (relative to
    the current working directory; the folder is created when missing).
    Dates are stored as "%m-%d-%y %H:%M:%S" strings; "dateLastViewed" is None
    for entries that carry no DateLastViewed value.

    Returns:
        The path of the JSON file that was written.

    Raises:
        ValueError: if the plist has no 'com.apple.ReadingList' node.
    """
    DATE_FORMAT = "%m-%d-%y %H:%M:%S"
    current_date = datetime.now().strftime("%m-%d-%y")

    INPUT_FILE = os.path.join(os.environ['HOME'], 'Library/Safari/Bookmarks.plist')
    OUTPUT_FILE = f"reading_lists/readinglist_{current_date}.json"

    # Load and parse the Bookmarks file
    with open(INPUT_FILE, 'rb') as plist_file:
        plist = plistlib.load(plist_file)

    # Look for the child node which contains the Reading List data.
    # There should only be one Reading List item
    reading_list = None
    for child in plist['Children']:
        if child.get('Title') == 'com.apple.ReadingList':
            reading_list = child

    # Fail with a clear message instead of an UnboundLocalError further down
    if reading_list is None:
        raise ValueError(f'No "com.apple.ReadingList" node found in {INPUT_FILE}')

    # A Reading List node without children is treated as an empty list
    bookmarks = reading_list.get('Children', [])

    # For each bookmark in the bookmark list, grab the URL, dateAdded & dateLastViewed
    urls = []
    for bookmark in bookmarks:
        details = bookmark["ReadingList"]
        last_viewed = details.get("DateLastViewed")
        urls.append({
            "url": bookmark["URLString"],
            "dateAdded": details["DateAdded"].strftime(DATE_FORMAT),
            "dateLastViewed": (last_viewed.strftime(DATE_FORMAT)
                               if last_viewed is not None else None),
        })

    # Write the URLs to a file (make sure the target folder exists first)
    os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
    with open(OUTPUT_FILE, "w") as outfile:
        json.dump(urls, outfile)

    print(f'Created new readinglist in "{OUTPUT_FILE}"')
    return OUTPUT_FILE

In [137]:
# Snapshot the current Safari reading list to disk, then read the snapshot back in
FILE = readingListToJson()
rl = json.loads(Path(FILE).read_text())


Created new readinglist in "reading_lists/readinglist_12-06-23.json"


### De-dup reading list

In [441]:
# ELEPHANT - USE JSON FOR NOW, EVENTUALLY SWITCH TO LOADING IN ALL CURRENT DBs & SCRAPING FROM THERE

In [6]:
# Keep only links that no earlier run has processed (old storage JSON)
with open('reading_lists/already_seen_urls.json') as infile:
    already_seen = json.load(infile)
    already_seen_urls = {pair['url'] for pair in already_seen}

rl_dedup = []
for info in rl:
    if info['url'] in already_seen_urls:
        continue
    # Stamp each surviving entry with the moment it was picked up
    info['dateProcessed'] = datetime.now().strftime("%m-%d-%y %H:%M:%S")
    rl_dedup.append(info)

# rl_dedup

### Sort into AO3, other fanfic, and everything else
- Initialize ao3 df

In [7]:
# Sort into AO3, other fanfiction (ffn.net, Wattpad, tumblr, etc), everything else.
# Each link lands in exactly one bucket; AO3 wins over the other fanfic domains.
# (Previously AO3 links fell through to the for/else and were also filed
# under other_else.)
OTHER_FANFIC_DOMAINS = ('fanfiction.net', 'wattpad.com', 'tumblr.com', 'facebook.com')

ao3_list = []
other_fanfic = []
other_else = []

for info in rl_dedup:
    if 'archiveofourown.org' in info['url']:
        ao3_list.append(info)
    elif any(dom in info['url'] for dom in OTHER_FANFIC_DOMAINS):
        other_fanfic.append(info)
    else:
        other_else.append(info)

# set([pair['url'].split('/')[2] for pair in other_fanfic])

In [8]:
# Build the AO3 frame from the sorted links; every row starts out flagged as new
ao3 = pd.DataFrame(ao3_list).assign(is_new=True)

### Sort ao3 links into type

In [9]:
# Divide urls by ao3_type
# First path segment after the AO3 domain, plus the character right after it
# (empty when the URL ends at the segment).
_AO3_PATH_RE = re.compile(r'archiveofourown\.org/(\w+)(.?)')

# Path segments whose record type depends on the segment alone.
_AO3_SIMPLE_TYPES = {
    'bookmarks': 'bookmarks',
    'chapters': 'works',
    'comments': 'comments',
    'external_works': 'external_works',
    'fandoms': 'tags',
    'series': 'series',
    'tags': 'tags',
    'users': 'users',
    'works': 'works',
}


def ao3_type(url):
    """
    Classify an AO3 url by its first path segment.

    url: the link to classify (str)

    Returns a dict {'url_type': <first path segment>, 'type': <record type>}:
      - a segment immediately followed by '?' is a 'query' (search/filter page)
      - 'collections' is 'works' when the url contains '/works/', otherwise
        'collections'
      - 'chapters' map to 'works' and 'fandoms' map to 'tags'
      - bookmarks, comments, external_works, series, tags, users and works map
        to themselves
    Returns None when the url has no AO3 path segment or the segment is not
    one of the types above.
    """
    match = _AO3_PATH_RE.search(url)
    if match is None:
        return None

    url_type, next_char = match.groups()

    # Query pages are recognised before anything else, whatever the segment is
    if next_char == '?':
        return {'url_type': url_type, 'type': 'query'}

    # A collection link can point at the collection itself or at a work inside it
    if url_type == 'collections':
        record_type = 'works' if '/works/' in url else 'collections'
        return {'url_type': 'collections', 'type': record_type}

    record_type = _AO3_SIMPLE_TYPES.get(url_type)
    if record_type is None:
        return None
    return {'url_type': url_type, 'type': record_type}

In [10]:
# Classify every AO3 link and record both the raw url segment and the record type
for row_label, link in ao3['url'].items():
    classified = ao3_type(link)
    ao3.loc[row_label, 'url_type'] = classified['url_type']
    ao3.loc[row_label, 'type'] = classified['type']

In [11]:
ao3.type.value_counts()
# ao3[ao3.type=='chapters']

type
works             2904
series             135
users               71
tags                34
collections          9
query                4
external_works       1
comments             1
Name: count, dtype: int64

In [12]:
sorted(ao3.type.value_counts().index)

['collections',
 'comments',
 'external_works',
 'query',
 'series',
 'tags',
 'users',
 'works']

## Handling Each Type

### Handling 'collections'
- Get collection name

In [13]:
# Collection landing pages (collection links that point at a work are typed 'works')
collections = ao3[ao3.type=='collections'].reset_index(drop=True)

# Get collection name col: the path segment right after /collections/.
# str.extract always creates the column (even for an empty frame) and leaves
# NaN where a url has no collection name.
collections['id'] = collections['url'].str.extract(r'/collections/(\w+)', expand=False)

collections = collections[['id', 'url_type', 'type', 'url', 'dateProcessed', 
                           'dateAdded', 'dateLastViewed', 'is_new']]
collections.head()

Unnamed: 0,id,url_type,type,url,dateProcessed,dateAdded,dateLastViewed,is_new
0,atla_simp,collections,collections,https://archiveofourown.org/collections/atla_simp,11-22-23 14:20:07,04-29-23 18:16:47,,True
1,Heavenly_Treasury,collections,collections,https://archiveofourown.org/collections/Heaven...,11-22-23 14:20:07,03-08-23 14:14:18,,True
2,SJ_Reset,collections,collections,https://archiveofourown.org/collections/SJ_Res...,11-22-23 14:20:07,02-11-23 20:58:11,,True
3,TimeTravel_WorldTravel,collections,collections,https://archiveofourown.org/collections/TimeTr...,11-22-23 14:20:07,01-06-23 07:42:54,,True
4,TheCrackheadBible,collections,collections,https://archiveofourown.org/collections/TheCra...,11-22-23 14:20:07,03-30-22 23:44:32,,True


### Handling 'comments'
- No handling

In [14]:
# Comment links need no extra parsing: just pick the rows and order the columns
comment_columns = ['url_type', 'type', 'url', 'dateProcessed', 'dateAdded', 'dateLastViewed', 'is_new']
comments = ao3.loc[ao3['type'] == 'comments', comment_columns].reset_index(drop=True)
comments

Unnamed: 0,url_type,type,url,dateProcessed,dateAdded,dateLastViewed,is_new
0,comments,comments,https://archiveofourown.org/comments/215664449,11-22-23 14:20:07,11-11-22 03:43:45,,True


### Handling 'external_works'
- url types: external_works
- Manually get referenced work

In [16]:
# Process external works: the AO3 link is only a stub page, so keep it under
# 'external_url' and fill 'url' with the work it references.
external_works = ao3[ao3.url_type=='external_works'].reset_index(drop=True)\
                                                    .rename(columns={'url': 'external_url'})

# Hand-maintained lookup: AO3 external_works page -> url of the referenced work.
# Keying by url (instead of assigning a positional list) keeps this cell working
# when the number or order of external_works rows changes; rows without an
# entry here get NaN and still need to be hand-processed.
external_work_targets = {
    'https://archiveofourown.org/external_works/637417': 'https://www.fanfiction.net/s/12334156/4/Travelers',
}
external_works['url'] = external_works['external_url'].map(external_work_targets)
external_works

have to hand-process these

Unnamed: 0,external_url,dateAdded,dateLastViewed,dateProcessed,is_new,url_type,type,url
0,https://archiveofourown.org/external_works/637417,01-02-22 07:33:23,,11-22-23 14:20:07,True,external_works,external_works,https://www.fanfiction.net/s/12334156/4/Travelers


### Handling 'query'
- No handling

In [17]:
# Search/filter pages need no extra parsing: pick the rows and order the columns
query_columns = ['url_type', 'type', 'url', 'dateProcessed', 'dateAdded', 'dateLastViewed', 'is_new']
query = ao3.loc[ao3['type'] == 'query', query_columns].reset_index(drop=True)
query

Unnamed: 0,url_type,type,url,dateProcessed,dateAdded,dateLastViewed,is_new
0,works,query,https://archiveofourown.org/works?commit=Sort+...,11-22-23 14:20:07,03-09-23 16:13:44,,True
1,bookmarks,query,https://archiveofourown.org/bookmarks?commit=S...,11-22-23 14:20:07,11-23-22 00:49:33,,True
2,works,query,https://archiveofourown.org/works?commit=Sort+...,11-22-23 14:20:07,10-09-22 20:43:07,10-10-22 13:53:54,True
3,works,query,https://archiveofourown.org/works?commit=Sort+...,11-22-23 14:20:07,08-06-22 07:36:29,,True


### Handling 'series'
- url types: series
- Add series id

In [18]:
series = ao3[ao3.type=='series'].reset_index(drop=True)

# Add series id: the digits right after series/.
# str.extract always creates the column (even for an empty frame) and leaves
# NaN where a url carries no numeric id.
series['id'] = series['url'].str.extract(r'series/(\d+)', expand=False)

series = series[['id', 'url_type', 'type', 'url', 'dateProcessed', 'dateAdded', 'dateLastViewed', 'is_new']]
series.head()

Unnamed: 0,id,url_type,type,url,dateProcessed,dateAdded,dateLastViewed,is_new
0,3226068,series,series,https://archiveofourown.org/series/3226068,11-22-23 14:20:07,11-12-23 09:29:43,,True
1,3606640,series,series,https://archiveofourown.org/series/3606640,11-22-23 14:20:07,11-10-23 08:31:49,,True
2,1600705,series,series,https://archiveofourown.org/series/1600705,11-22-23 14:20:07,11-04-23 11:35:44,,True
3,824460,series,series,https://archiveofourown.org/series/824460,11-22-23 14:20:07,10-24-23 20:02:31,,True
4,3509641,series,series,https://archiveofourown.org/series/3509641,11-22-23 14:20:07,09-29-23 11:40:54,,True


### Handling 'tags'
- url types: fandoms, tags
- Parse & add tag text

In [19]:
from urllib.parse import unquote

# Everything after /tags/ or /fandoms/ is captured as the second group
tag_path = re.compile('/(tags|fandoms)/(.+)(/works|$)')
tags = ao3[ao3['type'] == 'tags'].reset_index(drop=True)

# Add tag text: cut the captured remainder at the first '/works' and
# percent-decode what is left
for row_label, link in tags['url'].items():
    remainder = tag_path.findall(link)[0][1]
    raw_tag = remainder.partition('/works')[0]
    tags.loc[row_label, 'text'] = unquote(raw_tag)

tag_columns = ['url_type', 'type', 'url', 'text', 'dateProcessed', 'dateAdded', 'dateLastViewed', 'is_new']
tags = tags[tag_columns]
tags.head()

Unnamed: 0,url_type,type,url,text,dateProcessed,dateAdded,dateLastViewed,is_new
0,tags,tags,https://archiveofourown.org/tags/Haikyuu!!/works,Haikyuu!!,11-22-23 14:20:07,10-08-23 07:11:09,,True
1,tags,tags,https://archiveofourown.org/tags/212th%20Attac...,212th Attack Battalion Members (Star Wars: The...,11-22-23 14:20:07,08-27-23 09:23:00,,True
2,tags,tags,https://archiveofourown.org/tags/Code%20Geass/...,Code Geass,11-22-23 14:20:07,08-22-23 07:21:44,,True
3,tags,tags,https://archiveofourown.org/tags/Top%20Gun%20(...,Top Gun (Movies),11-22-23 14:20:07,07-23-23 03:24:06,,True
4,fandoms,tags,http://archiveofourown.org/fandoms/The%20Schoo...,The School for Good and Evil - Soman Chainani,11-22-23 14:20:07,04-16-23 18:42:20,,True


### Handling 'users'
- url types: users
- Parse & Add user pen name (id)

In [20]:
users = ao3[ao3.type=='users'].reset_index(drop=True)

# Add user pen name (id): the path segment right after users/.
# str.extract always creates the column (even for an empty frame) and leaves
# NaN where a url has no user name.
users['id'] = users['url'].str.extract(r'users/(\w+)', expand=False)

users = users[['id','url_type', 'type', 'url', 'dateProcessed', 'dateAdded', 'dateLastViewed', 'is_new']]
users.head()

Unnamed: 0,id,url_type,type,url,dateProcessed,dateAdded,dateLastViewed,is_new
0,indefinitelyforever,users,users,https://archiveofourown.org/users/indefinitely...,11-22-23 14:20:07,11-03-23 02:03:58,,True
1,Kaeyas_thighs_tho,users,users,https://archiveofourown.org/users/Kaeyas_thigh...,11-22-23 14:20:07,10-31-23 22:39:22,11-03-23 00:44:03,True
2,naarii,users,users,https://archiveofourown.org/users/naarii/pseud...,11-22-23 14:20:07,10-05-23 22:18:40,,True
3,azunshi,users,users,https://archiveofourown.org/users/azunshi/pseu...,11-22-23 14:20:07,09-26-23 21:51:13,,True
4,RisqueRaven,users,users,https://archiveofourown.org/users/RisqueRaven/...,11-22-23 14:20:07,09-05-23 08:36:59,,True


### Handling 'works'
- chapters, col_work, works

In [23]:
# Process chapter links: the link itself is kept under 'chapter_url'; 'url'
# holds the work it belongs to, which has to be looked up by hand.
chapters = ao3[ao3.url_type=='chapters'].reset_index(drop=True).rename(columns={'url': 'chapter_url'})

# Hand-maintained lookup: chapter url -> url of the parent work.
# Rows without an entry here get NaN (still to be hand-processed) instead of
# the cell failing on a length mismatch as soon as one chapter link exists.
chapter_work_urls = {}
# astype(object) keeps the .str accessor usable while the column is all-NaN
chapters['url'] = chapters['chapter_url'].map(chapter_work_urls).astype(object)

# Work id = the digits right after /works/ in the resolved work url
chapters['id'] = chapters['url'].str.extract(r'/works/(\d+)', expand=False)

chapters

have to hand-process these

Unnamed: 0,chapter_url,dateAdded,dateLastViewed,dateProcessed,is_new,url_type,type,url


In [24]:
# Works that were saved through a collection link (/collections/<name>/works/<id>)
col_work = ao3[(ao3.url_type=='collections') & (ao3.type == 'works')].reset_index(drop=True)

# Fill id and from_collection straight from the url.
# str.extract always creates the columns (even for an empty frame) and leaves
# NaN where a part is missing from the url.
col_work['id'] = col_work['url'].str.extract(r'/works/(\d+)', expand=False)
col_work['from_collection'] = col_work['url'].str.extract(r'/collections/(\w+)', expand=False)

col_work.head()

Unnamed: 0,url,dateAdded,dateLastViewed,dateProcessed,is_new,url_type,type,id,from_collection
0,https://archiveofourown.org/collections/Whynot...,08-22-23 07:18:47,,11-22-23 14:20:07,True,collections,works,18530821,Whynotearlier
1,https://archiveofourown.org/collections/Whynot...,08-22-23 07:18:07,,11-22-23 14:20:07,True,collections,works,39376755,Whynotearlier
2,https://archiveofourown.org/collections/Whynot...,08-22-23 07:16:38,,11-22-23 14:20:07,True,collections,works,24311155,Whynotearlier
3,https://archiveofourown.org/collections/Whynot...,08-22-23 07:16:12,,11-22-23 14:20:07,True,collections,works,42481512,Whynotearlier
4,https://archiveofourown.org/collections/Whynot...,08-22-23 07:14:47,,11-22-23 14:20:07,True,collections,works,36456823,Whynotearlier


In [25]:
works = ao3[(ao3.url_type=='works') & (ao3.type=='works')].reset_index(drop=True)

# Fill id: the path segment right after /works/.
# Note that \w+ also accepts non-numeric segments, unlike the \d+ used for
# chapters and collection works.
works['id'] = works['url'].str.extract(r'/works/(\w+)', expand=False)

# Concatenate chapters, col_works, and works into works DF.
# ignore_index gives every row its own label; without it the three frames each
# start at 0 and a later df.loc[ind, ...] write would hit several rows at once.
works = pd.concat([chapters, col_work, works], ignore_index=True)

# reindex (rather than [[...]]) so a column that none of the three frames
# produced shows up as NaN instead of raising KeyError
works = works.reindex(columns=['id','url_type', 'type', 'url', 'from_collection', 'chapter_url', 
                               'dateProcessed', 'dateAdded', 'dateLastViewed', 'is_new'])
works['is_new'] = True

works.head()

Unnamed: 0,id,url_type,type,url,from_collection,chapter_url,dateProcessed,dateAdded,dateLastViewed,is_new
0,18530821,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:18:47,,True
1,39376755,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:18:07,,True
2,24311155,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:16:38,,True
3,42481512,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:16:12,,True
4,36456823,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:14:47,,True


In [None]:
## Merge with current AO3 databases

In [8]:
# Merge with current AO3 dtb

## Load in new AO3 stuff & update DBs
- works, series, collections(?), users(?)

In [442]:
# Load in new AO3 

# Workspace

In [110]:
# Load the cleaned url table and reshape it into reading-list style dicts
df = pd.read_csv('fic_url_clean_1.csv', encoding='utf-8', index_col=0)
d = [{'url': link} for link in df['url']]

In [112]:
# Load the older AO3 link checkpoint and reshape it into reading-list style dicts
df = pd.read_csv('../v9.2_jupyter-ffnv9/checkpoints_data/all_ao3_links_until_01-13-23__02-26-23.csv', encoding='utf-8', index_col=0)
d = [{'url': link} for link in df['url']]

In [114]:
def all_types_test(reading_list):
    """
    Takes a reading list style list of ao3 info (dicts with a 'url' key).
    Returns all ao3 types present in list, as a sorted list of
    (first path segment, following character) tuples. The following character
    is '' when the url ends right after the segment.

    Urls without an AO3 path segment are skipped.
    Every 'bookmarks' url is printed as it is encountered.
    """
    pattern = re.compile(r'archiveofourown\.org/(\w+)(.?)')

    found = set()
    for info in reading_list:
        match = pattern.search(info['url'])
        if match is None:
            continue

        pair = match.groups()
        found.add(pair)
        if pair[0] == 'bookmarks':
            print(info['url'])

    # Sort on the whole tuple so entries sharing a segment come out in a stable order
    return sorted(found)

all_types_test(d)

https://archiveofourown.org/bookmarks?commit=Sort+and+Filter&bookmark_search%5Bsort_column%5D=created_at&include_bookmark_search%5Brelationship_ids%5D%5B%5D=27817261&bookmark_search%5Bother_tag_names%5D=&bookmark_search%5Bother_bookmark_tag_names%5D=&bookmark_search%5Bexcluded_tag_names%5D=&bookmark_search%5Bexcluded_bookmark_tag_names%5D=&bookmark_search%5Bbookmarkable_query%5D=&bookmark_search%5Bbookmark_query%5D=&bookmark_search%5Blanguage_id%5D=&bookmark_search%5Brec%5D=0&bookmark_search%5Bwith_notes%5D=0&user_id=kyme


[('bookmarks', '?'),
 ('chapters', '/'),
 ('collections', '/'),
 ('comments', '/'),
 ('series', '/'),
 ('tags', '/'),
 ('users', '/'),
 ('works', '/'),
 ('works', '?')]

In [None]:
# total = set()
# for info in ao3:
#     t = ao3_type(info['url'])
#     if t == None:
#         print(info)
#     total.add(t)

# total

In [38]:
df2 = pd.read_csv('clean_data_4/all_versions_fic_url.csv', index_col=0, encoding='utf-8')

In [42]:
df2[~pd.isnull(df2.title)]

Unnamed: 0,version_num,smk_source,dtb_type,location,fic_id,url,categories,is_bold,fandom_type,fandom,fic_status,title,is_coffee,fic_series,author,length,is_complete,is_subbed,is_backedup,is_bookmarked,in_category,all_tags,current_chapter,fic_rating,to_read_rating,to_read_description,is_finished_inputting_data,ffn_date_updated_2-4-23,ffn_date_added
579,7.0,v7_updates,to_read,ao3,,http://archiveofourown.org/works/260273/chapte...,,False,,"batman,dcu",,Gonna Be A Better One (A Thousand Miles To You...,,,,,,,,,,,,,,"TimKon, In which Tim quits being Robin, Kon re...",,,
960,7.1,v7.1_ffn,read,ao3,,https://archiveofourown.org/works/717740/chapt...,,,,bleach,,"On Life, On Death, On Everything In-Between",,,"cywscross,",51,False,True,TRUE,,,"well_written,.Time Travel,.BAMF,.Fix-It,.Prote...",,3.0,,,True,,
611,7.0,v7_updates,to_read,ao3,,https://archiveofourown.org/collections/best_i...,,False,,naruto,,"Dirt and Ashes, or: The One-and-a-Half Body Pr...",,,,,,,,,,,,,,"90k, Tozette's ""In which Sakura carries half o...",,,
584,7.0,v7_updates,to_read,ao3,,https://archiveofourown.org/collections/bnhafi...,,False,,bnha,,(How To) Forgive and Forget,,,,,,,,,,,,,,"76 collections, quirkless discrimination",,,
514,7.0,v7_updates,to_read,ao3,,https://archiveofourown.org/collections/Clever...,,False,,"bnha,katekyo_hitman_reborn",,curiosity kills the cat (but satisfaction brin...,,,,,,,,,,,,,,"33k, ""Stuck in the body of a cat in another un...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3852,8.0,v8_local_files,,oth,,https://www.royalroad.com/fiction/36735/the-pe...,,,,original,,The Perfect Run,,,Void Herald,,,,,,,,,,,,,,
1535,8.0,chrome,,oth,,https://www.royalroad.com/fiction/42226/foxs-t...,,,,,,Fox’s Tongue and Kirin’s Bone,,,MuffinLance,,,,,,,,,,,,,,
3853,8.0,v8_local_files,,oth,,https://www.royalroad.com/fiction/42226/foxs-t...,,,,,,Fox’s Tongue and Kirin’s Bone,,,MuffinLance,,,,,,,,,,,,,,
3854,8.0,v8_local_files,,oth,,https://www.royalroad.com/fiction/44543/fairyp...,,,,,,#FairyPrincessProblems,,,Miscellea,,,,,,,,,,,,,,


In [None]:
w2 = AO3.Work(13273611, session=ao3_session)

In [None]:
ao3_session = AO3.Session(os.environ['AO3_USERNAME'], os.environ['AO3_PASSWORD'])

In [27]:
import AO3

w1 = AO3.Work(18530821)

w1.bookmark
w1.collect
w1.comment
w1.delete_bookmark
w1.download
w1.download_to_file
w1.get
w1.get_comments
w1.get_images
w1.leave_kudos
w1.load_chapters
w1.


In [57]:
w1.authenticity_token
w1.authors
w1.bookmarks
w1.categories
w1.chapters
w1.characters
w1.collections
w1.comments
w1.complete
w1.date_edited
w1.date_published
w1.date_updated
w1.end_notes
w1.expected_chapters
w1.fandoms
w1.hits
w1.is_subscribed
w1.kudos
w1.language
w1.loaded
w1.reload
w1.request
w1.set_session
w1.str_format
w1.subscribe
w1.unsubscribe


SyntaxError: invalid syntax (3495360488.py, line 6)

In [104]:
w1.str_format

<function AO3.works.Work.str_format(string)>

In [107]:
works.head()

Unnamed: 0,id,url_type,type,url,from_collection,chapter_url,dateProcessed,dateAdded,dateLastViewed,is_new
0,18530821,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:18:47,,True
1,39376755,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:18:07,,True
2,24311155,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:16:38,,True
3,42481512,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:16:12,,True
4,36456823,collections,works,https://archiveofourown.org/collections/Whynot...,Whynotearlier,,11-22-23 14:20:07,08-22-23 07:14:47,,True


In [None]:
#session=AO3.Session(os.environ['AO3_USERNAME'], os.environ['AO3_PASSWORD'])):
def save_chapter(ind, df, session='hi'):
    """
    Work-in-progress stub meant to fill df in place with info for the fic at ind.

    For now it only binds the notebook-global w2 to a local name; ind, df and
    session are not used yet.
    """
    # Intended lookup, still disabled:
    # work = AO3.Work(df.loc[ind, 'id'], session=session)
    work = w2

In [113]:
def save_fic(ind, df, session=None):
    """
    Modifies df in place to load info of fic at ind, and dumps the fic text to
    ao3_text/<id>_<mm-dd-yy> as JSON.

    ind: index label of the row in df to fill
    df: works DataFrame; must have an 'id' column
    session: AO3 session for the (currently disabled) per-row lookup. Defaults
        to None so that defining this function no longer logs in to AO3 or
        requires the AO3_USERNAME / AO3_PASSWORD environment variables.
    """
    # TODO: switch to the per-row lookup once testing is done:
    # work = AO3.Work(df.loc[ind, 'id'], session=session)
    # NOTE(review): still wired to the notebook-global w2, so every call stores
    # the same work regardless of ind.
    work = w2

    # Get misc data
    df.loc[ind, 'api_url'] = work.url
    df.loc[ind, 'authenticity_token'] = work.authenticity_token
    df.loc[ind, 'start_notes'] = work.start_notes
    df.loc[ind, 'end_notes'] = work.end_notes
    df.loc[ind, 'is_subscribed'] = work.is_subscribed
    df.loc[ind, 'loaded'] = work.loaded
    df.loc[ind, 'oneshot'] = work.oneshot

    # Save text. The path is built outside the open() call so the f-string does
    # not reuse its own quote character (a SyntaxError before Python 3.12), and
    # the file is opened for writing so json.dump can write to it.
    fic_id = df.loc[ind, 'id']
    text_path = f'ao3_text/{fic_id}_{datetime.now().strftime("%m-%d-%y")}'
    os.makedirs('ao3_text', exist_ok=True)
    with open(text_path, 'w') as outfile:
        json.dump(work.text, outfile)

    # Save chapters. The whole chapter list goes into one cell, which needs an
    # object column and .at; .loc would try to spread the list over cells.
    if 'chapters' not in df.columns:
        df['chapters'] = None
    df.at[ind, 'chapters'] = work.chapters

    # TODO: save metadata

# Try the loader on a copy so the real works frame stays untouched
works2 = works.copy()
save_fic(0, works2, 'hi')  # was savefic(...), a name that is never defined

In [131]:
w1.chapters[0].work

<Work [Flimflammer]>

In [None]:
parent_id
'authenticity_token',
 'end_notes',
 'id',
 'loaded',
 'number',
 'start_notes',
 'summary',
 'text',
 'title',
 'url',
 'words',

In [118]:
dir(w1.chapters[0])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_session',
 '_soup',
 '_work',
 'authenticity_token',
 'comment',
 'end_notes',
 'get',
 'get_comments',
 'get_images',
 'id',
 'loaded',
 'number',
 'reload',
 'request',
 'set_session',
 'start_notes',
 'summary',
 'text',
 'title',
 'url',
 'words',
 'work']

In [133]:
w2.chapters[0].text



In [95]:
ke = ['authenticity_token',
 'authors',
 'bookmark',
 'bookmarks',
 'categories',
 'chapters',
 'characters',
 'collect',
 'collections',
 'comment',
 'comments',
 'complete',
 'date_edited',
 'date_published',
 'date_updated',
 'delete_bookmark',
 'download',
 'download_to_file',
 'end_notes',
 'expected_chapters',
 'fandoms',
 'get',
 'get_comments',
 'get_images',
 'hits',
 'id',
 'is_subscribed',
 'kudos',
 'language',
 'leave_kudos',
 'load_chapters',
 'loaded',
 'metadata',
 'nchapters',
 'oneshot',
 'rating',
 'relationships',
 'reload',
 'request',
 'restricted',
 'series',
 'set_session',
 'start_notes',
 'status',
 'str_format',
 'subscribe',
 'summary',
 'tags',
 'text',
 'title',
 'unsubscribe',
 'url',
 'warnings',
 'words']

In [49]:
w1.warnings



In [31]:
dir(w1)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_bookmarkid',
 '_session',
 '_soup',
 '_sub_id',
 'authenticity_token',
 'authors',
 'bookmark',
 'bookmarks',
 'categories',
 'chapters',
 'characters',
 'collect',
 'collections',
 'comment',
 'comments',
 'complete',
 'date_edited',
 'date_published',
 'date_updated',
 'delete_bookmark',
 'download',
 'download_to_file',
 'end_notes',
 'expected_chapters',
 'fandoms',
 'get',
 'get_comments',
 'get_images',
 'hits',
 'id',
 'is_subscribed',
 'kudos',
 'language',
 'leave_kudos',
 'load_chapters',
 'loaded',
 'metadata',
 'nchapters',
 'oneshot',
 'rating',
 'relationships',
 'reload',