In [None]:
# !pip install psycopg2

In [73]:
# Dependencies
from datetime import date, timedelta, datetime
import requests
import pandas as pd
from time import sleep
import pprint as pp
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

# Import local settings
from local_postgres import postgresql as pg
from api_keys import nyt_apikey_1, nyt_apikey_2

In [238]:
# Create URL string for API calls
url = f"https://api.nytimes.com/svc/books/v3/lists/full-overview.json?published_date=2022-12-15&api-key={nyt_apikey_1}"

# Make API request
response = requests.get(url).json()

# View response
pp.pprint(response)

# Empty list which will later be used to create Pandas Dataframe
book_data = []

# Published date
pub_date = datetime.strptime(response['results']['published_date'], '%Y-%m-%d').date()

# Loop through each list in the response
for blist in response['results']['lists']:
    
    list_name = blist['list_name']
    
#     Loop through each book in the list
    for book in blist['books']:
        
        # Append book-related information
        book_data.append({
            'NYT List': list_name,
            'NYT List Published Date': pub_date,            
            'Title': book['title'],
            'Author': book['author'],
            'Imprint': book['publisher'],
            'Rank': book['rank'],
            'Weeks on NYT List': book['weeks_on_list'],
            'Image': book['book_image'],
            'Description': book['description']
        })

{'copyright': 'Copyright (c) 2023 The New York Times Company.  All Rights '
              'Reserved.',
 'num_results': 230,
 'results': {'bestsellers_date': '2022-12-31',
             'lists': [{'books': [{'age_group': '',
                                   'amazon_product_url': 'https://www.amazon.com/dp/1668001225?tag=NYTBSREV-20',
                                   'article_chapter_link': '',
                                   'author': 'Colleen Hoover',
                                   'book_image': 'https://storage.googleapis.com/du-prd/books/images/9781668001226.jpg',
                                   'book_image_height': 500,
                                   'book_image_width': 322,
                                   'book_review_link': '',
                                   'book_uri': 'nyt://book/3aa85e47-4df9-53ef-9957-a77753d3502c',
                                   'buy_links': [{'name': 'Amazon',
                                                  'url': 'https://www.a

In [239]:
# Store book information in pandas dataframe
book_df = pd.DataFrame(book_data)
book_df

Unnamed: 0,NYT List,NYT List Published Date,Title,Author,Imprint,Rank,Weeks on NYT List,Image,Description
0,Combined Print and E-Book Fiction,2023-01-15,IT STARTS WITH US,Colleen Hoover,Atria,1,11,https://storage.googleapis.com/du-prd/books/im...,"In the sequel to “It Ends With Us,” Lily deals..."
1,Combined Print and E-Book Fiction,2023-01-15,LESSONS IN CHEMISTRY,Bonnie Garmus,Doubleday,2,8,https://storage.googleapis.com/du-prd/books/im...,A scientist and single mother living in Califo...
2,Combined Print and E-Book Fiction,2023-01-15,IT ENDS WITH US,Colleen Hoover,Atria,3,81,https://storage.googleapis.com/du-prd/books/im...,A battered wife raised in a violent home attem...
3,Combined Print and E-Book Fiction,2023-01-15,VERITY,Colleen Hoover,Grand Central,4,56,https://storage.googleapis.com/du-prd/books/im...,Lowen Ashleigh is hired by the husband of an i...
4,Combined Print and E-Book Fiction,2023-01-15,THE SEVEN HUSBANDS OF EVELYN HUGO,Taylor Jenkins Reid,Washington Square/Atria,5,78,https://storage.googleapis.com/du-prd/books/im...,A movie icon recounts stories of her loves and...
...,...,...,...,...,...,...,...,...,...
225,Young Adult Paperback Monthly,2023-01-15,BETTER THAN THE MOVIES,Lynn Painter,Simon & Schuster,6,0,https://storage.googleapis.com/du-prd/books/im...,
226,Young Adult Paperback Monthly,2023-01-15,THE WAY I USED TO BE,Amber Smith,Margaret K. McElderry,7,0,https://storage.googleapis.com/du-prd/books/im...,
227,Young Adult Paperback Monthly,2023-01-15,DEMON SLAYER: KIMETSU NO YAIBA--THE FLOWER OF ...,Aya Yajima.,Viz Media,8,0,https://storage.googleapis.com/du-prd/books/im...,
228,Young Adult Paperback Monthly,2023-01-15,LEGENDBORN,Tracy Deonn,Margaret K. McElderry,9,0,https://storage.googleapis.com/du-prd/books/im...,


In [132]:
# Create SQLAlchemy engine
pg_path = f"postgresql+psycopg2://{pg['user']}:{pg['password']}@{pg['host']}:{pg['port']}/{pg['db']}"
engine = create_engine(pg_path)

In [133]:
# Declare a Base using `automap_base()`
Base = automap_base()

In [134]:
# Use the Base class to reflect the database tables
Base.prepare(engine, reflect=True)

In [135]:
# Create session
session = Session(engine)

In [136]:
Base.classes.keys()

['imprints', 'ownership', 'publishers', 'authors', 'books', 'lists', 'results']

In [137]:
Authors = Base.classes.authors
Books = Base.classes.books
Imprints = Base.classes.imprints
Lists = Base.classes.lists
Results = Base.classes.results
Publishers = Base.classes.publishers
Ownership = Base.classes.ownership

In [240]:
lists_from_df = book_df['NYT List'].unique()
lists_from_pg = list(blist[0] for blist in session.query(Lists.list_name).all())
lists_to_add = list(blist for blist in lists_from_df if blist not in lists_from_pg)

if len(lists_to_add) > 0:
    for blist in lists_to_add:
        session.add(Lists(list_name=blist))
    session.commit()

In [241]:
imprints_from_df = book_df['Imprint'].unique()
imprints_from_pg = list(imprint[0] for imprint in session.query(Imprints.imprint_name).all())
imprints_to_add = list(imprint for imprint in imprints_from_df if imprint not in imprints_from_pg)

if len(imprints_to_add) > 0:
    for imprint in imprints_to_add:
        session.add(Imprints(imprint_name=imprint))
    session.commit()

In [242]:
authors_from_df = book_df['Author'].unique()
authors_from_pg = list(author[0] for author in session.query(Authors.author_name).all())
authors_to_add = list(author for author in authors_from_df if author not in authors_from_pg)

if len(authors_to_add) > 0:
    for author in authors_to_add:
        session.add(Authors(author_name=author))
    session.commit()

In [243]:
books_from_pg = list(book[0] for book in session.query(Books.book_title).all())

for i in range(len(book_df)):
    title = book_df.loc[i, 'Title']
    
    if title not in books_from_pg:
        image = book_df.loc[i, 'Image']
        desc = book_df.loc[i, 'Description']
        author = book_df.loc[i, 'Author']
        imprint = book_df.loc[i, 'Imprint']

        author_id = session.query(Authors.author_id).filter(Authors.author_name == author).first()[0]
        imprint_id = session.query(Imprints.imprint_id).filter(Imprints.imprint_name == imprint).first()[0]

        session.add(Books(book_title = title, \
                             book_image = image, \
                             book_description = desc, \
                             author_id = author_id, \
                             imprint_id = imprint_id))

session.commit()

In [244]:
for i in range(len(book_df)):
    blist = book_df.loc[i, 'NYT List']
    title = book_df.loc[i, 'Title']
    pub_date = book_df.loc[i, 'NYT List Published Date']
    rank = int(book_df.loc[i, 'Rank'])
    weeks_on_list = int(book_df.loc[i, 'Weeks on NYT List'])
    
    list_id = session.query(Lists.list_id).filter(Lists.list_name == blist).first()[0]
    book_id = session.query(Books.book_id).filter(Books.book_title == title).first()[0]
    
    session.add(Results(list_id = list_id, \
                           book_id = book_id, \
                           published_date = pub_date, \
                           rank = rank, \
                           weeks_on_list = weeks_on_list))

session.commit()  

In [162]:
# publisher_df = pd.read_csv('Publishers.csv')
# publisher_df

Unnamed: 0,publisher_id,publisher_name
0,1,Basic
1,2,Bloom
2,3,Canary Street
3,4,Lioncrest
4,5,Row House
...,...,...
78,79,Science Future Press
79,80,South Dakota Historical Society Press
80,81,Aces Press
81,82,Rowman & Littlefield


In [164]:
# publishers = publisher_df['publisher_name'].unique()
# for publisher in publishers:
#     session.add(Publishers(publisher_name = publisher))
# session.commit()

In [166]:
# ownership_df = pd.read_csv('Ownership_v2.csv')
# ownership_df

Unnamed: 0,Imprint,Publisher
0,Basic,Basic
1,Bloom,Bloom
2,Canary Street,Canary Street
3,Lioncrest,Lioncrest
4,Row House,Row House
...,...,...
331,Zebra,Zebra Press
332,Basic Books,Hachette Book
333,Little Brown,Hachette Book
334,America's Test Kitchen,Marquee Brands


In [172]:
# imprints_from_csv = ownership_df['Imprint'].unique()
# imprints_from_pg = list(imprint[0] for imprint in session.query(Imprints.imprint_name).all())
# imprints_to_add = list(imprint for imprint in imprints_from_csv if imprint not in imprints_from_pg)

In [176]:
# for imprint in imprints_to_add:
#     session.add(Imprints(imprint_name = imprint))
# session.commit()

In [177]:
# publishers_from_csv = ownership_df['Publisher'].unique()
# publishers_from_pg = list(publisher[0] for publisher in session.query(Publishers.publisher_name).all())
# publishers_to_add = list(publisher for publisher in publishers_from_csv if publisher not in publishers_from_pg)

In [185]:
# for i in range(len(ownership_df)):
#     imprint = ownership_df.loc[i, 'Imprint']
#     publisher = ownership_df.loc[i, 'Publisher']
    
#     imprint_id = session.query(Imprints.imprint_id).filter(Imprints.imprint_name == imprint).first()[0]
#     publisher_id = session.query(Publishers.publisher_id).filter(Publishers.publisher_name == publisher).first()[0]

#     session.add(Ownership(imprint_id = imprint_id, publisher_id = publisher_id))

# session.commit()

In [245]:
session.close()