In [18]:
# Imports
## If the imports below fail, go to https://brew.sh/ and run the installation command in a terminal,
## and then run:
## conda install -c conda-forge selenium
## isbnlib also needs to be installed:
## pip install isbnlib
## Occasionally run in a terminal:
## brew update && brew doctor

import os

import csv

from selenium import webdriver
from requests import get
from bs4 import BeautifulSoup

from isbnlib import is_isbn10, is_isbn13, clean

In [33]:
# Static configuration
# Note: the search URL works as-is, but its query string could probably be cleaned up

# Text file holding the ISBNs to look up, one per line.
infile = "data/isbns-sample-minimal.txt"

# Search URL prefix; the ISBN to look up is appended directly after the trailing "=".
base_url = "".join([
    "http://bobcat.library.nyu.edu/primo_library/libweb/action/",
    "search.do?fn=search&ct=search&initialSearch=true&mode=Basic&tab=all&indx=1&dum=true&srt=rank&vid=NYU&frbg=&vl%28freeText0%29=",
])

In [34]:
# Read a txt file of isbns (one per line).
# Surrounding whitespace is stripped and blank lines are skipped: pad_isbn()
# would otherwise left-pad an empty entry into an all-zero string, which would
# then be looked up as if it were a real ISBN.

with open(infile, "r") as f:
    isbns = [line.strip() for line in f.read().splitlines() if line.strip()]

In [35]:
# Function to validate isbns

def validate_isbn(isbn):
    """Return True when `isbn` passes isbnlib's ISBN-13 or ISBN-10 check, else False."""
    return bool(is_isbn13(isbn) or is_isbn10(isbn))

def pad_isbn(isbn):
    """Left-pad a short ISBN with zeroes up to 10 characters.

    Both the length test and the padding are based on the string returned by
    isbnlib's `clean`, so a padded result is always exactly 10 characters.
    (Testing the cleaned length but padding by the raw length leaves the
    result too short whenever `clean` removes characters from the input.)

    Parameters:
        isbn: candidate ISBN as a string.

    Returns:
        The cleaned string padded with leading '0' characters when the cleaned
        input is shorter than 10 characters; otherwise `isbn` unchanged.
    """
    cleaned = clean(isbn)
    if len(cleaned) < 10:
        # rjust rather than zfill: zfill treats a leading '+'/'-' as a sign
        # and would insert the zeroes after it.
        return cleaned.rjust(10, '0')
    return isbn

In [5]:
# Fix leading zeroes

#isbns = [validate_isbn(isbn) for isbn in isbns]

In [36]:
# Create browser instance
# Note: this requires Firefox to be installed

# Location of the geckodriver executable handed to Selenium.
geckodriver_path = "/usr/local/bin/geckodriver"
browser = webdriver.Firefox(executable_path=geckodriver_path)

In [37]:
# Function for finding isbn matches in Bobcat (via Selenium)

def check_bobcat_isbn(isbn):
    """Look `isbn` up in Bobcat and report whether the result page has no alert.

    The ISBN is used as given if it validates; otherwise the zero-padded form
    from pad_isbn() is tried. If neither validates, no request is made.

    Parameters:
        isbn: candidate ISBN as a string.

    Returns:
        False when neither `isbn` nor its padded form is a valid ISBN.
        Otherwise True when the loaded page contains no element with class
        'alert', and False when it contains at least one. The notebook treats
        "no alert element" as "ISBN found" (per the original author's note,
        only missing ISBNs produce an element with class 'alert').
    """
    # Local import keeps this cell self-contained; `find_elements(By..., ...)`
    # is available in both Selenium 3 and 4, whereas the older
    # `find_elements_by_class_name` helper was removed in Selenium 4.
    from selenium.webdriver.common.by import By

    if validate_isbn(isbn):
        query = isbn
    else:
        query = pad_isbn(isbn)
        if not validate_isbn(query):
            return False

    # Open the search URL in the shared browser instance.
    # TODO: response/navigation errors are not trapped here.
    browser.get(base_url + query)
    alerts = browser.find_elements(By.CLASS_NAME, 'alert')

    return len(alerts) == 0

In [39]:
# Iterate over isbns and find matches

# Each entry is (upper-cased, zero-padded isbn, result of the Bobcat lookup).
# The inner generator is lazy, so every isbn is normalised and then looked up
# before the next one is touched, in input order.
matches = [
    (candidate, check_bobcat_isbn(candidate))
    for candidate in (pad_isbn(raw.upper()) for raw in isbns)
]

In [40]:
# Export to csv
# The file is opened with newline='' as the csv module requires; without it
# the writer's '\r\n' line endings get translated again on platforms where the
# native line ending is '\r\n', leaving an extra '\r' on every row.

with open('matches.csv', 'w', newline='') as out:
    csv_out = csv.writer(out, quotechar="'")
    csv_out.writerow(['isbn', 'match'])  # header row
    csv_out.writerows(matches)           # one (isbn, match) row per lookup

In [41]:
# Close browser instance
# All lookups are finished at this point; `browser` must not be used after this call.

browser.quit()

In [42]:
## Added: To find shelf locations of the matches 
import gzip
import io
import pandas as pd


import sqlite3

In [43]:
# Load the item list and the match results written to 'matches.csv' by the export cell.
# `matches` is rebound here: it was a list of (isbn, match) tuples and is a DataFrame from now on.
# NOTE(review): no dtype is given, so pandas infers the column types; if `isbn` is
# inferred as numeric, leading zeroes are not preserved — confirm this is intended.
items = pd.read_csv('isbns-sample.csv')
matches = pd.read_csv('matches.csv')


In [44]:
# In-memory SQLite database (nothing is written to disk) that holds both tables for the join.
items_sql = sqlite3.connect(":memory:")

In [45]:
# Copy both DataFrames into the SQLite connection, replacing any existing table of the same name.
items.to_sql(name='items', con=items_sql, if_exists="replace")
matches.to_sql(name='matches', con=items_sql, if_exists="replace")

In [46]:
# Join each match result to its shelf location on isbn, ordered by match flag,
# then shelf, then shelfplace.
shelf_query = '''SELECT matches.isbn, matches.match,
items.isbn, items.shelf, items.shelfplace
FROM matches, items
WHERE matches.isbn = items.isbn

order by matches.match, items.shelf, items.shelfplace'''
isbncheck = pd.read_sql_query(sql=shelf_query, con=items_sql)

isbncheck

Unnamed: 0,isbn,match,isbn.1,shelf,shelfplace
0,9789622174580,0,9789622174580,4B,2
1,9622174582,0,9622174582,4B,2
2,3886093271,0,3886093271,4B,4
3,9748496317,0,9748496317,4B,12
4,9789748496313,0,9789748496313,4B,12
5,3763020039,0,3763020039,4B,14
6,9780500277126,0,9780500277126,4B,17
7,1870076141,0,1870076141,4B,18
8,9781870076142,0,9781870076142,4B,18
9,9780714128252,0,9780714128252,4B,19
