In [50]:
# Imports
## If these imports fail, go to https://brew.sh/ and run the installation command in a terminal,
## and then run:
##   conda install -c conda-forge selenium
## isbnlib will also need to be installed:
##   pip install isbnlib
## Occasionally run in a terminal:
##   brew update && brew doctor

import os

import csv

from selenium import webdriver
from requests import get
from bs4 import BeautifulSoup

from isbnlib import is_isbn10, is_isbn13, clean

In [51]:
# Statics
# Note: the search URL below works as-is, but its query string could probably be cleaned up

# Bobcat search endpoint plus fixed query parameters; the ISBN to look up is
# appended directly after the trailing "=".
base_url = (
    "http://bobcat.library.nyu.edu/primo_library/libweb/action/"
    "search.do?fn=search&ct=search&initialSearch=true&mode=Basic&tab=all&indx=1&dum=true&srt=rank&vid=NYU&frbg=&vl%28freeText0%29="
)

# Text file of ISBNs to check, read line by line
infile = "data/isbns-sample-minimal.txt"

In [52]:
# Read a txt file of isbns (one per line)

with open(infile, "r") as f:
    # Strip surrounding whitespace and skip blank lines: an empty entry would
    # otherwise be zero-padded by pad_isbn() into '0000000000' and searched
    # as if it were a real ISBN.
    isbns = [line.strip() for line in f.read().splitlines() if line.strip()]

In [53]:
# Function to validate isbns

def validate_isbn(isbn):
    """Return True when ``isbn`` passes isbnlib's ISBN-13 or ISBN-10 check.

    Args:
        isbn (str): candidate ISBN string.

    Returns:
        bool: True if either ``is_isbn13`` or ``is_isbn10`` accepts the value,
        False otherwise.
    """
    return bool(is_isbn13(isbn) or is_isbn10(isbn))

def pad_isbn(isbn):
    """Left-pad a short ISBN with zeroes to ten characters.

    Args:
        isbn (str): ISBN-like string, possibly missing leading zeroes.

    Returns:
        str: when the cleaned form of ``isbn`` is shorter than 10 characters,
        that cleaned form left-padded with '0' to length 10; otherwise
        ``isbn`` unchanged.
    """
    cleaned = clean(isbn)
    if len(cleaned) < 10:
        # Measure and pad the same (cleaned) string. Mixing the cleaned length
        # with the raw value could pad to the wrong width, or not at all when
        # the raw string carried extra characters.
        return cleaned.rjust(10, '0')
    return isbn

In [5]:
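# Fix leading zeroes (currently disabled: the line below is commented out; pad_isbn() is applied per ISBN in the matching loop instead)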

#isbns = [validate_isbn(isbn) for isbn in isbns]

In [54]:
# Create browser instance
# Note: this requires Firefox to be installed, plus a geckodriver binary at the path given below

# Module-level WebDriver handle; check_bobcat_isbn() loads every search URL through it.
# NOTE(review): the `executable_path` keyword presumably ties this to an older Selenium release — confirm the installed version.
browser = webdriver.Firefox(executable_path="/usr/local/bin/geckodriver")

In [55]:
# Function for finding isbn matches in Bobcat (via Selenium)

def check_bobcat_isbn(isbn):
    """Report whether a Bobcat search for ``isbn`` finds a match.

    The ISBN is tried as given; if it does not validate, its zero-padded form
    (via ``pad_isbn``) is tried instead. The search URL is then loaded in the
    module-level ``browser`` and the page is inspected for an "alert" element.

    Args:
        isbn (str): ISBN to look up.

    Returns:
        bool: False when neither the given nor the padded ISBN validates, or
        when the results page contains an element with class ``alert``;
        True otherwise.
    """
    # Imported locally so this function carries its own dependency.
    from selenium.webdriver.common.by import By

    # Pick the form of the ISBN to search for; bail out if neither is valid.
    if validate_isbn(isbn):
        query = isbn
    else:
        query = pad_isbn(isbn)
        if not validate_isbn(query):
            return False

    url = base_url + query  # Build URL string
    browser.get(url)  # Open url in browser instance; should trap response errors

    # Only missing ISBNs have an element with a class called "alert".
    # find_elements(By.CLASS_NAME, ...) is the locator form that works on both
    # Selenium 3 and Selenium 4 (the find_elements_by_* helpers were removed).
    alerts = browser.find_elements(By.CLASS_NAME, 'alert')

    return len(alerts) == 0

In [56]:
# Look up every ISBN from the input list and record the outcome

# Collected as (isbn, found) tuples, in input order
matches = []

for raw_isbn in isbns:
    # Upper-case first, then restore any missing leading zeroes
    padded = pad_isbn(raw_isbn.upper())
    matches.append((padded, check_bobcat_isbn(padded)))

In [57]:
# Export to csv

# newline='' lets the csv module control line endings itself, as its
# documentation requires; without it, platforms that translate '\n' emit
# an extra blank line after every row.
with open('matches.csv', 'w', newline='') as out:
    csv_out = csv.writer(out, quotechar="'")
    csv_out.writerow(['isbn', 'match'])  # header row
    csv_out.writerows(matches)  # one (isbn, match) row per searched ISBN

In [58]:
# Close browser instance
# All catalog lookups are finished by this point; nothing below uses `browser`.

browser.quit()

In [59]:
## Added: To find shelf locations of the matches 
import gzip
import io
import pandas as pd


import sqlite3

In [60]:
# Load the shelf inventory and the match results written earlier in this notebook.
# Note: `matches` is rebound here from the list of tuples built above to a DataFrame.
# NOTE(review): read_csv infers column types, so all-digit ISBNs presumably load as
# integers and lose any leading zeroes — confirm whether that matters for the join.
items = pd.read_csv('data/isbns-sample.csv')
matches = pd.read_csv('matches.csv')


In [61]:
# In-memory SQLite database used only to join the two DataFrames; nothing is written to disk.
items_sql = sqlite3.connect(":memory:")

In [62]:
# Copy both DataFrames into the SQLite connection as tables 'items' and 'matches'.
# if_exists="replace" makes the cell safe to re-run against the same connection.
items.to_sql('items',items_sql,if_exists = "replace")
matches.to_sql('matches',items_sql,if_exists = "replace")

In [63]:
# Join the search results to the items table on ISBN.
# The comma-style FROM with a WHERE equality keeps only ISBNs present in both tables.
# Rows are ordered by match, then shelf, then shelfplace.
isbncheck = pd.read_sql_query('''SELECT matches.isbn, matches.match,
items.isbn, items.shelf, items.shelfplace
FROM matches, items
WHERE matches.isbn = items.isbn

order by matches.match, items.shelf, items.shelfplace''', items_sql)

# Display the joined frame as the cell output
isbncheck

Unnamed: 0,isbn,match,isbn.1,shelf,shelfplace
0,3201016780,0,3201016780,4A,1
1,9782711823642,0,9782711823642,4A,6
2,2855397359,0,2855397359,4A,7
3,9781851774296,0,9781851774296,4A,8
4,7560710077,0,7560710077,4A,9
5,9787228032488,0,9787228032488,4A,10
6,9782220026299,0,9782220026299,4A,11
7,9781904832164,0,9781904832164,4A,16
8,9748434575,0,9748434575,4A,21
9,3761106262,0,3761106262,4A,25


In [72]:
# Write the query results to 'isbnoutput.csv'
# Note: the file is tab-separated (sep='\t') despite its .csv extension
# The CSV is meant to include all ISBNs searched
# NOTE(review): isbncheck comes from a join on ISBN, so this presumably holds only
# when every searched ISBN also appears in the items table — confirm
# Items with a "0" in the "match" column were not found in the catalog
# Items with a "1" in the "match" column were found in the catalog

isbncheck.to_csv('isbnoutput.csv', sep='\t', encoding='utf-8')


pandas.core.frame.DataFrame