## Publisher Spreadsheets

In [None]:
import importlib

import parse_publisher_sheets as Publishers
importlib.reload(Publishers)

In [None]:
all_publ_dicts = Publishers.main()

In [None]:
publ_isbns_set = Publishers.make_set_all_isbns(all_publ_dicts)

## Bookstore CSV

In [None]:
import re
import parse_bookstore_csv as Bookstore

# Matches an ISBN-13 (13 digits, group 1) or an ISBN-10 (9 digits followed by
# a digit or 'X', group 2). Inside a character class '|' is a literal pipe,
# not an alternation, so it must not appear in the check-character set.
ISBNregex = re.compile(r'(\b\d{13}\b)|(\b\d{9}[\dX]\b)')

bookstore_csv_as_list = Bookstore.cleanup_original_text('output/Summerbookstore2017.csv')

In [None]:
# Index the bookstore rows by column 7 with hyphens removed, keeping only rows
# whose normalised value begins with an ISBN-shaped token. A later row with
# the same key replaces an earlier one.
bookstore_isbns = {}
for row in bookstore_csv_as_list:
    normalised_isbn = row[7].replace('-', '')
    if ISBNregex.match(normalised_isbn):
        bookstore_isbns[normalised_isbn] = row

## findEtextbooks.py functions

In [None]:
import findEtextbooks as Combine

importlib.reload(Combine)

In [None]:
# The three ISBN pools compared in the cells below:
#   pubISBNs    - keys of the publisher ISBN mapping
#   courseISBNs - keys of the bookstore ISBN mapping
#   catISBNs    - whatever Combine.findISBNs returns for the given folder/file
pubISBNs = set(publ_isbns_set)
courseISBNs = set(bookstore_isbns)
catISBNs = Combine.findISBNs('CatalogFiles', 'SSH_ISBNs.txt')
# catISBNs = Combine.findISBNs('CatalogFiles', 'GUI_ISBNs.txt')

In [None]:
len(catISBNs), len(courseISBNs), len(pubISBNs)

In [None]:
def is_snippet_in_publisher_files_field(snippet, field, publ_dicts=None):
    """Print publisher records whose `field`-like attribute contains `snippet`.

    Walks the nested mapping file -> sheet -> textbook -> descriptor and
    inspects every descriptor attribute whose name contains `field`
    (case-insensitive). When such an attribute holds a string containing
    `snippet` (case-insensitive), the whole descriptor is printed once.

    Args:
        snippet: Text to look for inside attribute values.
        field: Text the attribute name must contain, e.g. 'isbn'.
        publ_dicts: Optional nested mapping to search. When omitted, the
            notebook-level ``all_publ_dicts`` is used.

    Returns:
        True if at least one descriptor matched, otherwise False.
    """
    if publ_dicts is None:
        publ_dicts = all_publ_dicts
    wanted_snippet = snippet.lower()
    wanted_field = field.lower()
    found = False
    for sheets in publ_dicts.values():
        for textbooks in sheets.values():
            for descriptor in textbooks.values():
                for attribute, value in descriptor.items():
                    if wanted_field not in attribute.lower():
                        continue
                    # Non-string cells cannot contain the snippet.
                    if isinstance(value, str) and wanted_snippet in value.lower():
                        print(descriptor)
                        found = True
                        # Stop after the first hit so a record with several
                        # matching attributes is not printed repeatedly.
                        break
    return found

is_snippet_in_publisher_files_field('9780136017509', 'isbn')

In [None]:
def multiple_isbns(publ_dicts=None):
    """Collect the ISBN values of every publisher record listing more than one.

    Walks the nested mapping file -> sheet -> textbook -> descriptor. For each
    descriptor, the values of all attributes whose name contains 'isbn'
    (case-insensitive) are gathered, so the result can be tested with
    ``isbn in bunch``. Gathering the attribute names instead would make that
    membership test compare ISBNs against field names.

    Args:
        publ_dicts: Optional nested mapping to scan. When omitted, the
            notebook-level ``all_publ_dicts`` is used.

    Returns:
        A list with one list per record that has more than one non-empty
        ISBN value. Values are converted with ``str``, hyphens are removed
        and surrounding whitespace is stripped.
    """
    if publ_dicts is None:
        publ_dicts = all_publ_dicts
    pub_expanded_isbns = []
    for sheets in publ_dicts.values():
        for textbooks in sheets.values():
            for descriptor in textbooks.values():
                alternate_isbns = []
                for attribute, value in descriptor.items():
                    if 'isbn' not in attribute.lower() or value is None:
                        continue
                    # NOTE(review): non-string cells are stringified as-is;
                    # confirm how the publisher parser represents numeric ISBNs.
                    normalised = str(value).replace('-', '').strip()
                    if normalised:
                        alternate_isbns.append(normalised)
                if len(alternate_isbns) > 1:
                    pub_expanded_isbns.append(alternate_isbns)
    return pub_expanded_isbns

pub_expanded_isbns = multiple_isbns()
                        

In [None]:
len(pub_expanded_isbns)

In [None]:
isbns_not_in_gobi = ["9780738083193", "9781305674172", "9781121838345", "9780544309760", "9781260000177", "9781133907664", "9781259314582", "9780536260192", "9781259702297", "9780321923752", "9781259708831", "9780738067155", "9780538738101", "9789810185060", "9781319053369", "9781305931954", "9789810185114", "9789812714114", "9781259664571", "9780133951301", "9789810185039", "9781285858265", "9781886553514", "9781626808546", "9781256692669", "9789810185107", "9781133074137", "9780738083209", "9789810185152", ]

In [None]:
isbn_no_gobi_preferred_format = ["9780394722740", "9780133316032", "9780393306767", "9780881335408", "9780393311525", "9781555706173", "9780674005457", "9780030339875", "9781589481169", "9781585101030", "9781931666114", "9781585101023", "9780131497450", "9780131186552", "9780131849372", "9780135151037", "9780321596956", "9781433805615", "9781591584780", "9780974814056", "9780132333337", "9781608313846", "9780736080606", "9780138140007", "9780073401126", "9781424051007", "9781424062102", "9781424051014", "9780840033550", "9780132473897", "9781931666367", "9780132473989", "9780135024775", "9780132415439", "9780321592583", "9781605473178", "9780131849341", "9780137144433", "9789620058905", "9789620058899", "9780138015626", "9780078029219", "9780982703496", "9780132483544", "9780205096893", "9780194416306", "9781118147900", "9780205205165", "9780375432323", "9780757592065", "9780132469326", "9781605258027", "9781609139551", "9781609139568", "9781118083390", "9781133606499", "9781133956402", "9781118133576", "9780138022129", "9781285064079", "9781451176117", "9780132313506", "9780133041828", "9780132912747", "9781285194882", "9780321923233", "9780321910417", "9781285846859", "9781455704187", "9780133431100", "9781450443616", "9781429255264", "9780321928351", "9781935660194", "9780321391575", "9780321925718", "9781451177305", "9781447938958", "9781621590033", "9780205987498", "9781450424981", "9781284057539", "9781450456999", "9780133382259", "9781285867571", "9780133958812", "9780133865011", "9780132940405", "9781305272422", "9780133797190", "9780078095146", "9781506305288", "9781464135385", "9780194818070", "9781605352756", "9780133382143", "9781483359465", "9782761356541", "9780321964670", "9781259350825", "9780393283402", "9781498755016", "9781305633803", "9781464106644", "9780134027265", "9781259420474", "9781305616691", "9780393602630", "9780205964345", "9780134524061", "9781118866153", "9781285473291", "9780134701325", "9780133382075", "9780736030564", 
"9780131500457", "9780194369763", "9781621590064", "9781137494740", ]

In [None]:
# Report each preferred-format ISBN for which Combine.find_similar_isbns
# returns more than one entry.
for candidate in isbn_no_gobi_preferred_format:
    related = Combine.find_similar_isbns(candidate)
    if len(related) <= 1:
        continue
    print(candidate, related)

In [None]:
# Findings recorded when this was run:
#   - none of the isbns_not_in_gobi are in the publisher sheets
#   - none of the isbn_no_gobi_preferred_format are in the publisher sheets
# NOTE(review): this check is only meaningful if each bunch in
# pub_expanded_isbns holds ISBN values rather than field names - verify.

for wanted_isbn in isbn_no_gobi_preferred_format:
    hits = [bunch for bunch in pub_expanded_isbns if wanted_isbn in bunch]
    for hit in hits:
        print(hit)

In [None]:
# Ask Combine for the ISBNs related to each bookstore ISBN and keep the
# per-ISBN results; they are then handed to Combine.flatten_set_of_sets
# (presumably merging them into one flat collection - confirm in findEtextbooks).
sets_of_similar_isbns = [Combine.find_similar_isbns(isbn) for isbn in courseISBNs]
xCourseISBNs = Combine.flatten_set_of_sets(sets_of_similar_isbns)

In [None]:
# Overlap of the expanded course ISBNs with the publisher and catalog pools.
course_and_pub_set = xCourseISBNs.intersection(pubISBNs)
course_and_cat_set = xCourseISBNs.intersection(catISBNs)

# Print each overlap in sorted order.
for overlap in (course_and_pub_set, course_and_cat_set):
    print(sorted(overlap))

In [None]:
# ISBNs present in all three pools: publisher lists, catalog and courses.
matches = pubISBNs.intersection(catISBNs).intersection(xCourseISBNs)
# On a publisher list and used by a course, but absent from the catalog pool.
needToBuy = pubISBNs.difference(catISBNs).intersection(xCourseISBNs)
# In the catalog pool and used by a course, but on no publisher list.
notDRMfree = catISBNs.difference(pubISBNs).intersection(xCourseISBNs)

## Using WorldCat getMetadata API

In [None]:
import os
# Make sure the output directory exists; exist_ok=True makes this a no-op
# when it is already there.
os.makedirs('output/toEmily', exist_ok=True)
# Call Combine.getMetadata once per ISBN group. Each result is iterated below
# as a sequence of dict-like records.
# NOTE(review): presumably getMetadata writes under output/toEmily - confirm
# in findEtextbooks.
matches_data = Combine.getMetadata(matches)
need_to_buy_data = Combine.getMetadata(needToBuy)
not_drm_free_data = Combine.getMetadata(notDRMfree)


In [None]:
# Show the three raw ISBN groups, one per line.
for isbn_group in (matches, notDRMfree, needToBuy):
    print(isbn_group)

In [None]:
# Dump every metadata record field by field. An extra newline is printed
# after each record and again after each group.
for records in (matches_data, need_to_buy_data, not_drm_free_data):
    for record in records:
        for field_name, field_value in record.items():
            print("{}:\t\t{}".format(field_name, field_value))
        print('\n')
    print('\n')

In [None]:
def show_similar_isbns(isbn):
    """Return the first entry of ``sets_of_similar_isbns`` containing `isbn`.

    Returns None when no entry contains it.
    """
    containing = (group for group in sets_of_similar_isbns if isbn in group)
    return next(containing, None)
    

In [None]:
# for bunch in ('9780123819604', ):
#     print('\n')

# Column names for a bookstore csv row, in positional order.
bookstore_headers = ('Dept/Course', 'Section', 'empty1', 'Professor',
                     'Author', 'Title', 'empty2', 'ISBN', 'Publisher', 'RcCd', 'STS')

# NOTE: lookup_course_catalog_item is defined in a later cell; that cell must
# have been run before this one.
for bookstore_isbn in ('9780136017509', ):
    print('**************************')
    print('bookstore isbn:', bookstore_isbn)
    # Look the similar ISBNs up once and reuse the result.
    similar_isbns = show_similar_isbns(bookstore_isbn)
    print('similar isbns:', similar_isbns, '\n')
    # The inner loop uses its own variable so the bookstore ISBN is still
    # intact for the csv lookup that follows.
    for similar_isbn in similar_isbns or ():
        pub_item = publ_isbns_set.get(similar_isbn)
        if pub_item:
            print("From Publisher websites:", pub_item, '\n')
    bookstore_item = bookstore_isbns.get(bookstore_isbn)
    if bookstore_item:
        bookstore_item_dict = dict(zip(bookstore_headers, bookstore_item))
        print("From the B&N Bookstore csv:", bookstore_item_dict, "\n")
        print('\n')
        print("From Course Catalog:", lookup_course_catalog_item(bookstore_item_dict), '\n')
print('\n')

In [None]:
import parse_course_catalog as CourseCatalog

def lookup_course_catalog_item(bookstore_item_dict, year='2017', season='Summer_2017'):
    """Find the course-listing entries for the course named in a bookstore row.

    Walks ``course_listings/<year>/<season>`` and takes the first file whose
    name contains the department part of the row's 'Dept/Course' value (the
    text before the first space). That file is parsed with
    ``CourseCatalog.parse_course_listing_texts`` and the entries whose
    ``abbr_num`` equals the full 'Dept/Course' value, ignoring spaces, are
    returned.

    Args:
        bookstore_item_dict: Mapping with a 'Dept/Course' entry such as
            'MUS 1751'.
        year: Sub-directory of ``course_listings`` to search.
        season: Sub-directory of ``year`` to search.

    Returns:
        A list of matching entries from the first file whose name contains
        the department, or None when 'Dept/Course' is missing or empty or no
        such file exists.
    """
    course_key = bookstore_item_dict.get('Dept/Course')
    if not course_key:
        # Nothing to look up; an empty department would match every file name.
        return None
    # These depend only on the row, so compute them once rather than per file.
    sought_dept = course_key.split(' ')[0]
    sought_course = course_key.replace(' ', '')
    listings_dir = os.path.join('course_listings', year, season)
    for root, _dirs, files in os.walk(listings_dir):
        for file in files:
            if sought_dept not in file:
                continue
            filepath = os.path.join(root, file)
            course_nts, _season_dept = CourseCatalog.parse_course_listing_texts(filepath)
            return [course_nt for course_nt in course_nts
                    if course_nt.abbr_num.replace(' ', '') == sought_course]
    return None

In [None]:
item_x = {'Publisher': 'TAYLOR', 
          'RcCd': 'RQ',
          'STS': '',
          'Author': 'CORNELIUS',
          'empty1': '',
          'Title': 'MUSIC   -TEXT',
          'ISBN': '978-0-13-601750-9',
          'Dept/Course': 'MUS 1751',
          'empty2': '',
          'Professor': 'Perry',
          'Section': '3'}

lookup_course_catalog_item(item_x)

In [None]:
import ebsco_discovery_functions as Discovery

In [None]:
import json

response = Discovery.main('9780124104228')



In [None]:
response_json = json.dumps(json.loads(response), sort_keys=True, indent=2)
print(response_json)

In [None]:
this_isbn = ''

In [None]:
with open('/home/francis/Downloads/SpringBookstoreList.txt', 'r') as f:
    springlines = f.readlines()

# ISBN-13 (group 1) or ISBN-10 (group 2). Inside a character class '|' is a
# literal pipe, so the check-character set is just a digit or 'X'.
ISBNregex = re.compile(r'(\b\d{13}\b)|(\b\d{9}[\dX]\b)')

# Keep only hits longer than 12 characters (the 13-digit form); the set
# removes duplicates before the result is turned back into a list.
unique_spring_isbns = set()
for line in springlines:
    for groups in ISBNregex.findall(line):
        unique_spring_isbns.update(hit for hit in groups if len(hit) > 12)
spring_isbns = list(unique_spring_isbns)

# Quick spot check for one known ISBN.
if '9780521776004' in spring_isbns:
    print('yep')

In [None]:
string_spring_isbns = '\n'.join(spring_isbns)

In [None]:
with open('/home/francis/Desktop/crazy_filtering_spring.txt', 'w') as f:
    f.write(string_spring_isbns)