## Publisher Spreadsheets

In [None]:
import importlib
import os

In [None]:
import parse_publisher_sheets as Publishers
importlib.reload(Publishers)

In [None]:
# Run the publisher-spreadsheet parser's entry point and keep whatever it returns.
# Later cells iterate this as: filename -> sheet name -> textbook -> {field: value}.
all_publ_dicts = Publishers.main()

In [None]:
# Build the ISBN collection from the parsed publisher sheets.
# NOTE(review): the name suggests a set, but a later cell calls `.get()` on
# this object -- confirm what `make_set_all_isbns` actually returns.
publ_isbns_set = Publishers.make_set_all_isbns(all_publ_dicts)

## Bookstore csv

In [None]:
import re
import parse_bookstore_csv as Bookstore
from collections import namedtuple

# Matches a bare 13-digit run (group 1) or 9 digits followed by a digit or "X"
# (group 2), each delimited by word boundaries. The last character class is
# `[\dX]`: inside brackets "|" is a literal, so the former `[\d|X]` also
# accepted "|" as the final character.
ISBNregex = re.compile(r'(\b\d{13}\b)|(\b\d{9}[\dX]\b)')

# Load the bookstore CSV through the project's cleanup helper.
# The next cell treats the result as an iterable of rows whose first row is the header.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
bookstore_csv_items = Bookstore.cleanup_original_text('/home/francis/Downloads/Summer_v2_bookstorelist.csv')

In [None]:
# Index the bookstore rows by ISBN (column 7 with hyphens removed).
# Row 0 is the header row: its cells, minus any "/", become the namedtuple's
# field names. Every later row is stored as a namedtuple built from its first
# 11 columns; a repeated ISBN overwrites the earlier entry.
bookstore_isbns_nts = {}

for num, row in enumerate(bookstore_csv_items):
    if num:
        isbn = row[7].replace('-', '')
        bookstore_isbns_nts[isbn] = BookstoreItemNT(*row[:11])
    else:
        headers = row
        field_names = [header.replace('/', '') for header in headers]
        BookstoreItemNT = namedtuple('BookstoreItemNT', field_names)

# bookstore_isbns = {row[7].replace('-', ''): row
#                    for row in bookstore_csv_items
#                    if ISBNregex.match(row[7].replace('-', ''))}

## Symphony isbns


In [None]:
# Matches a bare 13-digit run (group 1) or 9 digits followed by a digit or "X"
# (group 2), each delimited by word boundaries. The last character class is
# `[\dX]`: inside brackets "|" is a literal, so the former `[\d|X]` also
# accepted "|" as the final character.
ISBNregex = re.compile(r'(\b\d{13}\b)|(\b\d{9}[\dX]\b)')


def findISBNs(filepath, filename):
    """Return the set of ISBN-looking strings found in a text file.

    Args:
        filepath: directory containing the file.
        filename: name of the file inside *filepath*.

    Returns:
        set of str: every distinct match of ``ISBNregex`` in the file.
        The pattern only matches unbroken digit runs, so hyphenated ISBNs
        in the file are not picked up.
    """
    full_filepath = os.path.join(filepath, filename)
    found = set()
    with open(full_filepath, "r", encoding="utf-8", errors="surrogateescape") as isbn_lines:
        for line in isbn_lines:
            # findall yields one (isbn13, isbn10) tuple per match; exactly one
            # of the two groups is non-empty.
            for isbn13, isbn10 in ISBNregex.findall(line):
                found.add(isbn13 or isbn10)
    return found

# Extract the ISBN-looking strings from output/symphony.txt
# (the path is relative to the current working directory).
Symphony_isbns = findISBNs('output', 'symphony.txt')

## Worldcat expanding bookstore isbns

In [None]:
import findEtextbooks as Combine

importlib.reload(Combine)

In [None]:
# For every bookstore ISBN, ask the Combine helper for its related ISBNs, then
# flatten all of those groups into one collection.
#
# The keys are taken from `bookstore_isbns_nts`: the `bookstore_isbns` dict this
# cell used to read exists only as commented-out code in the visible cells, so
# referencing it fails with NameError on a fresh kernel. As in that commented-out
# definition, only keys that `ISBNregex` recognises are looked up.
sets_of_similar_isbns = [
    Combine.find_similar_isbns(isbn)
    for isbn in bookstore_isbns_nts
    if ISBNregex.match(isbn)
]
xCourseISBNs = Combine.flatten_set_of_sets(sets_of_similar_isbns)

In [None]:
def show_similar_isbns(isbn):
    """Return the first group in `sets_of_similar_isbns` that contains *isbn*.

    Returns None when no group contains it.
    """
    containing_groups = (group for group in sets_of_similar_isbns if isbn in group)
    return next(containing_groups, None)
    

## Course Catalog

In [None]:
import parse_course_catalog as CourseCatalog

In [None]:
# course_file = 'course_listings/2017/Summer_2017/MUSIC3S17A.txt'
# course_nts, season_dept = CourseCatalog.parse_course_listing_texts(course_file)

In [None]:
def lookup_all_matching_courses(dept_coursenum, year, season):
    """Find catalog entries for one course in a given year and season.

    Walks ``course_listings/<year>/<season>`` and parses the first file whose
    name contains the department abbreviation (the part of *dept_coursenum*
    before the first space).

    Args:
        dept_coursenum: department abbreviation and course number, e.g. 'MUS 1751'.
        year: name of the year directory under ``course_listings``.
        season: name of the season directory under the year directory.

    Returns:
        A list of the parsed course entries from that file whose ``abbr_num``
        equals *dept_coursenum* (ignoring spaces), or None when the year or
        season directory does not exist or no file name matches.
    """
    listings_root = 'course_listings'
    yearpath = os.path.join(listings_root, year)
    if not os.path.isdir(yearpath):
        # List the parent directory: listing the missing directory itself would raise.
        print('year must be among: {}'.format(', '.join(os.listdir(listings_root))))
        return None
    seasonpath = os.path.join(yearpath, season)
    if not os.path.isdir(seasonpath):
        print('season must be among: {}'.format(', '.join(os.listdir(yearpath))))
        return None

    sought_dept = dept_coursenum.split(' ')[0]
    sought_course = dept_coursenum.replace(' ', '')
    for root, _dirs, files in os.walk(seasonpath):
        for file in files:
            if sought_dept not in file:
                continue
            filepath = os.path.join(root, file)
            course_nts, _season_dept = CourseCatalog.parse_course_listings(filepath)
            # Only the first matching file is consulted.
            return [course_nt for course_nt in course_nts
                    if course_nt.abbr_num.replace(' ', '') == sought_course]
    return None

In [None]:
# Example lookup: one course in one season. `year` and `season` are directory
# names under course_listings/.
course_and_number = 'MUS 1751'
year = '2017'
season = 'Summer_2017'

# Left as the cell's last expression so the notebook displays the result.
lookup_all_matching_courses(course_and_number, year, season)

## Ebsco Discovery

In [None]:
import ebsco_discovery_functions as Discovery

In [None]:
# get alternate-format ISBNs for an ISBN from Ebsco Discovery
import os
import json

def find_alt_isbns(relation_list):
    """Collect ISBN-type identifiers from a list of relationship entries.

    Args:
        relation_list: iterable of dicts. Entries without
            ``['BibEntity']['Identifiers']`` are skipped.

    Returns:
        dict mapping each identifier's ``Value`` to its ``Type``, for every
        identifier whose ``Type`` contains "isbn" (case-insensitive).
        Identifiers that are empty, or that lack a ``Type`` or ``Value`` key,
        are ignored rather than raising KeyError.
    """
    alt_isbns = {}
    for relation in relation_list:
        try:
            identifiers = relation['BibEntity']['Identifiers']
        except KeyError:
            continue
        for identifier in identifiers:
            if not identifier:
                continue
            id_type = identifier.get('Type')
            if not id_type or 'isbn' not in id_type.lower():
                continue
            if 'Value' in identifier:
                alt_isbns[identifier['Value']] = id_type
    return alt_isbns

def return_alternate_isbns(response_json):
    """Merge the alternate ISBNs of every record in a Discovery response.

    For each entry of ``response_json['SearchResult']['Data']['Records']`` the
    ``IsPartOfRelationships`` list is handed to ``find_alt_isbns``; records
    missing any key on that path are skipped. Returns the combined dict.
    """
    merged = {}
    for record in response_json['SearchResult']['Data']['Records']:
        try:
            bib_record = record['RecordInfo']['BibRecord']
            relationships = bib_record['BibRelationships']['IsPartOfRelationships']
        except KeyError:
            pass
        else:
            merged.update(find_alt_isbns(relationships))
    return merged

def return_any_hits(response_json):
    """Return the first database entry with a truthy "Hits" value, else None.

    Entries are read from ``response_json["SearchResult"]["Statistics"]["Databases"]``.
    """
    databases = response_json["SearchResult"]["Statistics"]["Databases"]
    return next((entry for entry in databases if entry["Hits"]), None)

# Query Discovery for every bookstore ISBN and save, as pretty-printed JSON,
# each response in which at least one database reports hits.
# NOTE(review): absolute, machine-specific output directory.
target_dir = '/home/francis/Desktop/bookstore_discovery_hits/'

for isbn in bookstore_isbns_nts:
    # Skip rows whose ISBN cell was empty or the literal text "none".
    if not isbn or isbn.lower().strip() == "none":
        continue
    response_json = json.loads(Discovery.main(isbn))
    if not return_any_hits(response_json):
        continue
    os.makedirs(target_dir, exist_ok=True)
    # os.path.join avoids the doubled "/" the old '{}/{}' format produced, and
    # the JSON is only serialised when it is actually going to be written.
    with open(os.path.join(target_dir, '{}.json'.format(isbn)),
              'w',
              encoding='utf-8') as f:
        f.write(json.dumps(response_json, sort_keys=True, indent=2))

In [None]:
# finds whether an isbn or its cousin is in our holdings at Middleton or Hill, etc.

# Full paths of every file found (recursively) under the saved-hits directory.
all_matches_discovery = []
for root, dirs, files in os.walk('/home/francis/Desktop/bookstore_discovery_hits/'):
    all_matches_discovery.extend(os.path.join(root, file) for file in files)

def is_file_in_holdings(discovery_json):
    """Gather every copy-information entry listed in a Discovery response.

    Walks ``discovery_json['SearchResult']['Data']['Records']``; for each
    record with a non-empty ``'Holdings'`` list, the items of each holding's
    ``['HoldingSimple']['CopyInformationList']`` are collected. Holdings
    missing either key are skipped.

    Returns:
        list of the collected entries, in encounter order (empty if none).
    """
    locations = []
    for record in discovery_json['SearchResult']['Data']['Records']:
        for holding in record.get('Holdings') or ():
            try:
                copies = holding['HoldingSimple']['CopyInformationList']
            except KeyError:
                continue
            locations.extend(copies)
    return locations
    
# Print the copy locations for every saved Discovery response that lists any.
for file in all_matches_discovery:
    # The files were written as UTF-8, so read them back as UTF-8 instead of
    # relying on the platform's default encoding.
    with open(file, 'r', encoding='utf-8') as f:
        parsed_json = json.load(f)
    all_copy_locations = is_file_in_holdings(parsed_json)
    if all_copy_locations:
        # The file name without its extension is the ISBN that was queried.
        isbn = os.path.splitext(os.path.basename(file))[0]
        print('{}: {}'.format(isbn, all_copy_locations))

## Previously matched courses etextbooks

In [None]:
# identify prof/class in current courselist that used an etextbook previously

import pandas as pd

def parse_previously_used_etextbooks(filename):
    """Read every sheet of an Excel workbook into a flat list of row dicts.

    Args:
        filename: path to the workbook.

    Returns:
        list of dict: one dict per row (column name -> cell value) across all
        sheets, each with an extra ``'Sheet'`` key holding the sheet's name.
    """
    cumulative_sheets = []
    # Open the workbook once and parse each sheet from it. The sheet is passed
    # positionally because the keyword was renamed from `sheetname` to
    # `sheet_name`, and the old spelling is rejected by current pandas.
    with pd.ExcelFile(filename) as workbook:
        for sheetname in workbook.sheet_names:
            parsed_sheet = workbook.parse(sheetname)
            for _, row in parsed_sheet.iterrows():
                row_dict = row.to_dict()
                row_dict['Sheet'] = sheetname
                cumulative_sheets.append(row_dict)
    return cumulative_sheets

In [None]:
def parse_this_seasons_course_catalog(current_year, current_season):
    """Parse every course-listing file for one season.

    Looks under ``course_listings/<current_year>/<current_season>`` relative
    to the current working directory and walks it recursively.

    Args:
        current_year: name of the year directory.
        current_season: name of the season directory inside the year directory.

    Returns:
        dict mapping each file's base name (without extension) to the first
        element of ``CourseCatalog.parse_course_listings(path)``; files sharing
        a base name overwrite one another. Returns False, after printing the
        valid choices, when the year or season directory does not exist.
    """
    course_listings_path = os.path.abspath('course_listings')
    yearpath = os.path.join(course_listings_path, current_year)
    if not os.path.isdir(yearpath):
        print('year must be among:\n\n{}'.format('\n'.join(os.listdir(course_listings_path))))
        return False
    seasonpath = os.path.join(yearpath, current_season)
    if not os.path.isdir(seasonpath):
        print('season must be among:\n\n{}'.format('\n'.join(os.listdir(yearpath))))
        # Mirror the year check; previously this fell through and returned {}.
        return False
    all_files = [os.path.join(root, file)
                 for root, _dirs, files in os.walk(seasonpath)
                 for file in files]
    return {os.path.splitext(os.path.basename(filepath))[0]:
            CourseCatalog.parse_course_listings(filepath)[0]
            for filepath in all_files}

In [None]:
# Load the workbook of earlier e-textbook matches as a list of row dicts.
# NOTE(review): absolute, machine-specific path.
cumulative_etextbooks = '/home/francis/Desktop/lsu-git/etextbookSearch/source_material/Cumulative_Etextbooks.xlsx'
parsed_previous_matches = parse_previously_used_etextbooks(cumulative_etextbooks)

In [None]:
# Pick the year/season directory names under course_listings/ and parse that season's files.
current_year, current_season = '2017', 'Summer_2017_Intersession'
this_seasons_courses = parse_this_seasons_course_catalog(current_year, current_season)

In [None]:
def match_previous_to_current_courses(parsed_previous_matches, this_seasons_courses):
    """Print current courses that look like earlier e-textbook users.

    A current course is reported when a previous row's 'Course', 'Number' and
    'Instructor' values each occur (case-insensitively, as substrings) in the
    course's department abbreviation, course number and instructor.

    Args:
        parsed_previous_matches: iterable of dicts with 'Course', 'Number'
            and 'Instructor' keys.
        this_seasons_courses: dict whose values are iterables of course items
            exposing ``abbr_num`` ("<dept> <number>") and ``instructor``.

    Returns:
        None; results are printed.
    """
    for subcourses in this_seasons_courses.values():
        for CourseItem in subcourses:
            try:
                course, number = [str(i) for i in CourseItem.abbr_num.split(' ') if i]
            except ValueError:
                # abbr_num did not split into exactly two parts. Empty values and
                # the 'SESSION' marker are expected; report anything else once
                # (the previous loop printed the message twice per row).
                if CourseItem.abbr_num and CourseItem.abbr_num != 'SESSION':
                    print('debug:  {} doesnt have right format course # or instructor'.format(CourseItem))
                continue
            course = course.lower()
            number = number.lower()
            instructor = str(CourseItem.instructor).lower()
            for previous_match in parsed_previous_matches:
                # str() on every field so non-string cells cannot raise.
                if (str(previous_match['Course']).lower() in course
                        and str(previous_match['Number']).lower() in number
                        and str(previous_match['Instructor']).lower() in instructor):
                    print('Previous etextbook user:', previous_match, '\n\n', 'Possible current course match:', CourseItem, '\n\n\n\n')

# Print every possible match between earlier e-textbook rows and this season's courses.
match_previous_to_current_courses(parsed_previous_matches, this_seasons_courses)

## Show all we know about an isbn

In [None]:
# Print everything the notebook knows about one bookstore ISBN: its group of
# similar ISBNs, any publisher-spreadsheet rows for those, and the bookstore /
# course-catalog entry for the ISBN itself.
isbn = '9780136017509'


print('bookstore isbn:', isbn)
similar_isbns = show_similar_isbns(isbn)  # looked up once; None when no group matches
print('similar isbns:', similar_isbns, '\n')
# A separate loop variable keeps `isbn` intact: the bookstore lookup below must
# use the ISBN chosen at the top of the cell, not the last similar one.
for similar_isbn in similar_isbns or ():
    is_snippet_in_publisher_files_field(similar_isbn, 'isbn')
    print('\n')
    # NOTE(review): `.get()` assumes a mapping, although the name says "set" -- confirm.
    pub_item = publ_isbns_set.get(similar_isbn)
    if pub_item:
        print("From Publisher websites:", pub_item, '\n')
# `bookstore_isbns` is only defined in commented-out code in the visible cells;
# the ISBN-keyed dict that is actually built is `bookstore_isbns_nts`.
bookstore_item = bookstore_isbns_nts.get(isbn)
if bookstore_item:
    bookstore_headers = ('Dept/Course', 'Section', 'empty1', 'Professor',
                         'Author', 'Title', 'empty2', 'ISBN', 'Publisher', 'RcCd', 'STS')
    bookstore_item_dict = dict(zip(bookstore_headers, bookstore_item))
    print("From the B&N Bookstore csv:", bookstore_item_dict, "\n")
    print('\n')
    # NOTE(review): `lookup_course_catalog_item` is not defined in the visible cells.
    print("From Course Catalog:", lookup_course_catalog_item(bookstore_item_dict), '\n')
print('\n')

In [None]:
def is_snippet_in_publisher_files_field(snippet, field):
    """Print publisher-spreadsheet rows whose matching field contains a snippet.

    Scans every textbook descriptor in ``all_publ_dicts`` (filename -> sheet ->
    textbook -> {attribute: value}). A descriptor is printed once for each
    attribute whose name contains *field* and whose string value contains
    *snippet* (both case-insensitive). Non-string values are ignored.

    Args:
        snippet: text to look for inside the field values.
        field: text that the attribute name must contain.

    Returns:
        None. The "no such ..." message is printed only when nothing matched
        (it used to be printed unconditionally, even after matches).
    """
    wanted_field = field.lower()
    wanted_snippet = snippet.lower()
    found = False
    for sheets in all_publ_dicts.values():
        for textbooks in sheets.values():
            for descriptor in textbooks.values():
                for attribute, value in descriptor.items():
                    if (wanted_field in attribute.lower()
                            and isinstance(value, str)
                            and wanted_snippet in value.lower()):
                        print(descriptor)
                        found = True
    if not found:
        print('no such "{}" in an Publishers spreadsheetfield with "{}" in the name'.format(snippet, field))

# Example: print publisher rows whose ISBN-named fields contain this ISBN.
is_snippet_in_publisher_files_field('9780136017509', 'isbn')

In [None]:
def multiple_isbns():
    """List the ISBN-named fields of textbooks that have more than one.

    For every textbook descriptor in ``all_publ_dicts`` the field names (keys,
    not values) containing "isbn" (case-insensitive) are gathered; the list of
    names is kept only when it has at least two entries.

    Returns:
        list of lists of field names, in traversal order.
    """
    isbn_fields_per_textbook = (
        [field_name for field_name in descriptor if 'isbn' in field_name.lower()]
        for sheets in all_publ_dicts.values()
        for textbooks in sheets.values()
        for descriptor in textbooks.values()
    )
    return [names for names in isbn_fields_per_textbook if len(names) > 1]

# Run the scan once and keep the result for later cells.
pub_expanded_isbns = multiple_isbns()
                        

In [None]:
# Peek at a single key/value pair: the loop breaks after its first iteration.
# NOTE(review): `bookstore_items` is not assigned in the visible cells -- confirm
# where it is defined (possibly `bookstore_isbns_nts` was meant).
for k, v in bookstore_items.items():
    print(k, '\t', v)
    break

In [None]:
# Dump every ISBN and its bookstore namedtuple, one pair per line.
for k, v in bookstore_isbns_nts.items():
    print(k, "\t", v)

In [None]:
for isbn, nt in bookstore_isbns_nts.items():
    syndetics_link = "http://www.syndetics.com/index.aspx?isbn={}/LC.GIF&client=louislibnet&type=xw12&upc=&oclc=299047518&".format(isbn)
    requests.get