# KBART metadata reshaping script
This script takes a title list and generates two derivative KBART files (NISO KBART and OCLC KBART).

Written using Python 3.7.10

To do: move the filters that exclude rows from the original title list to the beginning of the script; generally reduce the redundancy between the two sections of the script; and possibly add a conditional option so that you can choose which type of KBART to output.

In [None]:
#import modules and libraries
import pandas as pd
import numpy as np
from datetime import datetime, date, time
# Today's date formatted as YYYY-MM-DD; it is inserted into the names of the
# output files written further down.
filetime = datetime.now().strftime("%Y-%m-%d")

In [None]:
# Ask for the title list (a csv file) to reshape into KBART files.
# The answer is later passed to pd.read_csv as the file path, so whitespace
# around it (easy to pick up when pasting a path) is removed; otherwise the
# file would not be found.
title_list = input("enter file name and if appropriate filepath of title list csv: ").strip()

In [None]:
# Ask for the name of the digital collection; it is inserted into the names of
# both output files. Whitespace around the answer is trimmed so that it does
# not end up inside those file names.
collection_name = input("enter the collection name for this title list: ").strip()

In [None]:
# Ask which statuses should be left out of the KBART files.
# NOTE: the answer is used unchanged as the pattern for Series.str.contains()
# further down, which interprets it as a regular expression; the pipe
# character therefore separates alternative statuses.
status_filter = input(
    "enter the statuses to exclude, separated by a pipe character: "
)

In [None]:
# Load the title list named above into a dataframe.
# - na_filter=False keeps empty cells as empty strings instead of NaN.
# - dtype=str keeps every value exactly as it is written in the csv. Without
#   it pandas infers numeric types for all-numeric columns, which can silently
#   change values (for example dropping leading zeros) before they are
#   written back out.
df_title_list = pd.read_csv(title_list, na_filter=False, quotechar='"', dtype=str)
# info() prints its summary itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
df_title_list.info()

## NISO KBART
First, let's create a KBART file compliant with the NISO standard.

In [None]:
# Work on an independent copy of the title list, so that building the NISO
# KBART file never modifies the dataframe loaded from the csv.
df_niso_kbart = df_title_list.copy(deep=True)

In [None]:
# In the coverage_depth column, turn the value 'ebook' into 'fulltext'.
# Only cells whose entire value is 'ebook' are changed.
# from https://datatofish.com/replace-values-pandas-dataframe/
df_niso_kbart['coverage_depth'] = df_niso_kbart['coverage_depth'].replace({'ebook': 'fulltext'})
print(df_niso_kbart['coverage_depth'])

In [None]:
# Build publication_title from the original-script and transliterated titles,
# separated by a single space.
# The title list is loaded with na_filter=False, so a missing title is an
# empty string rather than NaN and dropna() would not remove it; joining the
# raw values would then leave a stray leading or trailing space. Blank parts
# are therefore skipped explicitly (pd.notna also covers a genuine NaN).
#based on https://stackoverflow.com/questions/60724940/concatenate-strings-across-columns-that-are-not-null
df_niso_kbart['publication_title'] = df_niso_kbart[['publication_title_original', 'publication_title_transliteration']].apply(
    lambda row: ' '.join(
        str(part).strip() for part in row if pd.notna(part) and str(part).strip()
    ),
    axis=1,
)
print(df_niso_kbart['publication_title'])
# Spot-check a single row. Guarded because .loc[21, ...] raises KeyError when
# the title list has no row labelled 21 (for example a short list).
if 21 in df_niso_kbart.index:
    print(df_niso_kbart.loc[21,'publication_title'])

In [None]:
# Drop the rows whose status matches the exclusion filter entered earlier.
# str.contains() interprets the filter as a regular expression, so the
# pipe-separated statuses act as alternatives.
#based on https://www.statology.org/pandas-drop-rows-that-contain-string/
# An empty filter would match every status and drop all rows; it is taken to
# mean "exclude nothing", so filtering is skipped in that case.
if status_filter.strip():
    # na=False yields a plain boolean mask: a row without a status never
    # counts as a match.
    excluded = df_niso_kbart['status'].str.contains(status_filter, na=False)
    # .copy() detaches the result from the unfiltered dataframe.
    df_niso_kbart = df_niso_kbart[~excluded].copy()
# Spot-check a range of row labels (an empty selection is printed if none
# of them exist).
print(df_niso_kbart.loc[95:120,'publication_title'])

In [None]:
# Drop the unnecessary columns from the NISO KBART.
# drop() raises KeyError if any listed column is missing from the dataframe.
# The result is reassigned rather than dropped in place, so the operation
# never acts on a filtered view of another dataframe.
df_niso_kbart = df_niso_kbart.drop(columns=[
    'note',
    'date updated',
    'status',
    'publication_title_original',
    'publication_title_transliteration',
    'ACTION',
    'Aleph BSN',
    'previous_url',
])
# info() prints its summary itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
df_niso_kbart.info()

In [None]:
# Move publication_title to be the first column: pop() removes the column
# from the dataframe and returns it, insert() puts it back at position 0.
#based on https://www.geeksforgeeks.org/how-to-move-a-column-to-first-position-in-pandas-dataframe/
#and https://stackoverflow.com/questions/25122099/move-column-by-name-to-front-of-table-in-pandas
col = df_niso_kbart.pop('publication_title')
df_niso_kbart.insert(0, col.name, col)
# info() prints its summary itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
df_niso_kbart.info()

In [None]:
# Save the NISO KBART as a tab-separated .txt file whose name contains the
# collection name and today's date; the dataframe index is not written.
#from https://matthew-brett.github.io/teaching/string_formatting.html
niso_kbart_path = "nyu_global_{}_{}.txt".format(collection_name, filetime)
df_niso_kbart.to_csv(niso_kbart_path, sep="\t", index=False)

## OCLC KBART
Next, let's create a KBART file that can be uploaded to OCLC's WorldShare platform.

In [None]:
# Work on an independent copy of the title list, so that building the OCLC
# KBART file never modifies the dataframe loaded from the csv.
df_oclc_kbart = df_title_list.copy(deep=True)

In [None]:
# Rename the notes column to coverage_notes to conform to OCLC's expectations.
# With its default settings rename() leaves the dataframe unchanged when
# there is no 'notes' column.
df_oclc_kbart = df_oclc_kbart.rename(columns={'notes': 'coverage_notes'})
# info() prints its summary itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
df_oclc_kbart.info()

In [None]:
# Build publication_title from the original-script and transliterated titles,
# separated by a single space.
# The title list is loaded with na_filter=False, so a missing title is an
# empty string rather than NaN and dropna() would not remove it; joining the
# raw values would then leave a stray leading or trailing space. Blank parts
# are therefore skipped explicitly (pd.notna also covers a genuine NaN).
#based on https://stackoverflow.com/questions/60724940/concatenate-strings-across-columns-that-are-not-null
df_oclc_kbart['publication_title'] = df_oclc_kbart[['publication_title_original', 'publication_title_transliteration']].apply(
    lambda row: ' '.join(
        str(part).strip() for part in row if pd.notna(part) and str(part).strip()
    ),
    axis=1,
)
print(df_oclc_kbart['publication_title'])
# Spot-check a single row. Guarded because .loc[21, ...] raises KeyError when
# the title list has no row labelled 21 (for example a short list).
if 21 in df_oclc_kbart.index:
    print(df_oclc_kbart.loc[21,'publication_title'])

In [None]:
# Drop the rows whose status matches the exclusion filter entered earlier.
# str.contains() interprets the filter as a regular expression, so the
# pipe-separated statuses act as alternatives.
#based on https://www.statology.org/pandas-drop-rows-that-contain-string/
# An empty filter would match every status and drop all rows; it is taken to
# mean "exclude nothing", so filtering is skipped in that case.
if status_filter.strip():
    # na=False yields a plain boolean mask: a row without a status never
    # counts as a match.
    excluded = df_oclc_kbart['status'].str.contains(status_filter, na=False)
    # .copy() detaches the result from the unfiltered dataframe.
    df_oclc_kbart = df_oclc_kbart[~excluded].copy()
# Spot-check a range of row labels (an empty selection is printed if none
# of them exist).
print(df_oclc_kbart.loc[95:120,'publication_title'])

In [None]:
# Drop the unnecessary columns from the OCLC KBART.
# drop() raises KeyError if any listed column is missing from the dataframe.
# The result is reassigned rather than dropped in place, so the operation
# never acts on a filtered view of another dataframe.
df_oclc_kbart = df_oclc_kbart.drop(columns=[
    "note",
    "date updated",
    "status",
    "Aleph BSN",
    "previous_url",
    "publication_title_original",
    "publication_title_transliteration",
    "publication_type",
    "date_monograph_published_print",
    "date_monograph_published_online",
    "monograph_volume",
    "monograph_edition",
    "first_editor",
    "parent_publication_title_id",
    "preceding_publication_title_id",
    "access_type",
])
# info() prints its summary itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
df_oclc_kbart.info()

In [None]:
# Put the columns into the order in which they are written to the OCLC file.
# Selecting with an explicit list keeps exactly these columns: any column not
# listed is left out, and a listed column that is missing from the dataframe
# raises KeyError.
df_oclc_kbart = df_oclc_kbart[[
    "publication_title",
    "print_identifier",
    "online_identifier",
    "date_first_issue_online",
    "num_first_vol_online",
    "num_first_issue_online",
    "date_last_issue_online",
    "num_last_vol_online",
    "num_last_issue_online",
    "title_url",
    "first_author",
    "title_id",
    "embargo_info",
    "coverage_depth",
    "coverage_notes",
    "publisher_name",
    "location",
    "title_notes",
    "staff_notes",
    "vendor_id",
    "oclc_collection_name",
    "oclc_collection_id",
    "oclc_entry_id",
    "oclc_linkscheme",
    "oclc_number",
    "ACTION",
]]
# info() prints its summary itself and returns None, so it is not wrapped in
# print(), which would add a stray "None" line to the output.
df_oclc_kbart.info()

In [None]:
# Save the OCLC KBART as a tab-separated .txt file whose name contains the
# collection name and today's date; the dataframe index is not written.
#from https://matthew-brett.github.io/teaching/string_formatting.html
oclc_kbart_path = "nyu_oclc_{}_{}.txt".format(collection_name, filetime)
df_oclc_kbart.to_csv(oclc_kbart_path, sep="\t", index=False)