# KBART metadata reshaping script
This script takes a title list and generates two derivative KBART files (NISO KBART and OCLC KBART).

Written using Python 3.7.10

In [1]:
#import modules and libraries
import pandas as pd
import numpy as np
from datetime import datetime, date, time
# Today's date as a YYYY-MM-DD string; used later as a suffix in the output file names.
filetime = datetime.now().strftime("%Y-%m-%d")

In [2]:
# Ask for the name (and, if needed, the path) of the title list CSV to reshape into KBART files.
# Leading/trailing whitespace is stripped so that a stray space typed or pasted at the
# prompt does not make the later pd.read_csv() call fail to find the file.
title_list = input("enter file name and if appropriate filepath of title list csv: ").strip()

enter file name and if appropriate filepath of title list csv:  AWDL_title_list.csv


In [3]:
# Ask for the name of the digital collection; it is inserted into the output file names later.
# Leading/trailing whitespace is stripped so it does not end up inside those file names.
collection_name = input("enter the collection name for this title list: ").strip()

enter the collection name for this title list:  awdl


In [4]:
# Load the title list dataframe from the csv file named above.
# dtype=str keeps every cell exactly as it appears in the CSV, so identifier-like
# values are not re-interpreted as numbers (which could drop leading zeros or
# reformat values when the KBART files are written back out).
# na_filter=False keeps empty cells as empty strings instead of converting them to NaN.
df_title_list = pd.read_csv(title_list, dtype=str, na_filter=False, quotechar='"')
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
df_title_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 481 entries, 0 to 480
Data columns (total 40 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   note                             481 non-null    object
 1   date updated                     481 non-null    object
 2   status                           481 non-null    object
 3   publication_title                481 non-null    object
 4   print_identifier                 481 non-null    object
 5   online_identifier                481 non-null    object
 6   date_first_issue_online          481 non-null    object
 7   num_first_vol_online             481 non-null    object
 8   num_first_issue_online           481 non-null    object
 9   date_last_issue_online           481 non-null    object
 10  num_last_vol_online              481 non-null    object
 11  num_last_issue_online            481 non-null    object
 12  title_url                        481

## NISO KBART
First, let's create a KBART file compliant with the NISO standard.

In [5]:
# Work on an independent (deep) copy of the title list so that building the
# NISO KBART file never modifies the dataframe that was loaded from the csv.
df_niso_kbart = df_title_list.copy(deep=True)

In [6]:
# In the coverage_depth column, swap the value 'ebook' for 'fulltext'.
# Only cells whose whole value is 'ebook' are changed; everything else is kept.
# See https://datatofish.com/replace-values-pandas-dataframe/
coverage_depth = df_niso_kbart['coverage_depth']
df_niso_kbart['coverage_depth'] = coverage_depth.replace({'ebook': 'fulltext'})
print(df_niso_kbart['coverage_depth'])

0              
1      fulltext
2      fulltext
3      fulltext
4      fulltext
         ...   
476    fulltext
477    fulltext
478    fulltext
479    fulltext
480    fulltext
Name: coverage_depth, Length: 481, dtype: object


In [7]:
# Drop the columns that are not wanted in the NISO KBART file.
niso_columns_to_drop = ['note', 'date updated', 'status', 'ACTION', 'Aleph BSN', 'previous_url']
df_niso_kbart = df_niso_kbart.drop(columns=niso_columns_to_drop)

In [8]:
# Write the NISO KBART to csv; the file name carries the collection name and today's date.
# String formatting reference: https://matthew-brett.github.io/teaching/string_formatting.html
niso_filename = "nyu_global_{}_{}.csv".format(collection_name, filetime)
df_niso_kbart.to_csv(niso_filename, index=False)

## OCLC KBART
Next, let's create a KBART file that can be uploaded to OCLC's WorldShare platform.

In [9]:
# Work on an independent (deep) copy of the title list so that building the
# OCLC KBART file never modifies the dataframe that was loaded from the csv.
df_oclc_kbart = df_title_list.copy(deep=True)

In [10]:
# Rename the 'notes' column to 'coverage_notes' to conform to OCLC's expectations.
# The result is assigned back rather than using inplace=True, which reads more
# clearly and behaves the same for this dataframe.
df_oclc_kbart = df_oclc_kbart.rename(columns={'notes': 'coverage_notes'})
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
df_oclc_kbart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 481 entries, 0 to 480
Data columns (total 40 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   note                             481 non-null    object
 1   date updated                     481 non-null    object
 2   status                           481 non-null    object
 3   publication_title                481 non-null    object
 4   print_identifier                 481 non-null    object
 5   online_identifier                481 non-null    object
 6   date_first_issue_online          481 non-null    object
 7   num_first_vol_online             481 non-null    object
 8   num_first_issue_online           481 non-null    object
 9   date_last_issue_online           481 non-null    object
 10  num_last_vol_online              481 non-null    object
 11  num_last_issue_online            481 non-null    object
 12  title_url                        481

In [11]:
# Drop the columns that are not wanted in the OCLC KBART file.
# ('ACTION' is deliberately kept here, unlike in the NISO KBART.)
oclc_columns_to_drop = ['note', 'date updated', 'status', 'Aleph BSN', 'previous_url']
df_oclc_kbart = df_oclc_kbart.drop(columns=oclc_columns_to_drop)
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
df_oclc_kbart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 481 entries, 0 to 480
Data columns (total 35 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   publication_title                481 non-null    object
 1   print_identifier                 481 non-null    object
 2   online_identifier                481 non-null    object
 3   date_first_issue_online          481 non-null    object
 4   num_first_vol_online             481 non-null    object
 5   num_first_issue_online           481 non-null    object
 6   date_last_issue_online           481 non-null    object
 7   num_last_vol_online              481 non-null    object
 8   num_last_issue_online            481 non-null    object
 9   title_url                        481 non-null    object
 10  first_author                     481 non-null    object
 11  title_id                         481 non-null    object
 12  embargo_info                     481

In [12]:
# Reorder the columns by moving the NISO KBART columns not found in OCLC's KBART to the end.
# Selecting with an explicit list both fixes the column order and raises a KeyError
# if any expected column is missing from the dataframe.
oclc_column_order = [
    "publication_title",
    "print_identifier",
    "online_identifier",
    "date_first_issue_online",
    "num_first_vol_online",
    "num_first_issue_online",
    "date_last_issue_online",
    "num_last_vol_online",
    "num_last_issue_online",
    "title_url",
    "first_author",
    "title_id",
    "embargo_info",
    "coverage_depth",
    "coverage_notes",
    "publisher_name",
    "location",
    "title_notes",
    "staff_notes",
    "vendor_id",
    "oclc_collection_name",
    "oclc_collection_id",
    "oclc_entry_id",
    "oclc_linkscheme",
    "oclc_number",
    "ACTION",
    "publication_type",
    "date_monograph_published_print",
    "date_monograph_published_online",
    "monograph_volume",
    "monograph_edition",
    "first_editor",
    "parent_publication_title_id",
    "preceding_publication_title_id",
    "access_type",
]
df_oclc_kbart = df_oclc_kbart[oclc_column_order]
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
df_oclc_kbart.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 481 entries, 0 to 480
Data columns (total 35 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   publication_title                481 non-null    object
 1   print_identifier                 481 non-null    object
 2   online_identifier                481 non-null    object
 3   date_first_issue_online          481 non-null    object
 4   num_first_vol_online             481 non-null    object
 5   num_first_issue_online           481 non-null    object
 6   date_last_issue_online           481 non-null    object
 7   num_last_vol_online              481 non-null    object
 8   num_last_issue_online            481 non-null    object
 9   title_url                        481 non-null    object
 10  first_author                     481 non-null    object
 11  title_id                         481 non-null    object
 12  embargo_info                     481

In [13]:
# Write the OCLC KBART to csv; the file name carries the collection name and today's date.
# String formatting reference: https://matthew-brett.github.io/teaching/string_formatting.html
oclc_filename = "nyu_oclc_{}_{}.csv".format(collection_name, filetime)
df_oclc_kbart.to_csv(oclc_filename, index=False)