In [1]:
import logging
import csv
import os
import sys
from extractTOC import save_toc_to_csv, get_page_count


# Tagging Mobile Sheets index files and stuff. This is the code, enter data below

In [39]:
logger = logging.getLogger()

class MobileIndex:
    fname = None
    dlmtr = ";"
    newcsv = []
    abbrv = ""
    input_dir = ""
    output_dir = ""
    data = []
    total_pages = 0

    def __init__(
        self,
        fname,
        dlmtr,
        output_dir,
        input_dir,
        total_pages,
        abbrv=None,
        collections=None,
        genres=None,
        offset=None,
        composers=None,
        capb=False,
        
    ):
        self.abbrv = abbrv
        self.dlmtr = dlmtr
        self.output_dir = output_dir
        self.input_dir = input_dir
        self.total_pages = total_pages
        self.fname = os.path.join(self.input_dir, fname)
        
        if (
            not os.path.exists(self.input_dir)
            or not os.path.exists(self.output_dir)
            or not os.path.exists(self.fname)
        ):
            raise ValueError("Input or output directory or file does not exist")
        self.out_name = os.path.join(self.output_dir, fname)
        
        # Load the CSV data into self.data
        self.load_csv()

        # Optional adjustments
        if self.abbrv:
            self.tag_the_title()
        self.adjust_pages()
        if collections:
            self.collections = collections
            self.set_collections()
        if genres:
            self.genres = genres
            self.set_genres()
        if composers:
            self.composers = composers
            self.set_composers()
        
        if offset:
            self.begin = offset[0]
            self.offset = offset[1]

    def load_csv(self):
        """Load the CSV data into self.data"""
        with open(self.fname, newline='', encoding='utf-8') as csvfile:
            reader = csv.reader(csvfile, delimiter=self.dlmtr)
            self.data = [row for row in reader]  # Load all rows into self.data

    def tag_the_title(self):
        """Tag the title column with abbreviation"""
        if not self.data:
            return
        
        header = [column.lower() for column in self.data[0]]  # Normalize the header to lowercase
        titleindex = -1
        for row in self.data:
            if titleindex == -1:
                try:
                    titleindex = header.index("title")
                    if titleindex == -1:
                        raise ValueError("Title not found")
                    self.data[0] = header
                except Exception as ex:
                    logger.error("Input file does not have a title header")
                    logger.error(str(ex))
                    logger.error("Exiting program")
                    sys.exit(1)
            elif row[titleindex][0] != "~":  # Skip if starts with "~"
                row[titleindex] += " " + self.abbrv

    def adjust_pages(self):
        """Adjust the pages column by setting each row's 'page'
        to be [current_page]-[next_page - 1], except for the last row,
        which remains singular."""

        if not self.data:
            return

        # 1) Normalize header, find 'pages' column index
        header = [col.lower() for col in self.data[0]]
        page_index = -1

        for i, col in enumerate(header):
            if col == "pages":
                page_index = i
                break
            elif col == "page":
                page_index = i
                header[page_index] = "pages"  # Normalize the header
                self.data[0] = header
                break

        if page_index == -1:
            logger.error("Input file does not have a 'pages' or 'page' header")
            sys.exit(1)

        # 2) Sort by that column
        self.data[1:] = sorted(self.data[1:], key=lambda row: int(row[page_index]))

        # 3) For each row except the last, set range = current_page -> next_page - 1
        #    For the last row, keep it singular.
        for i in range(1, len(self.data) -1):
            current_row = self.data[i]
            next_row = self.data[i + 1]

            current_page = int(current_row[page_index])
            next_page = int(next_row[page_index])

            # If next_page - 1 is >= current_page, create a range
            # Otherwise, it's effectively "current_page-current_page" (which is singular),
            # but from your example, you clearly want something like 9 -> 14 if next is 15,
            # 15 -> 18 if next is 19, etc.
            upper_bound = next_page - 1

            if upper_bound >= current_page:
                current_row[page_index] = f"{current_page}-{upper_bound}"
            else:
                # Means next_page is actually <= current_page
                # so there's no "gap". We'll just keep it singular
                # (or do whatever you prefer in that edge case).
                current_row[page_index] = str(current_page)

        # 4) The last row is left as singular
        self.data[-1][page_index] = f"{int(self.data[-1][page_index])}-{self.total_pages}"
            

        # (Optional) Write out the file
        self.write_csv()


    def write_csv(self):
        """Write the updated data back to the output CSV file"""
        with open(self.out_name, mode='w', newline='', encoding='utf-8') as output_csvfile:
            writer = csv.writer(output_csvfile, delimiter=";")
            writer.writerows(self.data)  # Write the entire data including header

    def set_collections(self):
        """
        Add or update a 'collections' column in self.data with `collection`.
        """

        # Normalize the header to lowercase for searching
        header_lower = [col.lower() for col in self.data[0]]

        # Check if 'collections' already exists in the header
        if 'collections' not in header_lower:
            # Add the column name to the header
            self.data[0].append('collections')
            col_index = len(self.data[0]) - 1
            # Fill each data row with self.collections
            for row in self.data[1:]:
                row.append(self.collections)
        else:
            # If it exists, just update the column in each row
            col_index = header_lower.index('collections')
            for row in self.data[1:]:
                row[col_index] = self.collections


    def set_genres(self):
        """
        Add or update a 'genres' column in self.data with `genres`.
        """

        # Normalize the header to lowercase for searching
        header_lower = [col.lower() for col in self.data[0]]

        # Check if 'genres' already exists
        if 'genres' not in header_lower:
            self.data[0].append('genres')
            col_index = len(self.data[0]) - 1
            for row in self.data[1:]:
                row.append(self.genres)
        else:
            col_index = header_lower.index('genres')
            for row in self.data[1:]:
                row[col_index] = self.genres


    def set_composers(self):
        """
        Add or update a 'composers' column in self.data with `composers`.
        """

        # Normalize the header to lowercase for searching
        header_lower = [col.lower() for col in self.data[0]]

        # Check if 'composers' already exists
        if 'composers' not in header_lower:
            self.data[0].append('composers')
            col_index = len(self.data[0]) - 1
            for row in self.data[1:]:
                row.append(self.composers)
        else:
            col_index = header_lower.index('composers')
            for row in self.data[1:]:
                row[col_index] = self.composers


In [61]:
fname = 'The Beatles Complete - Vol 1 A-I.csv'
dlmtr = ","
output_dir = '/usr/local/dev/MuApi/SheetMusicIndices/indexes'
input_dir = '/usr/local/dev/MuApi/SheetMusicIndices/origindex/'

abbrv = "[BCV1]"
collections = 'The Beatles Complete'
genres = 'Rock|Pop'
composers = "Lennon/Mcartney, George Harrison"
pdf_file = "/home/mike/Documents/tunes/Beatles/Beatles/The Beatles Complete - Vol 1 A-I.pdf"



In [58]:
fname = 'The Beatles Complete - Vol 2 I-Z.csv'
pdf_file = "/home/mike/Documents/tunes/Beatles/Beatles/The Beatles Complete - Vol 2 I-Z.pdf"
abbrv = "{BCV2}"

In [59]:

csv_file = os.path.join(input_dir, fname)
save_toc_to_csv(pdf_file, csv_file)
total_pages = get_page_count(pdf_file)
total_pages

511

In [62]:
ms = MobileIndex(
    fname,
    dlmtr,
    output_dir,
    input_dir,
    total_pages,
    abbrv=abbrv,
    genres=genres,
    collections=collections,
    composers=composers,
)
ms.data
ms.write_csv()


In [63]:
ms.data

[['title', 'level', 'pages', 'collections', 'genres', 'composers'],
 ['Act Naturally [BCV1]',
  '1',
  '9-14',
  'The Beatles Complete',
  'Rock|Pop',
  'Lennon/Mcartney, George Harrison'],
 ["All I've Got To Do [BCV1]",
  '1',
  '15-18',
  'The Beatles Complete',
  'Rock|Pop',
  'Lennon/Mcartney, George Harrison'],
 ['Across The Universe [BCV1]',
  '1',
  '19-25',
  'The Beatles Complete',
  'Rock|Pop',
  'Lennon/Mcartney, George Harrison'],
 ['All My Loving [BCV1]',
  '1',
  '26-30',
  'The Beatles Complete',
  'Rock|Pop',
  'Lennon/Mcartney, George Harrison'],
 ['All Together Now [BCV1]',
  '1',
  '31-38',
  'The Beatles Complete',
  'Rock|Pop',
  'Lennon/Mcartney, George Harrison'],
 ['All You Need Is Love [BCV1]',
  '1',
  '39-45',
  'The Beatles Complete',
  'Rock|Pop',
  'Lennon/Mcartney, George Harrison'],
 ['And I Love Her [BCV1]',
  '1',
  '46-50',
  'The Beatles Complete',
  'Rock|Pop',
  'Lennon/Mcartney, George Harrison'],
 ['And Your Bird Can Sing [BCV1]',
  '1',
  '51-56

Column formats:

* composers: "Beethoven,  Walter Becker"
* keys: C|F|G|D
* genres: Jazz| Christmas
* instrument: Piano|Saxaphone

In [None]:
columns = [   
    "title",	
    "pages",	
    "keys",	
    "composers",	
    "genres",
    "instrument",	
    "albums",	
    "artists",	
    "collections",	
    "years",	
    "source_types",	
    "signatures",	
    "tempos",	
    "duration",
]
