Skip to content

Commit

Permalink
feat: adds Metadata.get_corpus_names() to retrieve the names in chronological order
Browse files Browse the repository at this point in the history
  • Loading branch information
johentsch committed Dec 14, 2023
1 parent 2de9524 commit a5c3988
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 4 deletions.
21 changes: 19 additions & 2 deletions src/dimcat/data/resources/features.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import logging
from typing import Iterable, List, Optional, Tuple
from typing import Callable, Iterable, List, Optional, Tuple

import frictionless as fl
import marshmallow as mm
Expand All @@ -20,6 +20,7 @@
from dimcat.data.resources.utils import (
boolean_is_minor_column_to_mode,
condense_dataframe_by_groups,
get_corpus_display_name,
join_df_on_index,
make_adjacency_groups,
merge_ties,
Expand Down Expand Up @@ -63,6 +64,20 @@ def get_composition_years(
result = years.groupby(group_cols).mean()
return result

def get_corpus_names(
    self,
    func: Optional[Callable[[str], str]] = get_corpus_display_name,
) -> List[str]:
    """Return the corpus names in chronological order.

    The order is based on the mean composition year of each corpus's pieces, as
    obtained from ``self.get_composition_years(group_cols="corpus")``.

    Args:
        func:
            Function applied to each corpus name before it is returned. This is
            useful for prettifying the names, e.g. by removing underscores.
            Pass ``None`` to get the names unchanged.

    Returns:
        The corpus names, sorted by ascending mean composition year and, unless
        ``func`` is ``None``, each transformed by ``func``.
    """
    mean_composition_years = self.get_composition_years(group_cols="corpus")
    # sort_values() sorts ascending by default, i.e. earliest corpus first.
    sorted_corpus_names = mean_composition_years.sort_values().index.to_list()
    if func is None:
        return sorted_corpus_names
    return [func(corp) for corp in sorted_corpus_names]


# region Annotations
AUXILIARY_HARMONYLABEL_COLUMNS = [
Expand Down Expand Up @@ -876,7 +891,9 @@ def _format_dataframe(self, feature_df: D) -> D:
group_keys, _ = make_adjacency_groups(
feature_df.localkey, groupby=groupby_levels
)
feature_df = condense_dataframe_by_groups(feature_df, group_keys)
feature_df = condense_dataframe_by_groups(
feature_df, group_keys, logger=self.logger
)
return self._sort_columns(feature_df)


Expand Down
58 changes: 58 additions & 0 deletions src/dimcat/data/resources/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import warnings
from collections import Counter
from functools import cache
from operator import itemgetter
from pprint import pformat
from typing import (
Expand Down Expand Up @@ -51,6 +52,52 @@
FALSY_VALUES = Boolean.falsy


# Human-readable display names for known corpus repositories, keyed by repository
# name. Looked up by get_corpus_display_name(); repository names that are missing
# from this mapping fall back to a title-cased version of the repository name there.
DCML_CORPUS_NAMES: Dict[str, str] = {
"ABC": "Beethoven String Quartets",
"bach_en_fr_suites": "Bach Suites",
"bach_solo": "Bach Solo",
"bartok_bagatelles": "Bartok Bagatelles",
"beethoven_piano_sonatas": "Beethoven Sonatas",
"c_schumann_lieder": "C Schumann Lieder",
"chopin_mazurkas": "Chopin Mazurkas",
"corelli": "Corelli Trio Sonatas",
"couperin_clavecin": "Couperin Clavecin",
"couperin_concerts": "Couperin Concerts Royaux",
"cpe_bach_keyboard": "CPE Bach Keyboard",
"debussy_suite_bergamasque": "Debussy Suite Bergamasque",
"dvorak_silhouettes": "Dvořák Silhouettes",
"frescobaldi_fiori_musicali": "Frescobaldi Fiori Musicali",
"gastoldi_baletti": "Gastoldi Baletti",
"grieg_lyric_pieces": "Grieg Lyric Pieces",
"handel_keyboard": "Handel Keyboard",
"jc_bach_sonatas": "JC Bach Sonatas",
"kleine_geistliche_konzerte": "Schütz Kleine Geistliche Konzerte",
"kozeluh_sonatas": "Kozeluh Sonatas",
"liszt_pelerinage": "Liszt Années",
"mahler_kindertotenlieder": "Mahler Kindertotenlieder",
"medtner_tales": "Medtner Tales",
"mendelssohn_quartets": "Mendelssohn Quartets",
"monteverdi_madrigals": "Monteverdi Madrigals",
"mozart_piano_sonatas": "Mozart Piano Sonatas",
"pergolesi_stabat_mater": "Pergolesi Stabat Mater",
"peri_euridice": "Peri Euridice",
"pleyel_quartets": "Pleyel Quartets",
"poulenc_mouvements_perpetuels": "Poulenc Mouvements Perpetuels",
"rachmaninoff_piano": "Rachmaninoff Piano",
"ravel_piano": "Ravel Piano",
"scarlatti_sonatas": "Scarlatti Sonatas",
"schubert_dances": "Schubert Dances",
"schubert_winterreise": "Schubert Winterreise",
"schulhoff_suite_dansante_en_jazz": "Schulhoff Suite Dansante En Jazz",
"schumann_kinderszenen": "R Schumann Kinderszenen",
"schumann_liederkreis": "R Schumann Liederkreis",
"sweelinck_keyboard": "Sweelinck Keyboard",
"tchaikovsky_seasons": "Tchaikovsky Seasons",
"wagner_overtures": "Wagner Overtures",
"wf_bach_sonatas": "WF Bach Sonatas",
}


def align_with_grouping(
df: pd.DataFrame, grouping: DimcatIndex | pd.MultiIndex, sort_index: bool = True
) -> pd.DataFrame:
Expand Down Expand Up @@ -320,6 +367,17 @@ def fl_fields2pandas_params(fields: List[fl.Field]) -> Tuple[dict, dict, list]:
return dtype, converters, parse_dates


@cache
def get_corpus_display_name(repo_name: str) -> str:
    """Return a display name for the given repository name.

    The name is looked up in the ``DCML_CORPUS_NAMES`` constant. If the repository
    name has no (non-empty) entry there, it is split at underscores and the parts
    are returned in title case, joined by spaces. Results are memoized per
    ``repo_name`` via :func:`functools.cache`.
    """
    known_name = DCML_CORPUS_NAMES.get(repo_name, "")
    if known_name != "":
        return known_name
    # Fallback: derive a readable name from the repository name itself.
    words = [part.title() for part in repo_name.split("_")]
    return " ".join(words)


def get_existing_normpath(fl_resource) -> str:
"""Get the normpath of a frictionless resource, raising an exception if it does not exist.
Expand Down
3 changes: 1 addition & 2 deletions src/dimcat/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
)
"""Fixed category colors for cadence labels."""

PLOTLY_COLOR_SCALES = ColorscaleValidator().named_colorscales
COLOR_SCALE_NAMES: List[str] = sorted(PLOTLY_COLOR_SCALES.keys())
COLOR_SCALE_NAMES: List[str] = sorted(ColorscaleValidator().named_colorscales.keys())
CS: TypeAlias = Literal[tuple(COLOR_SCALE_NAMES)]


Expand Down

0 comments on commit a5c3988

Please sign in to comment.